 * Copyright (c) 2013-2014 Apple Inc. All rights reserved.
 *
 * @APPLE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this
 * file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_LICENSE_HEADER_END@
 * The socket content filter subsystem provides a way for user space agents to
 * make filtering decisions based on the content of the data being sent and
 * received by TCP/IP sockets.
 *
 * A content filter user space agent gets a copy of the data, and the data is
 * also kept in a kernel buffer until the user space agent makes a pass or drop
 * decision. This unidirectional flow of content avoids unnecessary data copies.
36 * A user space filter agent opens a kernel control socket with the name
37 * CONTENT_FILTER_CONTROL_NAME to attach to the socket content filter subsystem.
38 * When connected, a "struct content_filter" is created and set as the
39 * "unitinfo" of the corresponding kernel control socket instance.
41 * The socket content filter subsystem exchanges messages with the user space
42 * filter agent until an ultimate pass or drop decision is made by the
43 * user space filter agent.
45 * It should be noted that messages about many TCP/IP sockets can be multiplexed
46 * over a single kernel control socket.
49 * - The current implementation is limited to TCP sockets.
50 * - The current implementation supports up to two simultaneous content filters
51 * for the sake of simplicity of the implementation.
 * NECP FILTER CONTROL UNIT
 * A user space filter agent uses the Network Extension Control Policy (NECP)
 * database to specify which TCP/IP sockets need to be filtered. The NECP
 * criteria may be based on a variety of properties like user ID or proc UUID.
60 * The NECP "filter control unit" is used by the socket content filter subsystem
61 * to deliver the relevant TCP/IP content information to the appropriate
62 * user space filter agent via its kernel control socket instance.
63 * This works as follows:
 * 1) The user space filter agent specifies an NECP filter control unit when
 *    it adds its filtering rules to the NECP database.
68 * 2) The user space filter agent also sets its NECP filter control unit on the
69 * content filter kernel control socket via the socket option
70 * CFIL_OPT_NECP_CONTROL_UNIT.
72 * 3) The NECP database is consulted to find out if a given TCP/IP socket
73 * needs to be subjected to content filtering and returns the corresponding
74 * NECP filter control unit -- the NECP filter control unit is actually
75 * stored in the TCP/IP socket structure so the NECP lookup is really simple.
77 * 4) The NECP filter control unit is then used to find the corresponding
78 * kernel control socket instance.
 * Note: NECP currently supports a single filter control unit per TCP/IP socket
 * but this restriction may soon be lifted.
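 *
 * For illustration, here is a minimal user space sketch of attaching an agent
 * (hedged: this is not taken from an actual agent; error handling is omitted
 * and the control unit value is hypothetical -- only CONTENT_FILTER_CONTROL_NAME
 * and CFIL_OPT_NECP_CONTROL_UNIT come from this subsystem, the rest is the
 * standard kernel control socket API):
 *
 *    struct ctl_info info;
 *    struct sockaddr_ctl addr;
 *    uint32_t necp_control_unit = 42;    // must match the NECP filtering rule
 *
 *    int fd = socket(PF_SYSTEM, SOCK_DGRAM, SYSPROTO_CONTROL);
 *    memset(&info, 0, sizeof(info));
 *    strlcpy(info.ctl_name, CONTENT_FILTER_CONTROL_NAME, sizeof(info.ctl_name));
 *    ioctl(fd, CTLIOCGINFO, &info);      // resolve the control name to an id
 *
 *    memset(&addr, 0, sizeof(addr));
 *    addr.sc_len = sizeof(addr);
 *    addr.sc_family = AF_SYSTEM;
 *    addr.ss_sysaddr = AF_SYS_CONTROL;
 *    addr.sc_id = info.ctl_id;
 *    addr.sc_unit = 1;                   // kernel control unit, 1..MAX_CONTENT_FILTER
 *    connect(fd, (struct sockaddr *)&addr, sizeof(addr));
 *
 *    // Tell the subsystem which NECP filter control unit this agent serves
 *    setsockopt(fd, SYSPROTO_CONTROL, CFIL_OPT_NECP_CONTROL_UNIT,
 *        &necp_control_unit, sizeof(necp_control_unit));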
 * THE MESSAGING PROTOCOL
 * The socket content filter subsystem and a user space filter agent
 * communicate over the kernel control socket via an asynchronous
 * messaging protocol (this is not a request-response protocol).
 * The socket content filter subsystem sends event messages to the user
 * space filter agent about the TCP/IP sockets it is interested in filtering.
 * The user space filter agent sends action messages to either allow
 * data to pass or to disallow the data flow (and drop the connection).
 *
 * All messages over a content filter kernel control socket share the same
 * common header of type "struct cfil_msg_hdr". The message type tells if
 * it's an event message "CFM_TYPE_EVENT" or an action message "CFM_TYPE_ACTION".
 * The message header field "cfm_sock_id" identifies a given TCP/IP socket.
 * Note the message header length field may be padded for alignment and can
 * be larger than the actual content of the message.
 * The field "cfm_op" describes the kind of event or action.
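 *
 * A minimal sketch of how a user space agent might consume event messages
 * (assumptions: "fd" is the connected kernel control socket from the sketch
 * above; buffer sizing and partial reads are not handled here):
 *
 *    uint8_t buf[8 * 1024];
 *    ssize_t n = recv(fd, buf, sizeof(buf), 0);
 *    struct cfil_msg_hdr *hdr = (struct cfil_msg_hdr *)(void *)buf;
 *
 *    if (n >= (ssize_t)sizeof(*hdr) && hdr->cfm_type == CFM_TYPE_EVENT) {
 *        switch (hdr->cfm_op) {
 *        case CFM_OP_SOCKET_ATTACHED:
 *            // a new flow identified by hdr->cfm_sock_id is being filtered
 *            break;
 *        case CFM_OP_DATA_OUT:
 *        case CFM_OP_DATA_IN:
 *            // inspect the data span, then answer with CFM_OP_DATA_UPDATE
 *            break;
 *        case CFM_OP_SOCKET_CLOSED:
 *            // drop any per-flow state kept for hdr->cfm_sock_id
 *            break;
 *        }
 *    }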
 * Here are the kinds of content filter events:
 * - CFM_OP_SOCKET_ATTACHED: a new TCP/IP socket is being filtered
 * - CFM_OP_SOCKET_CLOSED: a TCP/IP socket is closed
 * - CFM_OP_DATA_OUT: a span of data is being sent on a TCP/IP socket
 * - CFM_OP_DATA_IN: a span of data is being received on a TCP/IP socket
 * The CFM_OP_DATA_OUT and CFM_OP_DATA_IN event messages contain a span of
 * data that is being sent or received. The position of this span of data
 * in the data flow is described by a set of start and end offsets. These
 * are absolute 64-bit offsets. The first byte sent (or received) starts
 * at offset 0 and ends at offset 1. The length of the content data
 * is given by the difference between the end offset and the start offset.
 * After a CFM_OP_SOCKET_ATTACHED is delivered, CFM_OP_DATA_OUT and
 * CFM_OP_DATA_IN events are not delivered until a CFM_OP_DATA_UPDATE
 * action message is sent by the user space filter agent.
 * Note: absolute 64-bit offsets should be large enough for the foreseeable
 * future. A 64-bit counter will wrap only after 468 years at 10 Gbit/sec:
 * 2^64 / ((10^10 / 8) * 60 * 60 * 24 * 365.25) = 467.63
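 *
 * The same arithmetic, as a small C sketch for anyone double checking the
 * numbers in the note above:
 *
 *    double bytes_per_sec = 10e9 / 8;                    // 10 Gbit/sec in bytes
 *    double secs_per_year = 60 * 60 * 24 * 365.25;
 *    double years_to_wrap = 18446744073709551616.0       // 2^64
 *        / (bytes_per_sec * secs_per_year);              // ~467.6 years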
 * There are two kinds of content filter actions:
 * - CFM_OP_DATA_UPDATE: to update the pass or peek offsets for each direction.
 * - CFM_OP_DROP: to shut down the socket and disallow further data flow.
133 * The CFM_OP_DATA_UPDATE action messages let the user space filter
134 * agent allow data to flow up to the specified pass offset -- there
135 * is a pass offset for outgoing data and a pass offset for incoming data.
136 * When a new TCP/IP socket is attached to the content filter, each pass offset
 * is initially set to 0 so no data is allowed to pass by default.
138 * When the pass offset is set to CFM_MAX_OFFSET via a CFM_OP_DATA_UPDATE
139 * then the data flow becomes unrestricted.
141 * Note that pass offsets can only be incremented. A CFM_OP_DATA_UPDATE message
142 * with a pass offset smaller than the pass offset of a previous
143 * CFM_OP_DATA_UPDATE message is silently ignored.
 * A user space filter agent also uses CFM_OP_DATA_UPDATE action messages
 * to tell the kernel how much data it wants to see by using the peek offsets.
 * Just like pass offsets, there is a peek offset for each direction.
 * When a new TCP/IP socket is attached to the content filter, each peek offset
 * is initially set to 0 so no CFM_OP_DATA_OUT and CFM_OP_DATA_IN event
 * messages are dispatched by default until a CFM_OP_DATA_UPDATE action message
 * with a peek offset greater than 0 is sent by the user space filter agent.
 * When the peek offset is set to CFM_MAX_OFFSET via a CFM_OP_DATA_UPDATE
 * then the flow of data event messages becomes unrestricted.
 * Note that a peek offset cannot be smaller than the corresponding pass offset.
 * Also a peek offset cannot be smaller than the corresponding end offset
 * of the last CFM_OP_DATA_OUT/CFM_OP_DATA_IN message dispatched. Attempts
 * to set a smaller peek value are silently ignored.
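 *
 * A sketch of the action message a user space agent could send in response
 * (assumptions: "fd" and "hdr" are from the read sketch above, and the header
 * member of "struct cfil_msg_action" is assumed here to be named cfa_msghdr):
 *
 *    struct cfil_msg_action action;
 *
 *    memset(&action, 0, sizeof(action));
 *    action.cfa_msghdr.cfm_len = sizeof(action);
 *    action.cfa_msghdr.cfm_version = CFM_VERSION_CURRENT;
 *    action.cfa_msghdr.cfm_type = CFM_TYPE_ACTION;
 *    action.cfa_msghdr.cfm_op = CFM_OP_DATA_UPDATE;
 *    action.cfa_msghdr.cfm_sock_id = hdr->cfm_sock_id;   // flow being acted upon
 *
 *    // Let everything pass and keep seeing all data in both directions
 *    action.cfa_out_pass_offset = CFM_MAX_OFFSET;
 *    action.cfa_out_peek_offset = CFM_MAX_OFFSET;
 *    action.cfa_in_pass_offset  = CFM_MAX_OFFSET;
 *    action.cfa_in_peek_offset  = CFM_MAX_OFFSET;
 *
 *    send(fd, &action, sizeof(action), 0);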
 * PER SOCKET "struct cfil_info"
 * As soon as a TCP/IP socket gets attached to a content filter, a
 * "struct cfil_info" is created to hold the content filtering state for this
 * socket.
 *
 * The content filtering state is made of the following information
 * for each direction:
 * - The current pass offset;
 * - The first and last offsets of the data pending, waiting for a filtering
 *   decision;
 * - The inject queue for data that passed the filters and that needs
 *   to be re-injected;
 * - A content filter specific state in a set of "struct cfil_entry".
 * CONTENT FILTER STATE "struct cfil_entry"
179 * The "struct cfil_entry" maintains the information most relevant to the
180 * message handling over a kernel control socket with a user space filter agent.
182 * The "struct cfil_entry" holds the NECP filter control unit that corresponds
183 * to the kernel control socket unit it corresponds to and also has a pointer
184 * to the corresponding "struct content_filter".
186 * For each direction, "struct cfil_entry" maintains the following information:
189 * - The offset of the last data peeked at by the filter
190 * - A queue of data that's waiting to be delivered to the user space filter
191 * agent on the kernel control socket
192 * - A queue of data for which event messages have been sent on the kernel
193 * control socket and are pending for a filtering decision.
 * CONTENT FILTER QUEUES
 * Data that is being filtered is steered away from the TCP/IP socket buffer
 * and instead sits in one of three content filter queues until the data
 * can be re-injected into the TCP/IP socket buffer.
 * A content filter queue is represented by "struct cfil_queue" that contains
 * a list of mbufs and the start and end offsets of the data span of
 * the mbuf list.
 * The data moves through the three content filter queues in this sequence:
 * a) The "cfe_ctl_q" of "struct cfil_entry"
 * b) The "cfe_pending_q" of "struct cfil_entry"
 * c) The "cfi_inject_q" of "struct cfil_info"
 *
 * Note: the sequence (a),(b) may be repeated several times if more than
 * one content filter is attached to the TCP/IP socket.
215 * The "cfe_ctl_q" queue holds data than cannot be delivered to the
216 * kernel conntrol socket for two reasons:
217 * - The peek offset is less that the end offset of the mbuf data
218 * - The kernel control socket is flow controlled
220 * The "cfe_pending_q" queue holds data for which CFM_OP_DATA_OUT or
221 * CFM_OP_DATA_IN have been successfully dispatched to the kernel control
222 * socket and are waiting for a pass action message fromn the user space
223 * filter agent. An mbuf length must be fully allowed to pass to be removed
224 * from the cfe_pending_q.
226 * The "cfi_inject_q" queue holds data that has been fully allowed to pass
227 * by the user space filter agent and that needs to be re-injected into the
 * IMPACT ON FLOW CONTROL
 * An essential aspect of the content filter subsystem is to minimize the
 * impact on flow control of the TCP/IP sockets being filtered.
 * The processing overhead of the content filtering may have an effect on
 * flow control by adding noticeable delays and cannot be eliminated --
 * care must be taken by the user space filter agent to minimize the
 * processing overhead.
 * The data being filtered is kept in kernel buffers while waiting for
 * a decision by the user space filter agent. This amount of pending data
 * needs to be subtracted from the amount of data available in the
 * corresponding TCP/IP socket buffer. This is done by modifying
 * sbspace() and tcp_sbspace() to account for the amount of data pending
 * in the content filter.
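 *
 * Conceptually (an illustrative sketch of the accounting idea only, with
 * hypothetical names sb_limit and sb_used -- not the actual sbspace()
 * implementation):
 *
 *    pending = cfi_pending_last - cfi_pending_first;     // bytes held by cfil
 *    space   = sb_limit - sb_used - pending;             // what sbspace() reports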
 * The global state of the content filter subsystem is protected by a single
 * read-write lock "cfil_lck_rw". The data flow can be done with the
 * cfil read-write lock held as shared so it can be re-entered from multiple
 * threads.
 *
 * The per TCP/IP socket content filter state -- "struct cfil_info" -- is
 * protected by the socket lock.
259 * A TCP/IP socket lock cannot be taken while the cfil read-write lock
260 * is held. That's why we have some sequences where we drop the cfil read-write
261 * lock before taking the TCP/IP lock.
263 * It is also important to lock the TCP/IP socket buffer while the content
264 * filter is modifying the amount of pending data. Otherwise the calculations
265 * in sbspace() and tcp_sbspace() could be wrong.
267 * The "cfil_lck_rw" protects "struct content_filter" and also the fields
268 * "cfe_link" and "cfe_filter" of "struct cfil_entry".
 * Actually "cfe_link" and "cfe_filter" are protected both by
 * "cfil_lck_rw" and the socket lock: they may be modified only when
 * "cfil_lck_rw" is held exclusive and the socket is locked.
274 * To read the other fields of "struct content_filter" we have to take
275 * "cfil_lck_rw" in shared mode.
 * LIMITATIONS
 *
 * - For TCP sockets only
 *
 * - Does not support TCP unordered messages
 *
 * TBD: If datagram sockets are supported, enqueue control and address
 * mbufs as well.
#include <sys/types.h>
#include <sys/kern_control.h>
#include <sys/queue.h>
#include <sys/domain.h>
#include <sys/protosw.h>
#include <sys/syslog.h>

#include <kern/locks.h>
#include <kern/zalloc.h>
#include <kern/debug.h>

#include <net/content_filter.h>

#include <netinet/in_pcb.h>
#include <netinet/tcp.h>
#include <netinet/tcp_var.h>

#include <libkern/libkern.h>
#define MAX_CONTENT_FILTER 2

/*
 * The structure content_filter represents a user space content filter.
 * It's created and associated with a kernel control socket instance.
 */
struct content_filter {
	kern_ctl_ref		cf_kcref;
	u_int32_t		cf_kcunit;
	uint32_t		cf_flags;

	uint32_t		cf_necp_control_unit;

	uint32_t		cf_sock_count;
	TAILQ_HEAD(, cfil_entry) cf_sock_entries;
};

#define CFF_ACTIVE		0x01
#define CFF_DETACHING		0x02
#define CFF_FLOW_CONTROLLED	0x04

struct content_filter **content_filters = NULL;
uint32_t cfil_active_count = 0;		/* Number of active content filters */
uint32_t cfil_sock_attached_count = 0;	/* Number of socket attachments */
uint32_t cfil_close_wait_timeout = 1000; /* in milliseconds */

static kern_ctl_ref cfil_kctlref = NULL;

static lck_grp_attr_t	*cfil_lck_grp_attr = NULL;
static lck_attr_t	*cfil_lck_attr = NULL;
static lck_grp_t	*cfil_lck_grp = NULL;
decl_lck_rw_data(static, cfil_lck_rw);

#define CFIL_RW_LCK_MAX 8

int cfil_rw_nxt_lck = 0;
void *cfil_rw_lock_history[CFIL_RW_LCK_MAX];

int cfil_rw_nxt_unlck = 0;
void *cfil_rw_unlock_history[CFIL_RW_LCK_MAX];
#define CONTENT_FILTER_ZONE_NAME	"content_filter"
#define CONTENT_FILTER_ZONE_MAX		10
static struct zone *content_filter_zone = NULL;	/* zone for content_filter */

#define CFIL_INFO_ZONE_NAME	"cfil_info"
#define CFIL_INFO_ZONE_MAX	1024
static struct zone *cfil_info_zone = NULL;	/* zone for cfil_info */

MBUFQ_HEAD(cfil_mqhead);

struct cfil_queue {
	uint64_t		q_start;	/* offset of first byte in queue */
	uint64_t		q_end;		/* offset of last byte in queue */
	struct cfil_mqhead	q_mq;
};
 * There is one entry per content filter
384 TAILQ_ENTRY(cfil_entry
) cfe_link
;
385 struct content_filter
*cfe_filter
;
387 struct cfil_info
*cfe_cfil_info
;
389 uint32_t cfe_necp_control_unit
;
390 struct timeval cfe_last_event
; /* To user space */
391 struct timeval cfe_last_action
; /* From user space */
395 * cfe_pending_q holds data that has been delivered to
396 * the filter and for which we are waiting for an action
398 struct cfil_queue cfe_pending_q
;
 * This queue is for data that has not been delivered to
 * the content filter (new data, pass peek or flow control)
403 struct cfil_queue cfe_ctl_q
;
405 uint64_t cfe_pass_offset
;
406 uint64_t cfe_peek_offset
;
411 #define CFEF_CFIL_ATTACHED 0x0001 /* was attached to filter */
412 #define CFEF_SENT_SOCK_ATTACHED 0x0002 /* sock attach event was sent */
413 #define CFEF_DATA_START 0x0004 /* can send data event */
414 #define CFEF_FLOW_CONTROLLED 0x0008 /* wait for flow control lift */
415 #define CFEF_SENT_DISCONNECT_IN 0x0010 /* event was sent */
416 #define CFEF_SENT_DISCONNECT_OUT 0x0020 /* event was sent */
417 #define CFEF_SENT_SOCK_CLOSED 0x0040 /* closed event was sent */
418 #define CFEF_CFIL_DETACHED 0x0080 /* filter was detached */
423 * There is a struct cfil_info per socket
426 TAILQ_ENTRY(cfil_info
) cfi_link
;
427 struct socket
*cfi_so
;
429 uint64_t cfi_sock_id
;
433 * cfi_pending_first and cfi_pending_last describe the total
434 * amount of data outstanding for all the filters on
435 * this socket and data in the flow queue
436 * cfi_pending_mbcnt counts in sballoc() "chars of mbufs used"
438 uint64_t cfi_pending_first
;
439 uint64_t cfi_pending_last
;
440 int cfi_pending_mbcnt
;
442 * cfi_pass_offset is the minimum of all the filters
444 uint64_t cfi_pass_offset
;
446 * cfi_inject_q holds data that needs to be re-injected
447 * into the socket after filtering and that can
448 * be queued because of flow control
450 struct cfil_queue cfi_inject_q
;
453 struct cfil_entry cfi_entries
[MAX_CONTENT_FILTER
];
456 #define CFIF_DROP 0x0001 /* drop action applied */
457 #define CFIF_CLOSE_WAIT 0x0002 /* waiting for filter to close */
458 #define CFIF_SOCK_CLOSED 0x0004 /* socket is closed */
459 #define CFIF_RETRY_INJECT_IN 0x0010 /* inject in failed */
460 #define CFIF_RETRY_INJECT_OUT 0x0020 /* inject out failed */
461 #define CFIF_SHUT_WR 0x0040 /* shutdown write */
462 #define CFIF_SHUT_RD 0x0080 /* shutdown read */
464 #define CFI_MASK_GENCNT 0xFFFFFFFF00000000 /* upper 32 bits */
465 #define CFI_SHIFT_GENCNT 32
466 #define CFI_MASK_FLOWHASH 0x00000000FFFFFFFF /* lower 32 bits */
467 #define CFI_SHIFT_FLOWHASH 0
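
/*
 * Layout of cfi_sock_id implied by the masks above and by how
 * cfil_info_alloc() builds the identifier (a sketch for the reader;
 * the decomposition expressions below are illustrative):
 *
 *    63                               32 31                                0
 *    +-----------------------------------+----------------------------------+
 *    |   socket generation count (low)   |           inp_flowhash           |
 *    +-----------------------------------+----------------------------------+
 *
 *    gencnt   = (cfi_sock_id & CFI_MASK_GENCNT)   >> CFI_SHIFT_GENCNT;
 *    flowhash = (cfi_sock_id & CFI_MASK_FLOWHASH) >> CFI_SHIFT_FLOWHASH;
 */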
469 TAILQ_HEAD(cfil_sock_head
, cfil_info
) cfil_sock_head
;
471 #define CFIL_QUEUE_VERIFY(x) if (cfil_debug) cfil_queue_verify(x)
472 #define CFIL_INFO_VERIFY(x) if (cfil_debug) cfil_info_verify(x)
478 struct cfil_stats cfil_stats
;
481 * For troubleshooting
483 int cfil_log_level
= LOG_ERR
;
487 * Sysctls for logs and statistics
489 static int sysctl_cfil_filter_list(struct sysctl_oid
*, void *, int,
490 struct sysctl_req
*);
491 static int sysctl_cfil_sock_list(struct sysctl_oid
*, void *, int,
492 struct sysctl_req
*);
494 SYSCTL_NODE(_net
, OID_AUTO
, cfil
, CTLFLAG_RW
|CTLFLAG_LOCKED
, 0, "cfil");
496 SYSCTL_INT(_net_cfil
, OID_AUTO
, log
, CTLFLAG_RW
|CTLFLAG_LOCKED
,
497 &cfil_log_level
, 0, "");
499 SYSCTL_INT(_net_cfil
, OID_AUTO
, debug
, CTLFLAG_RW
|CTLFLAG_LOCKED
,
502 SYSCTL_UINT(_net_cfil
, OID_AUTO
, sock_attached_count
, CTLFLAG_RD
|CTLFLAG_LOCKED
,
503 &cfil_sock_attached_count
, 0, "");
505 SYSCTL_UINT(_net_cfil
, OID_AUTO
, active_count
, CTLFLAG_RD
|CTLFLAG_LOCKED
,
506 &cfil_active_count
, 0, "");
508 SYSCTL_UINT(_net_cfil
, OID_AUTO
, close_wait_timeout
, CTLFLAG_RW
|CTLFLAG_LOCKED
,
509 &cfil_close_wait_timeout
, 0, "");
511 static int cfil_sbtrim
= 1;
512 SYSCTL_UINT(_net_cfil
, OID_AUTO
, sbtrim
, CTLFLAG_RW
|CTLFLAG_LOCKED
,
513 &cfil_sbtrim
, 0, "");
515 SYSCTL_PROC(_net_cfil
, OID_AUTO
, filter_list
, CTLFLAG_RD
|CTLFLAG_LOCKED
,
516 0, 0, sysctl_cfil_filter_list
, "S,cfil_filter_stat", "");
518 SYSCTL_PROC(_net_cfil
, OID_AUTO
, sock_list
, CTLFLAG_RD
|CTLFLAG_LOCKED
,
519 0, 0, sysctl_cfil_sock_list
, "S,cfil_sock_stat", "");
521 SYSCTL_STRUCT(_net_cfil
, OID_AUTO
, stats
, CTLFLAG_RD
|CTLFLAG_LOCKED
,
522 &cfil_stats
, cfil_stats
, "");
 * Forward declarations to appease the compiler
527 static int cfil_action_data_pass(struct socket
*, uint32_t, int,
529 static int cfil_action_drop(struct socket
*, uint32_t);
530 static int cfil_dispatch_closed_event(struct socket
*, int);
531 static int cfil_data_common(struct socket
*, int, struct sockaddr
*,
532 struct mbuf
*, struct mbuf
*, uint32_t);
533 static int cfil_data_filter(struct socket
*, uint32_t, int,
534 struct mbuf
*, uint64_t);
535 static void fill_ip_sockaddr_4_6(union sockaddr_in_4_6
*,
536 struct in_addr
, u_int16_t
);
537 static void fill_ip6_sockaddr_4_6(union sockaddr_in_4_6
*,
538 struct in6_addr
*, u_int16_t
);
539 static int cfil_dispatch_attach_event(struct socket
*, uint32_t);
540 static void cfil_info_free(struct socket
*, struct cfil_info
*);
541 static struct cfil_info
* cfil_info_alloc(struct socket
*);
542 static int cfil_info_attach_unit(struct socket
*, uint32_t);
543 static struct socket
* cfil_socket_from_sock_id(cfil_sock_id_t
);
544 static int cfil_service_pending_queue(struct socket
*, uint32_t, int);
545 static int cfil_data_service_ctl_q(struct socket
*, uint32_t, int);
546 static void cfil_info_verify(struct cfil_info
*);
547 static int cfil_update_data_offsets(struct socket
*, uint32_t, int,
549 static int cfil_acquire_sockbuf(struct socket
*, int);
550 static void cfil_release_sockbuf(struct socket
*, int);
551 static int cfil_filters_attached(struct socket
*);
553 static void cfil_rw_lock_exclusive(lck_rw_t
*);
554 static void cfil_rw_unlock_exclusive(lck_rw_t
*);
555 static void cfil_rw_lock_shared(lck_rw_t
*);
556 static void cfil_rw_unlock_shared(lck_rw_t
*);
557 static boolean_t
cfil_rw_lock_shared_to_exclusive(lck_rw_t
*);
558 static void cfil_rw_lock_exclusive_to_shared(lck_rw_t
*);
560 static unsigned int cfil_data_length(struct mbuf
*, int *);
563 * Content filter global read write lock
567 cfil_rw_lock_exclusive(lck_rw_t
*lck
)
571 lr_saved
= __builtin_return_address(0);
573 lck_rw_lock_exclusive(lck
);
575 cfil_rw_lock_history
[cfil_rw_nxt_lck
] = lr_saved
;
576 cfil_rw_nxt_lck
= (cfil_rw_nxt_lck
+ 1) % CFIL_RW_LCK_MAX
;
580 cfil_rw_unlock_exclusive(lck_rw_t
*lck
)
584 lr_saved
= __builtin_return_address(0);
586 lck_rw_unlock_exclusive(lck
);
588 cfil_rw_unlock_history
[cfil_rw_nxt_unlck
] = lr_saved
;
589 cfil_rw_nxt_unlck
= (cfil_rw_nxt_unlck
+ 1) % CFIL_RW_LCK_MAX
;
593 cfil_rw_lock_shared(lck_rw_t
*lck
)
597 lr_saved
= __builtin_return_address(0);
599 lck_rw_lock_shared(lck
);
601 cfil_rw_lock_history
[cfil_rw_nxt_lck
] = lr_saved
;
602 cfil_rw_nxt_lck
= (cfil_rw_nxt_lck
+ 1) % CFIL_RW_LCK_MAX
;
606 cfil_rw_unlock_shared(lck_rw_t
*lck
)
610 lr_saved
= __builtin_return_address(0);
612 lck_rw_unlock_shared(lck
);
614 cfil_rw_unlock_history
[cfil_rw_nxt_unlck
] = lr_saved
;
615 cfil_rw_nxt_unlck
= (cfil_rw_nxt_unlck
+ 1) % CFIL_RW_LCK_MAX
;
619 cfil_rw_lock_shared_to_exclusive(lck_rw_t
*lck
)
624 lr_saved
= __builtin_return_address(0);
626 upgraded
= lck_rw_lock_shared_to_exclusive(lck
);
628 cfil_rw_unlock_history
[cfil_rw_nxt_unlck
] = lr_saved
;
629 cfil_rw_nxt_unlck
= (cfil_rw_nxt_unlck
+ 1) % CFIL_RW_LCK_MAX
;
635 cfil_rw_lock_exclusive_to_shared(lck_rw_t
*lck
)
639 lr_saved
= __builtin_return_address(0);
641 lck_rw_lock_exclusive_to_shared(lck
);
643 cfil_rw_lock_history
[cfil_rw_nxt_lck
] = lr_saved
;
644 cfil_rw_nxt_lck
= (cfil_rw_nxt_lck
+ 1) % CFIL_RW_LCK_MAX
;
648 cfil_rw_lock_assert_held(lck_rw_t
*lck
, int exclusive
)
651 exclusive
? LCK_RW_ASSERT_EXCLUSIVE
: LCK_RW_ASSERT_HELD
);
655 socket_lock_assert_owned(struct socket
*so
)
657 lck_mtx_t
*mutex_held
;
659 if (so
->so_proto
->pr_getlock
!= NULL
)
660 mutex_held
= (*so
->so_proto
->pr_getlock
)(so
, 0);
662 mutex_held
= so
->so_proto
->pr_domain
->dom_mtx
;
664 lck_mtx_assert(mutex_held
, LCK_MTX_ASSERT_OWNED
);
668 * Return the number of bytes in the mbuf chain using the same
669 * method as m_length() or sballoc()
672 cfil_data_length(struct mbuf
*m
, int *retmbcnt
)
678 if (retmbcnt
== NULL
)
679 return (m_length(m
));
683 for (m0
= m
; m0
!= NULL
; m0
= m0
->m_next
) {
686 if (m0
->m_flags
& M_EXT
)
687 mbcnt
+= m0
->m_ext
.ext_size
;
694 * Common mbuf queue utilities
698 cfil_queue_init(struct cfil_queue
*cfq
)
702 MBUFQ_INIT(&cfq
->q_mq
);
705 static inline uint64_t
706 cfil_queue_drain(struct cfil_queue
*cfq
)
708 uint64_t drained
= cfq
->q_start
- cfq
->q_end
;
711 MBUFQ_DRAIN(&cfq
->q_mq
);
716 /* Return 1 when empty, 0 otherwise */
718 cfil_queue_empty(struct cfil_queue
*cfq
)
720 return (MBUFQ_EMPTY(&cfq
->q_mq
));
723 static inline uint64_t
724 cfil_queue_offset_first(struct cfil_queue
*cfq
)
726 return (cfq
->q_start
);
729 static inline uint64_t
730 cfil_queue_offset_last(struct cfil_queue
*cfq
)
735 static inline uint64_t
736 cfil_queue_len(struct cfil_queue
*cfq
)
738 return (cfq
->q_end
- cfq
->q_start
);
742 * Routines to verify some fundamental assumptions
746 cfil_queue_verify(struct cfil_queue
*cfq
)
750 uint64_t queuesize
= 0;
752 /* Verify offset are ordered */
753 VERIFY(cfq
->q_start
<= cfq
->q_end
);
756 * When queue is empty, the offsets are equal otherwise the offsets
759 VERIFY((MBUFQ_EMPTY(&cfq
->q_mq
) && cfq
->q_start
== cfq
->q_end
) ||
760 (!MBUFQ_EMPTY(&cfq
->q_mq
) &&
761 cfq
->q_start
!= cfq
->q_end
));
763 MBUFQ_FOREACH(m
, &cfq
->q_mq
) {
764 size_t chainsize
= 0;
765 unsigned int mlen
= m_length(m
);
767 if (m
== (void *)M_TAG_FREE_PATTERN
||
768 m
->m_next
== (void *)M_TAG_FREE_PATTERN
||
769 m
->m_nextpkt
== (void *)M_TAG_FREE_PATTERN
)
770 panic("%s - mq %p is free at %p", __func__
,
772 for (n
= m
; n
!= NULL
; n
= n
->m_next
) {
773 if (n
->m_type
!= MT_DATA
&&
774 n
->m_type
!= MT_HEADER
&&
775 n
->m_type
!= MT_OOBDATA
)
776 panic("%s - %p unsupported type %u", __func__
,
778 chainsize
+= n
->m_len
;
780 if (mlen
!= chainsize
)
781 panic("%s - %p m_length() %u != chainsize %lu",
782 __func__
, m
, mlen
, chainsize
);
783 queuesize
+= chainsize
;
785 if (queuesize
!= cfq
->q_end
- cfq
->q_start
)
786 panic("%s - %p queuesize %llu != offsetdiffs %llu", __func__
,
787 m
, queuesize
, cfq
->q_end
- cfq
->q_start
);
791 cfil_queue_enqueue(struct cfil_queue
*cfq
, mbuf_t m
, size_t len
)
793 CFIL_QUEUE_VERIFY(cfq
);
795 MBUFQ_ENQUEUE(&cfq
->q_mq
, m
);
798 CFIL_QUEUE_VERIFY(cfq
);
802 cfil_queue_remove(struct cfil_queue
*cfq
, mbuf_t m
, size_t len
)
804 CFIL_QUEUE_VERIFY(cfq
);
806 VERIFY(m_length(m
) == len
);
808 MBUFQ_REMOVE(&cfq
->q_mq
, m
);
809 MBUFQ_NEXT(m
) = NULL
;
812 CFIL_QUEUE_VERIFY(cfq
);
816 cfil_queue_first(struct cfil_queue
*cfq
)
818 return (MBUFQ_FIRST(&cfq
->q_mq
));
822 cfil_queue_next(struct cfil_queue
*cfq
, mbuf_t m
)
825 return (MBUFQ_NEXT(m
));
829 cfil_entry_buf_verify(struct cfe_buf
*cfe_buf
)
831 CFIL_QUEUE_VERIFY(&cfe_buf
->cfe_ctl_q
);
832 CFIL_QUEUE_VERIFY(&cfe_buf
->cfe_pending_q
);
834 /* Verify the queues are ordered so that pending is before ctl */
835 VERIFY(cfe_buf
->cfe_ctl_q
.q_start
>= cfe_buf
->cfe_pending_q
.q_end
);
837 /* The peek offset cannot be less than the pass offset */
838 VERIFY(cfe_buf
->cfe_peek_offset
>= cfe_buf
->cfe_pass_offset
);
840 /* Make sure we've updated the offset we peeked at */
841 VERIFY(cfe_buf
->cfe_ctl_q
.q_start
<= cfe_buf
->cfe_peeked
);
845 cfil_entry_verify(struct cfil_entry
*entry
)
847 cfil_entry_buf_verify(&entry
->cfe_snd
);
848 cfil_entry_buf_verify(&entry
->cfe_rcv
);
852 cfil_info_buf_verify(struct cfi_buf
*cfi_buf
)
854 CFIL_QUEUE_VERIFY(&cfi_buf
->cfi_inject_q
);
856 VERIFY(cfi_buf
->cfi_pending_first
<= cfi_buf
->cfi_pending_last
);
857 VERIFY(cfi_buf
->cfi_pending_mbcnt
>= 0);
861 cfil_info_verify(struct cfil_info
*cfil_info
)
865 if (cfil_info
== NULL
)
868 cfil_info_buf_verify(&cfil_info
->cfi_snd
);
869 cfil_info_buf_verify(&cfil_info
->cfi_rcv
);
871 for (i
= 0; i
< MAX_CONTENT_FILTER
; i
++)
872 cfil_entry_verify(&cfil_info
->cfi_entries
[i
]);
876 verify_content_filter(struct content_filter
*cfc
)
878 struct cfil_entry
*entry
;
881 VERIFY(cfc
->cf_sock_count
>= 0);
883 TAILQ_FOREACH(entry
, &cfc
->cf_sock_entries
, cfe_link
) {
885 VERIFY(cfc
== entry
->cfe_filter
);
887 VERIFY(count
== cfc
->cf_sock_count
);
891 * Kernel control socket callbacks
894 cfil_ctl_connect(kern_ctl_ref kctlref
, struct sockaddr_ctl
*sac
,
898 struct content_filter
*cfc
= NULL
;
900 CFIL_LOG(LOG_NOTICE
, "");
902 cfc
= zalloc(content_filter_zone
);
904 CFIL_LOG(LOG_ERR
, "zalloc failed");
908 bzero(cfc
, sizeof(struct content_filter
));
910 cfil_rw_lock_exclusive(&cfil_lck_rw
);
911 if (content_filters
== NULL
) {
912 struct content_filter
**tmp
;
914 cfil_rw_unlock_exclusive(&cfil_lck_rw
);
917 struct content_filter
**,
918 MAX_CONTENT_FILTER
* sizeof(struct content_filter
*),
922 cfil_rw_lock_exclusive(&cfil_lck_rw
);
924 if (tmp
== NULL
&& content_filters
== NULL
) {
926 cfil_rw_unlock_exclusive(&cfil_lck_rw
);
929 /* Another thread may have won the race */
930 if (content_filters
!= NULL
)
933 content_filters
= tmp
;
936 if (sac
->sc_unit
== 0 || sac
->sc_unit
> MAX_CONTENT_FILTER
) {
937 CFIL_LOG(LOG_ERR
, "bad sc_unit %u", sac
->sc_unit
);
939 } else if (content_filters
[sac
->sc_unit
- 1] != NULL
) {
940 CFIL_LOG(LOG_ERR
, "sc_unit %u in use", sac
->sc_unit
);
944 * kernel control socket kcunit numbers start at 1
946 content_filters
[sac
->sc_unit
- 1] = cfc
;
948 cfc
->cf_kcref
= kctlref
;
949 cfc
->cf_kcunit
= sac
->sc_unit
;
950 TAILQ_INIT(&cfc
->cf_sock_entries
);
955 cfil_rw_unlock_exclusive(&cfil_lck_rw
);
957 if (error
!= 0 && cfc
!= NULL
)
958 zfree(content_filter_zone
, cfc
);
961 OSIncrementAtomic(&cfil_stats
.cfs_ctl_connect_ok
);
963 OSIncrementAtomic(&cfil_stats
.cfs_ctl_connect_fail
);
965 CFIL_LOG(LOG_INFO
, "return %d cfil_active_count %u kcunit %u",
966 error
, cfil_active_count
, sac
->sc_unit
);
972 cfil_ctl_disconnect(kern_ctl_ref kctlref
, u_int32_t kcunit
, void *unitinfo
)
974 #pragma unused(kctlref)
976 struct content_filter
*cfc
;
977 struct cfil_entry
*entry
;
979 CFIL_LOG(LOG_NOTICE
, "");
981 if (content_filters
== NULL
) {
982 CFIL_LOG(LOG_ERR
, "no content filter");
986 if (kcunit
> MAX_CONTENT_FILTER
) {
987 CFIL_LOG(LOG_ERR
, "kcunit %u > MAX_CONTENT_FILTER (%d)",
988 kcunit
, MAX_CONTENT_FILTER
);
993 cfc
= (struct content_filter
*)unitinfo
;
997 cfil_rw_lock_exclusive(&cfil_lck_rw
);
998 if (content_filters
[kcunit
- 1] != cfc
|| cfc
->cf_kcunit
!= kcunit
) {
999 CFIL_LOG(LOG_ERR
, "bad unit info %u)",
1001 cfil_rw_unlock_exclusive(&cfil_lck_rw
);
1004 cfc
->cf_flags
|= CFF_DETACHING
;
1006 * Remove all sockets from the filter
1008 while ((entry
= TAILQ_FIRST(&cfc
->cf_sock_entries
)) != NULL
) {
1009 cfil_rw_lock_assert_held(&cfil_lck_rw
, 1);
1011 verify_content_filter(cfc
);
1013 * Accept all outstanding data by pushing to next filter
1016 * TBD: Actually we should make sure all data has been pushed
1019 if (entry
->cfe_cfil_info
&& entry
->cfe_cfil_info
->cfi_so
) {
1020 struct cfil_info
*cfil_info
= entry
->cfe_cfil_info
;
1021 struct socket
*so
= cfil_info
->cfi_so
;
1023 /* Need to let data flow immediately */
1024 entry
->cfe_flags
|= CFEF_SENT_SOCK_ATTACHED
|
1028 * Respect locking hierarchy
1030 cfil_rw_unlock_exclusive(&cfil_lck_rw
);
1035 * When cfe_filter is NULL the filter is detached
1036 * and the entry has been removed from cf_sock_entries
1038 if (so
->so_cfil
== NULL
|| entry
->cfe_filter
== NULL
) {
1039 cfil_rw_lock_exclusive(&cfil_lck_rw
);
1042 (void) cfil_action_data_pass(so
, kcunit
, 1,
1046 (void) cfil_action_data_pass(so
, kcunit
, 0,
1050 cfil_rw_lock_exclusive(&cfil_lck_rw
);
1053 * Check again as the socket may have been unlocked
 * when calling cfil_acquire_sockbuf()
1056 if (so
->so_cfil
== NULL
|| entry
->cfe_filter
== NULL
)
1059 /* The filter is now detached */
1060 entry
->cfe_flags
|= CFEF_CFIL_DETACHED
;
1061 CFIL_LOG(LOG_NOTICE
, "so %llx detached %u",
1062 (uint64_t)VM_KERNEL_ADDRPERM(so
), kcunit
);
1064 if ((so
->so_cfil
->cfi_flags
& CFIF_CLOSE_WAIT
) &&
1065 cfil_filters_attached(so
) == 0) {
1066 CFIL_LOG(LOG_NOTICE
, "so %llx waking",
1067 (uint64_t)VM_KERNEL_ADDRPERM(so
));
1068 wakeup((caddr_t
)&so
->so_cfil
);
1072 * Remove the filter entry from the content filter
1073 * but leave the rest of the state intact as the queues
1074 * may not be empty yet
1076 entry
->cfe_filter
= NULL
;
1077 entry
->cfe_necp_control_unit
= 0;
1079 TAILQ_REMOVE(&cfc
->cf_sock_entries
, entry
, cfe_link
);
1080 cfc
->cf_sock_count
--;
1082 socket_unlock(so
, 1);
1085 verify_content_filter(cfc
);
1087 VERIFY(cfc
->cf_sock_count
== 0);
1090 * Make filter inactive
1092 content_filters
[kcunit
- 1] = NULL
;
1093 cfil_active_count
--;
1094 cfil_rw_unlock_exclusive(&cfil_lck_rw
);
1096 zfree(content_filter_zone
, cfc
);
1099 OSIncrementAtomic(&cfil_stats
.cfs_ctl_disconnect_ok
);
1101 OSIncrementAtomic(&cfil_stats
.cfs_ctl_disconnect_fail
);
1103 CFIL_LOG(LOG_INFO
, "return %d cfil_active_count %u kcunit %u",
1104 error
, cfil_active_count
, kcunit
);
1110 * cfil_acquire_sockbuf()
1112 * Prevent any other thread from acquiring the sockbuf
1113 * We use sb_cfil_thread as a semaphore to prevent other threads from
1114 * messing with the sockbuf -- see sblock()
1115 * Note: We do not set SB_LOCK here because the thread may check or modify
1116 * SB_LOCK several times until it calls cfil_release_sockbuf() -- currently
1117 * sblock(), sbunlock() or sodefunct()
1120 cfil_acquire_sockbuf(struct socket
*so
, int outgoing
)
1122 thread_t tp
= current_thread();
1123 struct sockbuf
*sb
= outgoing
? &so
->so_snd
: &so
->so_rcv
;
1124 lck_mtx_t
*mutex_held
;
1128 * Wait until no thread is holding the sockbuf and other content
1129 * filter threads have released the sockbuf
1131 while ((sb
->sb_flags
& SB_LOCK
) ||
1132 (sb
->sb_cfil_thread
!= NULL
&& sb
->sb_cfil_thread
!= tp
)) {
1133 if (so
->so_proto
->pr_getlock
!= NULL
)
1134 mutex_held
= (*so
->so_proto
->pr_getlock
)(so
, 0);
1136 mutex_held
= so
->so_proto
->pr_domain
->dom_mtx
;
1138 lck_mtx_assert(mutex_held
, LCK_MTX_ASSERT_OWNED
);
1141 VERIFY(sb
->sb_wantlock
!= 0);
1143 msleep(&sb
->sb_flags
, mutex_held
, PSOCK
, "cfil_acquire_sockbuf",
1146 VERIFY(sb
->sb_wantlock
!= 0);
1150 * Use reference count for repetitive calls on same thread
1152 if (sb
->sb_cfil_refs
== 0) {
1153 VERIFY(sb
->sb_cfil_thread
== NULL
);
1154 VERIFY((sb
->sb_flags
& SB_LOCK
) == 0);
1156 sb
->sb_cfil_thread
= tp
;
1157 sb
->sb_flags
|= SB_LOCK
;
1161 /* We acquire the socket buffer when we need to cleanup */
1162 if (so
->so_cfil
== NULL
) {
1163 CFIL_LOG(LOG_ERR
, "so %llx cfil detached",
1164 (uint64_t)VM_KERNEL_ADDRPERM(so
));
1166 } else if (so
->so_cfil
->cfi_flags
& CFIF_DROP
) {
1167 CFIL_LOG(LOG_ERR
, "so %llx drop set",
1168 (uint64_t)VM_KERNEL_ADDRPERM(so
));
1176 cfil_release_sockbuf(struct socket
*so
, int outgoing
)
1178 struct sockbuf
*sb
= outgoing
? &so
->so_snd
: &so
->so_rcv
;
1179 thread_t tp
= current_thread();
1181 socket_lock_assert_owned(so
);
1183 if (sb
->sb_cfil_thread
!= NULL
&& sb
->sb_cfil_thread
!= tp
)
1184 panic("%s sb_cfil_thread %p not current %p", __func__
,
1185 sb
->sb_cfil_thread
, tp
);
1187 * Don't panic if we are defunct because SB_LOCK has
1188 * been cleared by sodefunct()
1190 if (!(so
->so_flags
& SOF_DEFUNCT
) && !(sb
->sb_flags
& SB_LOCK
))
1191 panic("%s SB_LOCK not set on %p", __func__
,
1194 * We can unlock when the thread unwinds to the last reference
1197 if (sb
->sb_cfil_refs
== 0) {
1198 sb
->sb_cfil_thread
= NULL
;
1199 sb
->sb_flags
&= ~SB_LOCK
;
1201 if (sb
->sb_wantlock
> 0)
1202 wakeup(&sb
->sb_flags
);
1207 cfil_sock_id_from_socket(struct socket
*so
)
1209 if ((so
->so_flags
& SOF_CONTENT_FILTER
) && so
->so_cfil
)
1210 return (so
->so_cfil
->cfi_sock_id
);
1212 return (CFIL_SOCK_ID_NONE
);
1215 static struct socket
*
1216 cfil_socket_from_sock_id(cfil_sock_id_t cfil_sock_id
)
1218 struct socket
*so
= NULL
;
1219 u_int64_t gencnt
= cfil_sock_id
>> 32;
1220 u_int32_t flowhash
= (u_int32_t
)(cfil_sock_id
& 0x0ffffffff);
1221 struct inpcb
*inp
= NULL
;
1222 struct inpcbinfo
*pcbinfo
= &tcbinfo
;
1224 lck_rw_lock_shared(pcbinfo
->ipi_lock
);
1225 LIST_FOREACH(inp
, pcbinfo
->ipi_listhead
, inp_list
) {
1226 if (inp
->inp_state
!= INPCB_STATE_DEAD
&&
1227 inp
->inp_socket
!= NULL
&&
1228 inp
->inp_flowhash
== flowhash
&&
1229 (inp
->inp_socket
->so_gencnt
& 0x0ffffffff) == gencnt
&&
1230 inp
->inp_socket
->so_cfil
!= NULL
) {
1231 so
= inp
->inp_socket
;
1235 lck_rw_done(pcbinfo
->ipi_lock
);
1238 OSIncrementAtomic(&cfil_stats
.cfs_sock_id_not_found
);
1240 "no socket for sock_id %llx gencnt %llx flowhash %x",
1241 cfil_sock_id
, gencnt
, flowhash
);
1248 cfil_ctl_send(kern_ctl_ref kctlref
, u_int32_t kcunit
, void *unitinfo
, mbuf_t m
,
1251 #pragma unused(kctlref, flags)
1253 struct cfil_msg_hdr
*msghdr
;
1254 struct content_filter
*cfc
= (struct content_filter
*)unitinfo
;
1256 struct cfil_msg_action
*action_msg
;
1257 struct cfil_entry
*entry
;
1259 CFIL_LOG(LOG_INFO
, "");
1261 if (content_filters
== NULL
) {
1262 CFIL_LOG(LOG_ERR
, "no content filter");
1266 if (kcunit
> MAX_CONTENT_FILTER
) {
1267 CFIL_LOG(LOG_ERR
, "kcunit %u > MAX_CONTENT_FILTER (%d)",
1268 kcunit
, MAX_CONTENT_FILTER
);
1273 if (m_length(m
) < sizeof(struct cfil_msg_hdr
)) {
1274 CFIL_LOG(LOG_ERR
, "too short %u", m_length(m
));
1278 msghdr
= (struct cfil_msg_hdr
*)mbuf_data(m
);
1279 if (msghdr
->cfm_version
!= CFM_VERSION_CURRENT
) {
1280 CFIL_LOG(LOG_ERR
, "bad version %u", msghdr
->cfm_version
);
1284 if (msghdr
->cfm_type
!= CFM_TYPE_ACTION
) {
1285 CFIL_LOG(LOG_ERR
, "bad type %u", msghdr
->cfm_type
);
1289 /* Validate action operation */
1290 switch (msghdr
->cfm_op
) {
1291 case CFM_OP_DATA_UPDATE
:
1293 &cfil_stats
.cfs_ctl_action_data_update
);
1296 OSIncrementAtomic(&cfil_stats
.cfs_ctl_action_drop
);
1299 OSIncrementAtomic(&cfil_stats
.cfs_ctl_action_bad_op
);
1300 CFIL_LOG(LOG_ERR
, "bad op %u", msghdr
->cfm_op
);
1304 if (msghdr
->cfm_len
!= sizeof(struct cfil_msg_action
)) {
1305 OSIncrementAtomic(&cfil_stats
.cfs_ctl_action_bad_len
);
1307 CFIL_LOG(LOG_ERR
, "bad len: %u for op %u",
1312 cfil_rw_lock_shared(&cfil_lck_rw
);
1313 if (cfc
!= (void *)content_filters
[kcunit
- 1]) {
1314 CFIL_LOG(LOG_ERR
, "unitinfo does not match for kcunit %u",
1317 cfil_rw_unlock_shared(&cfil_lck_rw
);
1321 so
= cfil_socket_from_sock_id(msghdr
->cfm_sock_id
);
1323 CFIL_LOG(LOG_NOTICE
, "bad sock_id %llx",
1324 msghdr
->cfm_sock_id
);
1326 cfil_rw_unlock_shared(&cfil_lck_rw
);
1329 cfil_rw_unlock_shared(&cfil_lck_rw
);
1333 if (so
->so_cfil
== NULL
) {
1334 CFIL_LOG(LOG_NOTICE
, "so %llx not attached",
1335 (uint64_t)VM_KERNEL_ADDRPERM(so
));
1338 } else if (so
->so_cfil
->cfi_flags
& CFIF_DROP
) {
1339 CFIL_LOG(LOG_NOTICE
, "so %llx drop set",
1340 (uint64_t)VM_KERNEL_ADDRPERM(so
));
1344 entry
= &so
->so_cfil
->cfi_entries
[kcunit
- 1];
1345 if (entry
->cfe_filter
== NULL
) {
1346 CFIL_LOG(LOG_NOTICE
, "so %llx no filter",
1347 (uint64_t)VM_KERNEL_ADDRPERM(so
));
1352 if (entry
->cfe_flags
& CFEF_SENT_SOCK_ATTACHED
)
1353 entry
->cfe_flags
|= CFEF_DATA_START
;
1356 "so %llx attached not sent for %u",
1357 (uint64_t)VM_KERNEL_ADDRPERM(so
), kcunit
);
1362 microuptime(&entry
->cfe_last_action
);
1364 action_msg
= (struct cfil_msg_action
*)msghdr
;
1366 switch (msghdr
->cfm_op
) {
1367 case CFM_OP_DATA_UPDATE
:
1368 if (action_msg
->cfa_out_peek_offset
!= 0 ||
1369 action_msg
->cfa_out_pass_offset
!= 0)
1370 error
= cfil_action_data_pass(so
, kcunit
, 1,
1371 action_msg
->cfa_out_pass_offset
,
1372 action_msg
->cfa_out_peek_offset
);
1373 if (error
== EJUSTRETURN
)
1377 if (action_msg
->cfa_in_peek_offset
!= 0 ||
1378 action_msg
->cfa_in_pass_offset
!= 0)
1379 error
= cfil_action_data_pass(so
, kcunit
, 0,
1380 action_msg
->cfa_in_pass_offset
,
1381 action_msg
->cfa_in_peek_offset
);
1382 if (error
== EJUSTRETURN
)
1387 error
= cfil_action_drop(so
, kcunit
);
1395 socket_unlock(so
, 1);
1400 OSIncrementAtomic(&cfil_stats
.cfs_ctl_send_ok
);
1402 OSIncrementAtomic(&cfil_stats
.cfs_ctl_send_bad
);
1408 cfil_ctl_getopt(kern_ctl_ref kctlref
, u_int32_t kcunit
, void *unitinfo
,
1409 int opt
, void *data
, size_t *len
)
1411 #pragma unused(kctlref, opt)
1413 struct content_filter
*cfc
= (struct content_filter
*)unitinfo
;
1415 CFIL_LOG(LOG_NOTICE
, "");
1417 cfil_rw_lock_shared(&cfil_lck_rw
);
1419 if (content_filters
== NULL
) {
1420 CFIL_LOG(LOG_ERR
, "no content filter");
1424 if (kcunit
> MAX_CONTENT_FILTER
) {
1425 CFIL_LOG(LOG_ERR
, "kcunit %u > MAX_CONTENT_FILTER (%d)",
1426 kcunit
, MAX_CONTENT_FILTER
);
1430 if (cfc
!= (void *)content_filters
[kcunit
- 1]) {
1431 CFIL_LOG(LOG_ERR
, "unitinfo does not match for kcunit %u",
1437 case CFIL_OPT_NECP_CONTROL_UNIT
:
1438 if (*len
< sizeof(uint32_t)) {
1439 CFIL_LOG(LOG_ERR
, "len too small %lu", *len
);
1444 *(uint32_t *)data
= cfc
->cf_necp_control_unit
;
1447 error
= ENOPROTOOPT
;
1451 cfil_rw_unlock_shared(&cfil_lck_rw
);
1457 cfil_ctl_setopt(kern_ctl_ref kctlref
, u_int32_t kcunit
, void *unitinfo
,
1458 int opt
, void *data
, size_t len
)
1460 #pragma unused(kctlref, opt)
1462 struct content_filter
*cfc
= (struct content_filter
*)unitinfo
;
1464 CFIL_LOG(LOG_NOTICE
, "");
1466 cfil_rw_lock_exclusive(&cfil_lck_rw
);
1468 if (content_filters
== NULL
) {
1469 CFIL_LOG(LOG_ERR
, "no content filter");
1473 if (kcunit
> MAX_CONTENT_FILTER
) {
1474 CFIL_LOG(LOG_ERR
, "kcunit %u > MAX_CONTENT_FILTER (%d)",
1475 kcunit
, MAX_CONTENT_FILTER
);
1479 if (cfc
!= (void *)content_filters
[kcunit
- 1]) {
1480 CFIL_LOG(LOG_ERR
, "unitinfo does not match for kcunit %u",
1486 case CFIL_OPT_NECP_CONTROL_UNIT
:
1487 if (len
< sizeof(uint32_t)) {
1488 CFIL_LOG(LOG_ERR
, "CFIL_OPT_NECP_CONTROL_UNIT "
1489 "len too small %lu", len
);
1493 if (cfc
->cf_necp_control_unit
!= 0) {
1494 CFIL_LOG(LOG_ERR
, "CFIL_OPT_NECP_CONTROL_UNIT "
1496 cfc
->cf_necp_control_unit
);
1500 cfc
->cf_necp_control_unit
= *(uint32_t *)data
;
1503 error
= ENOPROTOOPT
;
1507 cfil_rw_unlock_exclusive(&cfil_lck_rw
);
1514 cfil_ctl_rcvd(kern_ctl_ref kctlref
, u_int32_t kcunit
, void *unitinfo
, int flags
)
1516 #pragma unused(kctlref, flags)
1517 struct content_filter
*cfc
= (struct content_filter
*)unitinfo
;
1518 struct socket
*so
= NULL
;
1520 struct cfil_entry
*entry
;
1522 CFIL_LOG(LOG_INFO
, "");
1524 if (content_filters
== NULL
) {
1525 CFIL_LOG(LOG_ERR
, "no content filter");
1526 OSIncrementAtomic(&cfil_stats
.cfs_ctl_rcvd_bad
);
1529 if (kcunit
> MAX_CONTENT_FILTER
) {
1530 CFIL_LOG(LOG_ERR
, "kcunit %u > MAX_CONTENT_FILTER (%d)",
1531 kcunit
, MAX_CONTENT_FILTER
);
1532 OSIncrementAtomic(&cfil_stats
.cfs_ctl_rcvd_bad
);
1535 cfil_rw_lock_shared(&cfil_lck_rw
);
1536 if (cfc
!= (void *)content_filters
[kcunit
- 1]) {
1537 CFIL_LOG(LOG_ERR
, "unitinfo does not match for kcunit %u",
1539 OSIncrementAtomic(&cfil_stats
.cfs_ctl_rcvd_bad
);
1542 /* Let's assume the flow control is lifted */
1543 if (cfc
->cf_flags
& CFF_FLOW_CONTROLLED
) {
1544 if (!cfil_rw_lock_shared_to_exclusive(&cfil_lck_rw
))
1545 cfil_rw_lock_exclusive(&cfil_lck_rw
);
1547 cfc
->cf_flags
&= ~CFF_FLOW_CONTROLLED
;
1549 cfil_rw_lock_exclusive_to_shared(&cfil_lck_rw
);
1550 lck_rw_assert(&cfil_lck_rw
, LCK_RW_ASSERT_SHARED
);
1553 * Flow control will be raised again as soon as an entry cannot enqueue
1554 * to the kernel control socket
1556 while ((cfc
->cf_flags
& CFF_FLOW_CONTROLLED
) == 0) {
1557 verify_content_filter(cfc
);
1559 cfil_rw_lock_assert_held(&cfil_lck_rw
, 0);
1561 /* Find an entry that is flow controlled */
1562 TAILQ_FOREACH(entry
, &cfc
->cf_sock_entries
, cfe_link
) {
1563 if (entry
->cfe_cfil_info
== NULL
||
1564 entry
->cfe_cfil_info
->cfi_so
== NULL
)
1566 if ((entry
->cfe_flags
& CFEF_FLOW_CONTROLLED
) == 0)
1572 OSIncrementAtomic(&cfil_stats
.cfs_ctl_rcvd_flow_lift
);
1574 so
= entry
->cfe_cfil_info
->cfi_so
;
1576 cfil_rw_unlock_shared(&cfil_lck_rw
);
1580 error
= cfil_acquire_sockbuf(so
, 1);
1582 error
= cfil_data_service_ctl_q(so
, kcunit
, 1);
1583 cfil_release_sockbuf(so
, 1);
1587 error
= cfil_acquire_sockbuf(so
, 0);
1589 error
= cfil_data_service_ctl_q(so
, kcunit
, 0);
1590 cfil_release_sockbuf(so
, 0);
1593 socket_lock_assert_owned(so
);
1594 socket_unlock(so
, 1);
1596 cfil_rw_lock_shared(&cfil_lck_rw
);
1599 cfil_rw_unlock_shared(&cfil_lck_rw
);
1605 struct kern_ctl_reg kern_ctl
;
1607 vm_size_t content_filter_size
= 0; /* size of content_filter */
1608 vm_size_t cfil_info_size
= 0; /* size of cfil_info */
1610 CFIL_LOG(LOG_NOTICE
, "");
1613 * Compile time verifications
1615 _CASSERT(CFIL_MAX_FILTER_COUNT
== MAX_CONTENT_FILTER
);
1616 _CASSERT(sizeof(struct cfil_filter_stat
) % sizeof(uint32_t) == 0);
1617 _CASSERT(sizeof(struct cfil_entry_stat
) % sizeof(uint32_t) == 0);
1618 _CASSERT(sizeof(struct cfil_sock_stat
) % sizeof(uint32_t) == 0);
1621 * Runtime time verifications
1623 VERIFY(IS_P2ALIGNED(&cfil_stats
.cfs_ctl_q_in_enqueued
,
1625 VERIFY(IS_P2ALIGNED(&cfil_stats
.cfs_ctl_q_out_enqueued
,
1627 VERIFY(IS_P2ALIGNED(&cfil_stats
.cfs_ctl_q_in_peeked
,
1629 VERIFY(IS_P2ALIGNED(&cfil_stats
.cfs_ctl_q_out_peeked
,
1632 VERIFY(IS_P2ALIGNED(&cfil_stats
.cfs_pending_q_in_enqueued
,
1634 VERIFY(IS_P2ALIGNED(&cfil_stats
.cfs_pending_q_out_enqueued
,
1637 VERIFY(IS_P2ALIGNED(&cfil_stats
.cfs_inject_q_in_enqueued
,
1639 VERIFY(IS_P2ALIGNED(&cfil_stats
.cfs_inject_q_out_enqueued
,
1641 VERIFY(IS_P2ALIGNED(&cfil_stats
.cfs_inject_q_in_passed
,
1643 VERIFY(IS_P2ALIGNED(&cfil_stats
.cfs_inject_q_out_passed
,
1647 * Zone for content filters kernel control sockets
1649 content_filter_size
= sizeof(struct content_filter
);
1650 content_filter_zone
= zinit(content_filter_size
,
1651 CONTENT_FILTER_ZONE_MAX
* content_filter_size
,
1653 CONTENT_FILTER_ZONE_NAME
);
1654 if (content_filter_zone
== NULL
) {
1655 panic("%s: zinit(%s) failed", __func__
,
1656 CONTENT_FILTER_ZONE_NAME
);
1659 zone_change(content_filter_zone
, Z_CALLERACCT
, FALSE
);
1660 zone_change(content_filter_zone
, Z_EXPAND
, TRUE
);
1663 * Zone for per socket content filters
1665 cfil_info_size
= sizeof(struct cfil_info
);
1666 cfil_info_zone
= zinit(cfil_info_size
,
1667 CFIL_INFO_ZONE_MAX
* cfil_info_size
,
1669 CFIL_INFO_ZONE_NAME
);
1670 if (cfil_info_zone
== NULL
) {
1671 panic("%s: zinit(%s) failed", __func__
, CFIL_INFO_ZONE_NAME
);
1674 zone_change(cfil_info_zone
, Z_CALLERACCT
, FALSE
);
1675 zone_change(cfil_info_zone
, Z_EXPAND
, TRUE
);
1680 cfil_lck_grp_attr
= lck_grp_attr_alloc_init();
1681 if (cfil_lck_grp_attr
== NULL
) {
1682 panic("%s: lck_grp_attr_alloc_init failed", __func__
);
1685 cfil_lck_grp
= lck_grp_alloc_init("content filter",
1687 if (cfil_lck_grp
== NULL
) {
1688 panic("%s: lck_grp_alloc_init failed", __func__
);
1691 cfil_lck_attr
= lck_attr_alloc_init();
1692 if (cfil_lck_attr
== NULL
) {
1693 panic("%s: lck_attr_alloc_init failed", __func__
);
1696 lck_rw_init(&cfil_lck_rw
, cfil_lck_grp
, cfil_lck_attr
);
1698 TAILQ_INIT(&cfil_sock_head
);
1701 * Register kernel control
1703 bzero(&kern_ctl
, sizeof(kern_ctl
));
1704 strlcpy(kern_ctl
.ctl_name
, CONTENT_FILTER_CONTROL_NAME
,
1705 sizeof(kern_ctl
.ctl_name
));
1706 kern_ctl
.ctl_flags
= CTL_FLAG_PRIVILEGED
| CTL_FLAG_REG_EXTENDED
;
1707 kern_ctl
.ctl_sendsize
= 512 * 1024; /* enough? */
1708 kern_ctl
.ctl_recvsize
= 512 * 1024; /* enough? */
1709 kern_ctl
.ctl_connect
= cfil_ctl_connect
;
1710 kern_ctl
.ctl_disconnect
= cfil_ctl_disconnect
;
1711 kern_ctl
.ctl_send
= cfil_ctl_send
;
1712 kern_ctl
.ctl_getopt
= cfil_ctl_getopt
;
1713 kern_ctl
.ctl_setopt
= cfil_ctl_setopt
;
1714 kern_ctl
.ctl_rcvd
= cfil_ctl_rcvd
;
1715 error
= ctl_register(&kern_ctl
, &cfil_kctlref
);
1717 CFIL_LOG(LOG_ERR
, "ctl_register failed: %d", error
);
1723 cfil_info_alloc(struct socket
*so
)
1726 struct cfil_info
*cfil_info
= NULL
;
1727 struct inpcb
*inp
= sotoinpcb(so
);
1729 CFIL_LOG(LOG_INFO
, "");
1731 socket_lock_assert_owned(so
);
1733 cfil_info
= zalloc(cfil_info_zone
);
1734 if (cfil_info
== NULL
)
1736 bzero(cfil_info
, sizeof(struct cfil_info
));
1738 cfil_queue_init(&cfil_info
->cfi_snd
.cfi_inject_q
);
1739 cfil_queue_init(&cfil_info
->cfi_rcv
.cfi_inject_q
);
1741 for (kcunit
= 1; kcunit
<= MAX_CONTENT_FILTER
; kcunit
++) {
1742 struct cfil_entry
*entry
;
1744 entry
= &cfil_info
->cfi_entries
[kcunit
- 1];
1745 entry
->cfe_cfil_info
= cfil_info
;
1747 /* Initialize the filter entry */
1748 entry
->cfe_filter
= NULL
;
1749 entry
->cfe_flags
= 0;
1750 entry
->cfe_necp_control_unit
= 0;
1751 entry
->cfe_snd
.cfe_pass_offset
= 0;
1752 entry
->cfe_snd
.cfe_peek_offset
= 0;
1753 entry
->cfe_snd
.cfe_peeked
= 0;
1754 entry
->cfe_rcv
.cfe_pass_offset
= 0;
1755 entry
->cfe_rcv
.cfe_peek_offset
= 0;
1756 entry
->cfe_rcv
.cfe_peeked
= 0;
1758 cfil_queue_init(&entry
->cfe_snd
.cfe_pending_q
);
1759 cfil_queue_init(&entry
->cfe_rcv
.cfe_pending_q
);
1760 cfil_queue_init(&entry
->cfe_snd
.cfe_ctl_q
);
1761 cfil_queue_init(&entry
->cfe_rcv
.cfe_ctl_q
);
1764 cfil_rw_lock_exclusive(&cfil_lck_rw
);
1766 so
->so_cfil
= cfil_info
;
1767 cfil_info
->cfi_so
= so
;
1769 * Create a cfi_sock_id that's not the socket pointer!
1771 if (inp
->inp_flowhash
== 0)
1772 inp
->inp_flowhash
= inp_calc_flowhash(inp
);
1773 cfil_info
->cfi_sock_id
=
1774 ((so
->so_gencnt
<< 32) | inp
->inp_flowhash
);
1776 TAILQ_INSERT_TAIL(&cfil_sock_head
, cfil_info
, cfi_link
);
1778 cfil_sock_attached_count
++;
1780 cfil_rw_unlock_exclusive(&cfil_lck_rw
);
1783 if (cfil_info
!= NULL
)
1784 OSIncrementAtomic(&cfil_stats
.cfs_cfi_alloc_ok
);
1786 OSIncrementAtomic(&cfil_stats
.cfs_cfi_alloc_fail
);
1792 cfil_info_attach_unit(struct socket
*so
, uint32_t filter_control_unit
)
1795 struct cfil_info
*cfil_info
= so
->so_cfil
;
1798 CFIL_LOG(LOG_INFO
, "");
1800 socket_lock_assert_owned(so
);
1802 cfil_rw_lock_exclusive(&cfil_lck_rw
);
1805 content_filters
!= NULL
&& kcunit
<= MAX_CONTENT_FILTER
;
1807 struct content_filter
*cfc
= content_filters
[kcunit
- 1];
1808 struct cfil_entry
*entry
;
1812 if (cfc
->cf_necp_control_unit
!= filter_control_unit
)
1815 entry
= &cfil_info
->cfi_entries
[kcunit
- 1];
1817 entry
->cfe_filter
= cfc
;
1818 entry
->cfe_necp_control_unit
= filter_control_unit
;
1819 TAILQ_INSERT_TAIL(&cfc
->cf_sock_entries
, entry
, cfe_link
);
1820 cfc
->cf_sock_count
++;
1821 verify_content_filter(cfc
);
1823 entry
->cfe_flags
|= CFEF_CFIL_ATTACHED
;
1827 cfil_rw_unlock_exclusive(&cfil_lck_rw
);
1833 cfil_info_free(struct socket
*so
, struct cfil_info
*cfil_info
)
1836 uint64_t in_drain
= 0;
1837 uint64_t out_drained
= 0;
1841 if (so
->so_flags
& SOF_CONTENT_FILTER
) {
1842 so
->so_flags
&= ~SOF_CONTENT_FILTER
;
1843 VERIFY(so
->so_usecount
> 0);
1846 if (cfil_info
== NULL
)
1849 CFIL_LOG(LOG_INFO
, "");
1851 cfil_rw_lock_exclusive(&cfil_lck_rw
);
1854 content_filters
!= NULL
&& kcunit
<= MAX_CONTENT_FILTER
;
1856 struct cfil_entry
*entry
;
1857 struct content_filter
*cfc
;
1859 entry
= &cfil_info
->cfi_entries
[kcunit
- 1];
1861 /* Don't be silly and try to detach twice */
1862 if (entry
->cfe_filter
== NULL
)
1865 cfc
= content_filters
[kcunit
- 1];
1867 VERIFY(cfc
== entry
->cfe_filter
);
1869 entry
->cfe_filter
= NULL
;
1870 entry
->cfe_necp_control_unit
= 0;
1871 TAILQ_REMOVE(&cfc
->cf_sock_entries
, entry
, cfe_link
);
1872 cfc
->cf_sock_count
--;
1874 verify_content_filter(cfc
);
1876 cfil_sock_attached_count
--;
1877 TAILQ_REMOVE(&cfil_sock_head
, cfil_info
, cfi_link
);
1879 out_drained
+= cfil_queue_drain(&cfil_info
->cfi_snd
.cfi_inject_q
);
1880 in_drain
+= cfil_queue_drain(&cfil_info
->cfi_rcv
.cfi_inject_q
);
1882 for (kcunit
= 1; kcunit
<= MAX_CONTENT_FILTER
; kcunit
++) {
1883 struct cfil_entry
*entry
;
1885 entry
= &cfil_info
->cfi_entries
[kcunit
- 1];
1886 out_drained
+= cfil_queue_drain(&entry
->cfe_snd
.cfe_pending_q
);
1887 in_drain
+= cfil_queue_drain(&entry
->cfe_rcv
.cfe_pending_q
);
1888 out_drained
+= cfil_queue_drain(&entry
->cfe_snd
.cfe_ctl_q
);
1889 in_drain
+= cfil_queue_drain(&entry
->cfe_rcv
.cfe_ctl_q
);
1891 cfil_rw_unlock_exclusive(&cfil_lck_rw
);
1894 OSIncrementAtomic(&cfil_stats
.cfs_flush_out_free
);
1896 OSIncrementAtomic(&cfil_stats
.cfs_flush_in_free
);
1898 zfree(cfil_info_zone
, cfil_info
);
1902 * Entry point from Sockets layer
1903 * The socket is locked.
1906 cfil_sock_attach(struct socket
*so
)
1909 uint32_t filter_control_unit
;
1911 socket_lock_assert_owned(so
);
1913 /* Limit ourselves to TCP that are not MPTCP subflows */
1914 if ((so
->so_proto
->pr_domain
->dom_family
!= PF_INET
&&
1915 so
->so_proto
->pr_domain
->dom_family
!= PF_INET6
) ||
1916 so
->so_proto
->pr_type
!= SOCK_STREAM
||
1917 so
->so_proto
->pr_protocol
!= IPPROTO_TCP
||
1918 (so
->so_flags
& SOF_MP_SUBFLOW
) != 0)
1921 filter_control_unit
= necp_socket_get_content_filter_control_unit(so
);
1922 if (filter_control_unit
== 0)
1925 if ((filter_control_unit
& NECP_MASK_USERSPACE_ONLY
) != 0) {
1926 OSIncrementAtomic(&cfil_stats
.cfs_sock_userspace_only
);
1929 if (cfil_active_count
== 0) {
1930 OSIncrementAtomic(&cfil_stats
.cfs_sock_attach_in_vain
);
1933 if (so
->so_cfil
!= NULL
) {
1934 OSIncrementAtomic(&cfil_stats
.cfs_sock_attach_already
);
1935 CFIL_LOG(LOG_ERR
, "already attached");
1937 cfil_info_alloc(so
);
1938 if (so
->so_cfil
== NULL
) {
1940 OSIncrementAtomic(&cfil_stats
.cfs_sock_attach_no_mem
);
1944 if (cfil_info_attach_unit(so
, filter_control_unit
) == 0) {
1945 CFIL_LOG(LOG_ERR
, "cfil_info_attach_unit(%u) failed",
1946 filter_control_unit
);
1947 OSIncrementAtomic(&cfil_stats
.cfs_sock_attach_failed
);
1950 CFIL_LOG(LOG_INFO
, "so %llx filter_control_unit %u sockid %llx",
1951 (uint64_t)VM_KERNEL_ADDRPERM(so
),
1952 filter_control_unit
, so
->so_cfil
->cfi_sock_id
);
1954 so
->so_flags
|= SOF_CONTENT_FILTER
;
1955 OSIncrementAtomic(&cfil_stats
.cfs_sock_attached
);
1957 /* Hold a reference on the socket */
1960 error
= cfil_dispatch_attach_event(so
, filter_control_unit
);
1961 /* We can recover from flow control or out of memory errors */
1962 if (error
== ENOBUFS
|| error
== ENOMEM
)
1964 else if (error
!= 0)
1967 CFIL_INFO_VERIFY(so
->so_cfil
);
1973 * Entry point from Sockets layer
1974 * The socket is locked.
1977 cfil_sock_detach(struct socket
*so
)
1980 cfil_info_free(so
, so
->so_cfil
);
1981 OSIncrementAtomic(&cfil_stats
.cfs_sock_detached
);
1987 cfil_dispatch_attach_event(struct socket
*so
, uint32_t filter_control_unit
)
1990 struct cfil_entry
*entry
= NULL
;
1991 struct cfil_msg_sock_attached msg_attached
;
1993 struct content_filter
*cfc
;
1995 socket_lock_assert_owned(so
);
1997 cfil_rw_lock_shared(&cfil_lck_rw
);
1999 if (so
->so_proto
== NULL
|| so
->so_proto
->pr_domain
== NULL
) {
2004 * Find the matching filter unit
2006 for (kcunit
= 1; kcunit
<= MAX_CONTENT_FILTER
; kcunit
++) {
2007 cfc
= content_filters
[kcunit
- 1];
2011 if (cfc
->cf_necp_control_unit
!= filter_control_unit
)
2013 entry
= &so
->so_cfil
->cfi_entries
[kcunit
- 1];
2014 if (entry
->cfe_filter
== NULL
)
2017 VERIFY(cfc
== entry
->cfe_filter
);
2022 if (entry
== NULL
|| entry
->cfe_filter
== NULL
)
2025 if ((entry
->cfe_flags
& CFEF_SENT_SOCK_ATTACHED
))
2028 CFIL_LOG(LOG_INFO
, "so %llx filter_control_unit %u kcunit %u",
2029 (uint64_t)VM_KERNEL_ADDRPERM(so
), filter_control_unit
, kcunit
);
2031 /* Would be wasteful to try when flow controlled */
2032 if (cfc
->cf_flags
& CFF_FLOW_CONTROLLED
) {
2037 bzero(&msg_attached
, sizeof(struct cfil_msg_sock_attached
));
2038 msg_attached
.cfs_msghdr
.cfm_len
= sizeof(struct cfil_msg_sock_attached
);
2039 msg_attached
.cfs_msghdr
.cfm_version
= CFM_VERSION_CURRENT
;
2040 msg_attached
.cfs_msghdr
.cfm_type
= CFM_TYPE_EVENT
;
2041 msg_attached
.cfs_msghdr
.cfm_op
= CFM_OP_SOCKET_ATTACHED
;
2042 msg_attached
.cfs_msghdr
.cfm_sock_id
= entry
->cfe_cfil_info
->cfi_sock_id
;
2044 msg_attached
.cfs_sock_family
= so
->so_proto
->pr_domain
->dom_family
;
2045 msg_attached
.cfs_sock_type
= so
->so_proto
->pr_type
;
2046 msg_attached
.cfs_sock_protocol
= so
->so_proto
->pr_protocol
;
2047 msg_attached
.cfs_pid
= so
->last_pid
;
2048 memcpy(msg_attached
.cfs_uuid
, so
->last_uuid
, sizeof(uuid_t
));
2049 if (so
->so_flags
& SOF_DELEGATED
) {
2050 msg_attached
.cfs_e_pid
= so
->e_pid
;
2051 memcpy(msg_attached
.cfs_e_uuid
, so
->e_uuid
, sizeof(uuid_t
));
2053 msg_attached
.cfs_e_pid
= so
->last_pid
;
2054 memcpy(msg_attached
.cfs_e_uuid
, so
->last_uuid
, sizeof(uuid_t
));
2056 error
= ctl_enqueuedata(entry
->cfe_filter
->cf_kcref
,
2057 entry
->cfe_filter
->cf_kcunit
,
2059 sizeof(struct cfil_msg_sock_attached
),
2062 CFIL_LOG(LOG_ERR
, "ctl_enqueuedata() failed: %d", error
);
2065 microuptime(&entry
->cfe_last_event
);
2066 entry
->cfe_flags
|= CFEF_SENT_SOCK_ATTACHED
;
2067 OSIncrementAtomic(&cfil_stats
.cfs_attach_event_ok
);
2070 /* We can recover from flow control */
2071 if (error
== ENOBUFS
) {
2072 entry
->cfe_flags
|= CFEF_FLOW_CONTROLLED
;
2073 OSIncrementAtomic(&cfil_stats
.cfs_attach_event_flow_control
);
2075 if (!cfil_rw_lock_shared_to_exclusive(&cfil_lck_rw
))
2076 cfil_rw_lock_exclusive(&cfil_lck_rw
);
2078 cfc
->cf_flags
|= CFF_FLOW_CONTROLLED
;
2080 cfil_rw_unlock_exclusive(&cfil_lck_rw
);
2083 OSIncrementAtomic(&cfil_stats
.cfs_attach_event_fail
);
2085 cfil_rw_unlock_shared(&cfil_lck_rw
);
int
cfil_dispatch_disconnect_event(struct socket *so, uint32_t kcunit, int outgoing)
{
	errno_t error = 0;
	struct mbuf *msg = NULL;
	struct cfil_entry *entry;
	struct cfe_buf *entrybuf;
	struct cfil_msg_hdr msg_disconnected;
	struct content_filter *cfc;

	socket_lock_assert_owned(so);

	cfil_rw_lock_shared(&cfil_lck_rw);

	entry = &so->so_cfil->cfi_entries[kcunit - 1];
	if (outgoing)
		entrybuf = &entry->cfe_snd;
	else
		entrybuf = &entry->cfe_rcv;

	cfc = entry->cfe_filter;
	if (cfc == NULL)
		goto done;

	CFIL_LOG(LOG_INFO, "so %llx kcunit %u outgoing %d",
		(uint64_t)VM_KERNEL_ADDRPERM(so), kcunit, outgoing);

	/*
	 * Send the disconnection event once
	 */
	if ((outgoing && (entry->cfe_flags & CFEF_SENT_DISCONNECT_OUT)) ||
	    (!outgoing && (entry->cfe_flags & CFEF_SENT_DISCONNECT_IN))) {
		CFIL_LOG(LOG_INFO, "so %llx disconnect already sent",
			(uint64_t)VM_KERNEL_ADDRPERM(so));
		goto done;
	}

	/*
	 * We're not disconnected as long as some data is waiting
	 * to be delivered to the filter
	 */
	if (outgoing && cfil_queue_empty(&entrybuf->cfe_ctl_q) == 0) {
		CFIL_LOG(LOG_INFO, "so %llx control queue not empty",
			(uint64_t)VM_KERNEL_ADDRPERM(so));
		error = EBUSY;
		goto done;
	}
	/* Would be wasteful to try when flow controlled */
	if (cfc->cf_flags & CFF_FLOW_CONTROLLED) {
		error = ENOBUFS;
		goto done;
	}

	bzero(&msg_disconnected, sizeof(struct cfil_msg_hdr));
	msg_disconnected.cfm_len = sizeof(struct cfil_msg_hdr);
	msg_disconnected.cfm_version = CFM_VERSION_CURRENT;
	msg_disconnected.cfm_type = CFM_TYPE_EVENT;
	msg_disconnected.cfm_op = outgoing ? CFM_OP_DISCONNECT_OUT :
		CFM_OP_DISCONNECT_IN;
	msg_disconnected.cfm_sock_id = entry->cfe_cfil_info->cfi_sock_id;
	error = ctl_enqueuedata(entry->cfe_filter->cf_kcref,
		entry->cfe_filter->cf_kcunit,
		&msg_disconnected,
		sizeof(struct cfil_msg_hdr),
		CTL_DATA_EOR);
	if (error != 0) {
		CFIL_LOG(LOG_ERR, "ctl_enqueuedata() failed: %d", error);
		goto done;
	}
	microuptime(&entry->cfe_last_event);

	/* Remember we have sent the disconnection message */
	if (outgoing) {
		entry->cfe_flags |= CFEF_SENT_DISCONNECT_OUT;
		OSIncrementAtomic(&cfil_stats.cfs_disconnect_out_event_ok);
	} else {
		entry->cfe_flags |= CFEF_SENT_DISCONNECT_IN;
		OSIncrementAtomic(&cfil_stats.cfs_disconnect_in_event_ok);
	}
done:
	if (error == ENOBUFS) {
		entry->cfe_flags |= CFEF_FLOW_CONTROLLED;
		OSIncrementAtomic(
			&cfil_stats.cfs_disconnect_event_flow_control);

		if (!cfil_rw_lock_shared_to_exclusive(&cfil_lck_rw))
			cfil_rw_lock_exclusive(&cfil_lck_rw);

		cfc->cf_flags |= CFF_FLOW_CONTROLLED;

		cfil_rw_unlock_exclusive(&cfil_lck_rw);
	} else {
		if (error != 0)
			OSIncrementAtomic(
				&cfil_stats.cfs_disconnect_event_fail);

		cfil_rw_unlock_shared(&cfil_lck_rw);
	}
	return (error);
}
static int
cfil_dispatch_closed_event(struct socket *so, int kcunit)
{
	struct cfil_entry *entry;
	struct cfil_msg_hdr msg_closed;
	errno_t error = 0;
	struct content_filter *cfc;

	socket_lock_assert_owned(so);

	cfil_rw_lock_shared(&cfil_lck_rw);

	entry = &so->so_cfil->cfi_entries[kcunit - 1];
	cfc = entry->cfe_filter;
	if (cfc == NULL)
		goto done;

	CFIL_LOG(LOG_INFO, "so %llx kcunit %d",
		(uint64_t)VM_KERNEL_ADDRPERM(so), kcunit);

	/* Would be wasteful to try when flow controlled */
	if (cfc->cf_flags & CFF_FLOW_CONTROLLED) {
		error = ENOBUFS;
		goto done;
	}
	/*
	 * Send a single closed message per filter
	 */
	if ((entry->cfe_flags & CFEF_SENT_SOCK_CLOSED) != 0)
		goto done;
	if ((entry->cfe_flags & CFEF_SENT_SOCK_ATTACHED) == 0)
		goto done;

	bzero(&msg_closed, sizeof(struct cfil_msg_hdr));
	msg_closed.cfm_len = sizeof(struct cfil_msg_hdr);
	msg_closed.cfm_version = CFM_VERSION_CURRENT;
	msg_closed.cfm_type = CFM_TYPE_EVENT;
	msg_closed.cfm_op = CFM_OP_SOCKET_CLOSED;
	msg_closed.cfm_sock_id = entry->cfe_cfil_info->cfi_sock_id;
	error = ctl_enqueuedata(entry->cfe_filter->cf_kcref,
		entry->cfe_filter->cf_kcunit,
		&msg_closed,
		sizeof(struct cfil_msg_hdr),
		CTL_DATA_EOR);
	if (error != 0) {
		CFIL_LOG(LOG_ERR, "ctl_enqueuedata() failed: %d",
			error);
		goto done;
	}
	microuptime(&entry->cfe_last_event);
	entry->cfe_flags |= CFEF_SENT_SOCK_CLOSED;
	OSIncrementAtomic(&cfil_stats.cfs_closed_event_ok);
done:
	/* We can recover from flow control */
	if (error == ENOBUFS) {
		entry->cfe_flags |= CFEF_FLOW_CONTROLLED;
		OSIncrementAtomic(&cfil_stats.cfs_closed_event_flow_control);

		if (!cfil_rw_lock_shared_to_exclusive(&cfil_lck_rw))
			cfil_rw_lock_exclusive(&cfil_lck_rw);

		cfc->cf_flags |= CFF_FLOW_CONTROLLED;

		cfil_rw_unlock_exclusive(&cfil_lck_rw);
	} else {
		if (error != 0)
			OSIncrementAtomic(&cfil_stats.cfs_closed_event_fail);

		cfil_rw_unlock_shared(&cfil_lck_rw);
	}
	return (error);
}
static void
fill_ip6_sockaddr_4_6(union sockaddr_in_4_6 *sin46,
	struct in6_addr *ip6, u_int16_t port)
{
	struct sockaddr_in6 *sin6 = &sin46->sin6;

	sin6->sin6_family = AF_INET6;
	sin6->sin6_len = sizeof(*sin6);
	sin6->sin6_port = port;
	sin6->sin6_addr = *ip6;
	if (IN6_IS_SCOPE_EMBED(&sin6->sin6_addr)) {
		sin6->sin6_scope_id = ntohs(sin6->sin6_addr.s6_addr16[1]);
		sin6->sin6_addr.s6_addr16[1] = 0;
	}
}
static void
fill_ip_sockaddr_4_6(union sockaddr_in_4_6 *sin46,
	struct in_addr ip, u_int16_t port)
{
	struct sockaddr_in *sin = &sin46->sin;

	sin->sin_family = AF_INET;
	sin->sin_len = sizeof(*sin);
	sin->sin_port = port;
	sin->sin_addr.s_addr = ip.s_addr;
}
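
/*
 * cfil_dispatch_data_event()
 *
 * Descriptive note (added): deliver a span of socket data to the user
 * space filter agent as a CFM_OP_DATA_OUT or CFM_OP_DATA_IN event.  The
 * message is built as an mbuf packet whose first mbuf holds the
 * cfil_msg_data_event header; a copy of the data follows, so the
 * original mbufs stay queued in the kernel until a verdict is received.
 */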
static int
cfil_dispatch_data_event(struct socket *so, uint32_t kcunit, int outgoing,
	struct mbuf *data, unsigned int copyoffset, unsigned int copylen)
{
	errno_t error = 0;
	struct mbuf *copy = NULL;
	struct mbuf *msg = NULL;
	unsigned int one = 1;
	struct cfil_msg_data_event *data_req;
	size_t hdrsize;
	struct inpcb *inp = (struct inpcb *)so->so_pcb;
	struct cfil_entry *entry;
	struct cfe_buf *entrybuf;
	struct content_filter *cfc;

	cfil_rw_lock_shared(&cfil_lck_rw);

	entry = &so->so_cfil->cfi_entries[kcunit - 1];
	if (outgoing)
		entrybuf = &entry->cfe_snd;
	else
		entrybuf = &entry->cfe_rcv;

	cfc = entry->cfe_filter;
	if (cfc == NULL)
		goto done;

	CFIL_LOG(LOG_INFO, "so %llx kcunit %u outgoing %d",
		(uint64_t)VM_KERNEL_ADDRPERM(so), kcunit, outgoing);

	socket_lock_assert_owned(so);

	/* Would be wasteful to try when flow controlled */
	if (cfc->cf_flags & CFF_FLOW_CONTROLLED) {
		error = ENOBUFS;
		goto done;
	}

	/* Make a copy of the data to pass to kernel control socket */
	copy = m_copym_mode(data, copyoffset, copylen, M_DONTWAIT,
		M_COPYM_NOOP_HDR);
	if (copy == NULL) {
		CFIL_LOG(LOG_ERR, "m_copym_mode() failed");
		error = ENOMEM;
		goto done;
	}

	/* We need an mbuf packet for the message header */
	hdrsize = sizeof(struct cfil_msg_data_event);
	error = mbuf_allocpacket(MBUF_DONTWAIT, hdrsize, &one, &msg);
	if (error != 0) {
		CFIL_LOG(LOG_ERR, "mbuf_allocpacket() failed");
		m_freem(copy);
		/*
		 * ENOBUFS is to indicate flow control
		 */
		error = ENOMEM;
		goto done;
	}
	mbuf_setlen(msg, hdrsize);
	mbuf_pkthdr_setlen(msg, hdrsize + copylen);
	msg->m_next = copy;
	data_req = (struct cfil_msg_data_event *)mbuf_data(msg);
	bzero(data_req, hdrsize);
	data_req->cfd_msghdr.cfm_len = hdrsize + copylen;
	data_req->cfd_msghdr.cfm_version = 1;
	data_req->cfd_msghdr.cfm_type = CFM_TYPE_EVENT;
	data_req->cfd_msghdr.cfm_op =
		outgoing ? CFM_OP_DATA_OUT : CFM_OP_DATA_IN;
	data_req->cfd_msghdr.cfm_sock_id =
		entry->cfe_cfil_info->cfi_sock_id;
	data_req->cfd_start_offset = entrybuf->cfe_peeked;
	data_req->cfd_end_offset = entrybuf->cfe_peeked + copylen;

	/*
	 * For non connected sockets need to copy addresses from passed
	 * parameters
	 */
	if (inp->inp_vflag & INP_IPV6) {
		if (outgoing) {
			fill_ip6_sockaddr_4_6(&data_req->cfc_src,
				&inp->in6p_laddr, inp->inp_lport);
			fill_ip6_sockaddr_4_6(&data_req->cfc_dst,
				&inp->in6p_faddr, inp->inp_fport);
		} else {
			fill_ip6_sockaddr_4_6(&data_req->cfc_src,
				&inp->in6p_faddr, inp->inp_fport);
			fill_ip6_sockaddr_4_6(&data_req->cfc_dst,
				&inp->in6p_laddr, inp->inp_lport);
		}
	} else if (inp->inp_vflag & INP_IPV4) {
		if (outgoing) {
			fill_ip_sockaddr_4_6(&data_req->cfc_src,
				inp->inp_laddr, inp->inp_lport);
			fill_ip_sockaddr_4_6(&data_req->cfc_dst,
				inp->inp_faddr, inp->inp_fport);
		} else {
			fill_ip_sockaddr_4_6(&data_req->cfc_src,
				inp->inp_faddr, inp->inp_fport);
			fill_ip_sockaddr_4_6(&data_req->cfc_dst,
				inp->inp_laddr, inp->inp_lport);
		}
	}

	/* Pass the message to the content filter */
	error = ctl_enqueuembuf(entry->cfe_filter->cf_kcref,
		entry->cfe_filter->cf_kcunit,
		msg, CTL_DATA_EOR);
	if (error != 0) {
		CFIL_LOG(LOG_ERR, "ctl_enqueuembuf() failed: %d", error);
		mbuf_freem(msg);
		goto done;
	}
	entry->cfe_flags &= ~CFEF_FLOW_CONTROLLED;
	OSIncrementAtomic(&cfil_stats.cfs_data_event_ok);
done:
	if (error == ENOBUFS) {
		entry->cfe_flags |= CFEF_FLOW_CONTROLLED;
		OSIncrementAtomic(
			&cfil_stats.cfs_data_event_flow_control);

		if (!cfil_rw_lock_shared_to_exclusive(&cfil_lck_rw))
			cfil_rw_lock_exclusive(&cfil_lck_rw);

		cfc->cf_flags |= CFF_FLOW_CONTROLLED;

		cfil_rw_unlock_exclusive(&cfil_lck_rw);
	} else {
		if (error != 0)
			OSIncrementAtomic(&cfil_stats.cfs_data_event_fail);

		cfil_rw_unlock_shared(&cfil_lck_rw);
	}
	return (error);
}
/*
 * Process the queue of data waiting to be delivered to content filter
 */
static int
cfil_data_service_ctl_q(struct socket *so, uint32_t kcunit, int outgoing)
{
	errno_t error = 0;
	struct mbuf *data, *tmp = NULL;
	unsigned int datalen = 0, copylen = 0, copyoffset = 0;
	struct cfil_entry *entry;
	struct cfe_buf *entrybuf;
	uint64_t currentoffset = 0;

	if (so->so_cfil == NULL)
		return (0);

	CFIL_LOG(LOG_INFO, "so %llx kcunit %u outgoing %d",
		(uint64_t)VM_KERNEL_ADDRPERM(so), kcunit, outgoing);

	socket_lock_assert_owned(so);

	entry = &so->so_cfil->cfi_entries[kcunit - 1];
	if (outgoing)
		entrybuf = &entry->cfe_snd;
	else
		entrybuf = &entry->cfe_rcv;

	/* Send attached message if not yet done */
	if ((entry->cfe_flags & CFEF_SENT_SOCK_ATTACHED) == 0) {
		error = cfil_dispatch_attach_event(so, kcunit);
		if (error != 0) {
			/* We can recover from flow control */
			if (error == ENOBUFS || error == ENOMEM)
				error = 0;
			goto done;
		}
	} else if ((entry->cfe_flags & CFEF_DATA_START) == 0) {
		OSIncrementAtomic(&cfil_stats.cfs_ctl_q_not_started);
		goto done;
	}
	CFIL_LOG(LOG_DEBUG, "pass_offset %llu peeked %llu peek_offset %llu",
		entrybuf->cfe_pass_offset,
		entrybuf->cfe_peeked,
		entrybuf->cfe_peek_offset);

	/* Move all data that can pass */
	while ((data = cfil_queue_first(&entrybuf->cfe_ctl_q)) != NULL &&
		entrybuf->cfe_ctl_q.q_start < entrybuf->cfe_pass_offset) {
		datalen = cfil_data_length(data, NULL);
		tmp = data;

		if (entrybuf->cfe_ctl_q.q_start + datalen <=
			entrybuf->cfe_pass_offset) {
			/*
			 * The first mbuf can fully pass
			 */
			copylen = datalen;
		} else {
			/*
			 * The first mbuf can partially pass
			 */
			copylen = entrybuf->cfe_pass_offset -
				entrybuf->cfe_ctl_q.q_start;
		}
		VERIFY(copylen <= datalen);

		CFIL_LOG(LOG_DEBUG,
			"%llx first %llu peeked %llu pass %llu peek %llu "
			"datalen %u copylen %u",
			(uint64_t)VM_KERNEL_ADDRPERM(tmp),
			entrybuf->cfe_ctl_q.q_start,
			entrybuf->cfe_peeked,
			entrybuf->cfe_pass_offset,
			entrybuf->cfe_peek_offset,
			datalen, copylen);

		/*
		 * Data that passes has been peeked at explicitly or
		 * implicitly
		 */
		if (entrybuf->cfe_ctl_q.q_start + copylen >
			entrybuf->cfe_peeked)
			entrybuf->cfe_peeked =
				entrybuf->cfe_ctl_q.q_start + copylen;
		/*
		 * Stop on partial pass
		 */
		if (copylen < datalen)
			break;

		/* All good, move full data from ctl queue to pending queue */
		cfil_queue_remove(&entrybuf->cfe_ctl_q, data, datalen);

		cfil_queue_enqueue(&entrybuf->cfe_pending_q, data, datalen);
		if (outgoing)
			OSAddAtomic64(datalen,
				&cfil_stats.cfs_pending_q_out_enqueued);
		else
			OSAddAtomic64(datalen,
				&cfil_stats.cfs_pending_q_in_enqueued);
	}
	CFIL_INFO_VERIFY(so->so_cfil);
	if (tmp != NULL)
		CFIL_LOG(LOG_DEBUG,
			"%llx first %llu peeked %llu pass %llu peek %llu "
			"datalen %u copylen %u",
			(uint64_t)VM_KERNEL_ADDRPERM(tmp),
			entrybuf->cfe_ctl_q.q_start,
			entrybuf->cfe_peeked,
			entrybuf->cfe_pass_offset,
			entrybuf->cfe_peek_offset,
			datalen, copylen);
	tmp = NULL;

	/* Now deal with remaining data the filter wants to peek at */
	for (data = cfil_queue_first(&entrybuf->cfe_ctl_q),
		currentoffset = entrybuf->cfe_ctl_q.q_start;
		data != NULL && currentoffset < entrybuf->cfe_peek_offset;
		data = cfil_queue_next(&entrybuf->cfe_ctl_q, data),
		currentoffset += datalen) {
		datalen = cfil_data_length(data, NULL);
		tmp = data;

		/* We've already peeked at this mbuf */
		if (currentoffset + datalen <= entrybuf->cfe_peeked)
			continue;
		/*
		 * The data in the first mbuf may have been
		 * partially peeked at
		 */
		copyoffset = entrybuf->cfe_peeked - currentoffset;
		VERIFY(copyoffset < datalen);
		copylen = datalen - copyoffset;
		VERIFY(copylen <= datalen);
		/*
		 * Do not copy more than needed
		 */
		if (currentoffset + copyoffset + copylen >
			entrybuf->cfe_peek_offset) {
			copylen = entrybuf->cfe_peek_offset -
				(currentoffset + copyoffset);
		}

		CFIL_LOG(LOG_DEBUG,
			"%llx current %llu peeked %llu pass %llu peek %llu "
			"datalen %u copylen %u copyoffset %u",
			(uint64_t)VM_KERNEL_ADDRPERM(tmp),
			currentoffset,
			entrybuf->cfe_peeked,
			entrybuf->cfe_pass_offset,
			entrybuf->cfe_peek_offset,
			datalen, copylen, copyoffset);

		/*
		 * Stop if there is nothing more to peek at
		 */
		if (copylen == 0)
			break;
		/*
		 * Let the filter get a peek at this span of data
		 */
		error = cfil_dispatch_data_event(so, kcunit,
			outgoing, data, copyoffset, copylen);
		if (error != 0) {
			/* On error, leave data in ctl_q */
			break;
		}
		entrybuf->cfe_peeked += copylen;
		if (outgoing)
			OSAddAtomic64(copylen,
				&cfil_stats.cfs_ctl_q_out_peeked);
		else
			OSAddAtomic64(copylen,
				&cfil_stats.cfs_ctl_q_in_peeked);

		/* Stop when data could not be fully peeked at */
		if (copylen + copyoffset < datalen)
			break;
	}
	CFIL_INFO_VERIFY(so->so_cfil);
	if (tmp != NULL)
		CFIL_LOG(LOG_DEBUG,
			"%llx first %llu peeked %llu pass %llu peek %llu "
			"datalen %u copylen %u copyoffset %u",
			(uint64_t)VM_KERNEL_ADDRPERM(tmp),
			currentoffset,
			entrybuf->cfe_peeked,
			entrybuf->cfe_pass_offset,
			entrybuf->cfe_peek_offset,
			datalen, copylen, copyoffset);

	/*
	 * Process data that has passed the filter
	 */
	error = cfil_service_pending_queue(so, kcunit, outgoing);
	if (error != 0) {
		CFIL_LOG(LOG_ERR, "cfil_service_pending_queue() error %d",
			error);
		goto done;
	}

	/*
	 * Dispatch disconnect events that could not be sent
	 */
	if (so->so_cfil == NULL)
		goto done;
	else if (outgoing) {
		if ((so->so_cfil->cfi_flags & CFIF_SHUT_WR) &&
			!(entry->cfe_flags & CFEF_SENT_DISCONNECT_OUT))
			cfil_dispatch_disconnect_event(so, kcunit, 1);
	} else {
		if ((so->so_cfil->cfi_flags & CFIF_SHUT_RD) &&
			!(entry->cfe_flags & CFEF_SENT_DISCONNECT_IN))
			cfil_dispatch_disconnect_event(so, kcunit, 0);
	}

done:
	CFIL_LOG(LOG_DEBUG,
		"first %llu peeked %llu pass %llu peek %llu",
		entrybuf->cfe_ctl_q.q_start,
		entrybuf->cfe_peeked,
		entrybuf->cfe_pass_offset,
		entrybuf->cfe_peek_offset);

	CFIL_INFO_VERIFY(so->so_cfil);
	return (error);
}
/*
 * cfil_data_filter()
 *
 * Process data for a content filter installed on a socket
 */
int
cfil_data_filter(struct socket *so, uint32_t kcunit, int outgoing,
	struct mbuf *data, uint64_t datalen)
{
	errno_t error = 0;
	struct cfil_entry *entry;
	struct cfe_buf *entrybuf;

	CFIL_LOG(LOG_INFO, "so %llx kcunit %u outgoing %d",
		(uint64_t)VM_KERNEL_ADDRPERM(so), kcunit, outgoing);

	socket_lock_assert_owned(so);

	entry = &so->so_cfil->cfi_entries[kcunit - 1];
	if (outgoing)
		entrybuf = &entry->cfe_snd;
	else
		entrybuf = &entry->cfe_rcv;

	/* Are we attached to the filter? */
	if (entry->cfe_filter == NULL) {
		error = 0;
		goto done;
	}

	/* Dispatch to filters */
	cfil_queue_enqueue(&entrybuf->cfe_ctl_q, data, datalen);
	if (outgoing)
		OSAddAtomic64(datalen,
			&cfil_stats.cfs_ctl_q_out_enqueued);
	else
		OSAddAtomic64(datalen,
			&cfil_stats.cfs_ctl_q_in_enqueued);

	error = cfil_data_service_ctl_q(so, kcunit, outgoing);
	if (error != 0) {
		CFIL_LOG(LOG_ERR, "cfil_data_service_ctl_q() error %d",
			error);
	}
	/*
	 * We have to return EJUSTRETURN in all cases to avoid double free
	 * by the socket layer
	 */
	error = EJUSTRETURN;
done:
	CFIL_INFO_VERIFY(so->so_cfil);

	CFIL_LOG(LOG_INFO, "return %d", error);
	return (error);
}
/*
 * cfil_service_inject_queue() re-injects data that passed the filters
 */
static int
cfil_service_inject_queue(struct socket *so, int outgoing)
{
	struct mbuf *data;
	unsigned int datalen;
	int mbcnt;
	unsigned int copylen;
	errno_t error = 0;
	struct mbuf *copy = NULL;
	struct cfi_buf *cfi_buf;
	struct cfil_queue *inject_q;
	int need_rwakeup = 0;

	if (so->so_cfil == NULL)
		return (0);

	CFIL_LOG(LOG_INFO, "so %llx outgoing %d",
		(uint64_t)VM_KERNEL_ADDRPERM(so), outgoing);

	socket_lock_assert_owned(so);

	if (outgoing) {
		cfi_buf = &so->so_cfil->cfi_snd;
		so->so_cfil->cfi_flags &= ~CFIF_RETRY_INJECT_OUT;
	} else {
		cfi_buf = &so->so_cfil->cfi_rcv;
		so->so_cfil->cfi_flags &= ~CFIF_RETRY_INJECT_IN;
	}
	inject_q = &cfi_buf->cfi_inject_q;

	while ((data = cfil_queue_first(inject_q)) != NULL) {
		datalen = cfil_data_length(data, &mbcnt);

		CFIL_LOG(LOG_INFO, "data %llx datalen %u",
			(uint64_t)VM_KERNEL_ADDRPERM(data), datalen);

		/* Make a copy in case of injection error */
		copy = m_copym_mode(data, 0, M_COPYALL, M_DONTWAIT,
			M_COPYM_COPY_HDR);
		if (copy == NULL) {
			CFIL_LOG(LOG_ERR, "m_copym_mode() failed");
			error = ENOMEM;
			break;
		}

		if ((copylen = m_length(copy)) != datalen)
			panic("%s so %p copylen %d != datalen %d",
				__func__, so, copylen, datalen);

		if (outgoing) {
			socket_unlock(so, 0);

			/*
			 * Set both DONTWAIT and NBIO flags as we really
			 * do not want to block
			 */
			error = sosend(so, NULL, NULL,
				copy, NULL,
				MSG_SKIPCFIL | MSG_DONTWAIT | MSG_NBIO);

			socket_lock(so, 0);

			if (error != 0) {
				CFIL_LOG(LOG_ERR, "sosend() failed %d",
					error);
			}
		} else {
			copy->m_flags |= M_SKIPCFIL;

			/*
			 * This works only because we support plain TCP.
			 * For UDP, RAWIP, MPTCP and message TCP we'll
			 * need to call the appropriate sbappendxxx()
			 * or fix sock_inject_data_in()
			 */
			if (sbappendstream(&so->so_rcv, copy))
				need_rwakeup = 1;
		}

		/* Need to reassess if filter is still attached after unlock */
		if (so->so_cfil == NULL) {
			CFIL_LOG(LOG_ERR, "so %llx cfil detached",
				(uint64_t)VM_KERNEL_ADDRPERM(so));
			OSIncrementAtomic(&cfil_stats.cfs_inject_q_detached);
			error = 0;
			break;
		}
		if (error != 0)
			break;

		/* Injection successful */
		cfil_queue_remove(inject_q, data, datalen);
		mbuf_freem(data);

		cfi_buf->cfi_pending_first += datalen;
		cfi_buf->cfi_pending_mbcnt -= mbcnt;
		cfil_info_buf_verify(cfi_buf);

		if (outgoing)
			OSAddAtomic64(datalen,
				&cfil_stats.cfs_inject_q_out_passed);
		else
			OSAddAtomic64(datalen,
				&cfil_stats.cfs_inject_q_in_passed);
	}

	/* A single wakeup for several packets is more efficient */
	if (need_rwakeup)
		sorwakeup(so);

	if (error != 0 && so->so_cfil) {
		if (error == ENOBUFS)
			OSIncrementAtomic(&cfil_stats.cfs_inject_q_nobufs);
		if (error == ENOMEM)
			OSIncrementAtomic(&cfil_stats.cfs_inject_q_nomem);

		if (outgoing) {
			so->so_cfil->cfi_flags |= CFIF_RETRY_INJECT_OUT;
			OSIncrementAtomic(&cfil_stats.cfs_inject_q_out_fail);
		} else {
			so->so_cfil->cfi_flags |= CFIF_RETRY_INJECT_IN;
			OSIncrementAtomic(&cfil_stats.cfs_inject_q_in_fail);
		}
	}

	if (so->so_cfil && (so->so_cfil->cfi_flags & CFIF_SHUT_WR)) {
		cfil_sock_notify_shutdown(so, SHUT_WR);
		if (cfil_sock_data_pending(&so->so_snd) == 0)
			soshutdownlock_final(so, SHUT_WR);
	}
	if (so->so_cfil && (so->so_cfil->cfi_flags & CFIF_CLOSE_WAIT)) {
		if (cfil_filters_attached(so) == 0) {
			CFIL_LOG(LOG_INFO, "so %llx waking",
				(uint64_t)VM_KERNEL_ADDRPERM(so));
			wakeup((caddr_t)&so->so_cfil);
		}
	}

	CFIL_INFO_VERIFY(so->so_cfil);

	return (error);
}
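
/*
 * cfil_service_pending_queue()
 *
 * Descriptive note (added): walk the pending queue and hand over, on
 * mbuf boundaries, the data that is now below the pass offset to the
 * remaining content filters.  Data that has passed every filter is
 * moved to the inject queue for re-injection into the socket.
 */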
static int
cfil_service_pending_queue(struct socket *so, uint32_t kcunit, int outgoing)
{
	uint64_t passlen, curlen;
	errno_t error = 0;
	unsigned int datalen;
	struct mbuf *data;
	struct cfil_entry *entry;
	struct cfe_buf *entrybuf;
	struct cfil_queue *pending_q;

	CFIL_LOG(LOG_INFO, "so %llx kcunit %u outgoing %d",
		(uint64_t)VM_KERNEL_ADDRPERM(so), kcunit, outgoing);

	socket_lock_assert_owned(so);

	entry = &so->so_cfil->cfi_entries[kcunit - 1];
	if (outgoing)
		entrybuf = &entry->cfe_snd;
	else
		entrybuf = &entry->cfe_rcv;

	pending_q = &entrybuf->cfe_pending_q;

	passlen = entrybuf->cfe_pass_offset - pending_q->q_start;

	/*
	 * Locate the chunks of data that we can pass to the next filter
	 * A data chunk must be on mbuf boundaries
	 */
	curlen = 0;
	while ((data = cfil_queue_first(pending_q)) != NULL) {
		datalen = cfil_data_length(data, NULL);

		CFIL_LOG(LOG_INFO,
			"data %llx datalen %u passlen %llu curlen %llu",
			(uint64_t)VM_KERNEL_ADDRPERM(data), datalen,
			passlen, curlen);

		if (curlen + datalen > passlen)
			break;

		cfil_queue_remove(pending_q, data, datalen);

		curlen += datalen;

		for (kcunit += 1;
			kcunit <= MAX_CONTENT_FILTER;
			kcunit++) {
			error = cfil_data_filter(so, kcunit, outgoing,
				data, datalen);
			/* 0 means passed so we can continue */
			if (error != 0)
				break;
		}
		/* When data has passed all filters, re-inject */
		if (error == 0) {
			if (outgoing) {
				cfil_queue_enqueue(
					&so->so_cfil->cfi_snd.cfi_inject_q,
					data, datalen);
				OSAddAtomic64(datalen,
					&cfil_stats.cfs_inject_q_out_enqueued);
			} else {
				cfil_queue_enqueue(
					&so->so_cfil->cfi_rcv.cfi_inject_q,
					data, datalen);
				OSAddAtomic64(datalen,
					&cfil_stats.cfs_inject_q_in_enqueued);
			}
		}
	}

	CFIL_INFO_VERIFY(so->so_cfil);

	return (error);
}
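
/*
 * cfil_update_data_offsets()
 *
 * Descriptive note (added): apply the pass and peek offsets received
 * from the user space filter agent to the given direction of the socket,
 * then service the control queue so newly passed data moves forward.
 * Returns EJUSTRETURN when the update was handled by the content filter
 * subsystem.
 */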
int
cfil_update_data_offsets(struct socket *so, uint32_t kcunit, int outgoing,
	uint64_t pass_offset, uint64_t peek_offset)
{
	errno_t error = 0;
	struct cfil_entry *entry = NULL;
	struct cfe_buf *entrybuf;

	CFIL_LOG(LOG_INFO, "pass %llu peek %llu", pass_offset, peek_offset);

	socket_lock_assert_owned(so);

	if (so->so_cfil == NULL) {
		CFIL_LOG(LOG_ERR, "so %llx cfil detached",
			(uint64_t)VM_KERNEL_ADDRPERM(so));
		error = 0;
		goto done;
	} else if (so->so_cfil->cfi_flags & CFIF_DROP) {
		CFIL_LOG(LOG_ERR, "so %llx drop set",
			(uint64_t)VM_KERNEL_ADDRPERM(so));
		error = EPIPE;
		goto done;
	}

	entry = &so->so_cfil->cfi_entries[kcunit - 1];
	if (outgoing)
		entrybuf = &entry->cfe_snd;
	else
		entrybuf = &entry->cfe_rcv;

	/* Record updated offsets for this content filter */
	if (pass_offset > entrybuf->cfe_pass_offset) {
		entrybuf->cfe_pass_offset = pass_offset;

		if (entrybuf->cfe_peek_offset < entrybuf->cfe_pass_offset)
			entrybuf->cfe_peek_offset = entrybuf->cfe_pass_offset;
	} else {
		CFIL_LOG(LOG_INFO, "pass_offset %llu <= cfe_pass_offset %llu",
			pass_offset, entrybuf->cfe_pass_offset);
	}
	/* Filter does not want or need to see data that's allowed to pass */
	if (peek_offset > entrybuf->cfe_pass_offset &&
		peek_offset > entrybuf->cfe_peek_offset) {
		entrybuf->cfe_peek_offset = peek_offset;
	}

	/* Move data held in control queue to pending queue if needed */
	error = cfil_data_service_ctl_q(so, kcunit, outgoing);
	if (error != 0) {
		CFIL_LOG(LOG_ERR, "cfil_data_service_ctl_q() error %d",
			error);
		goto done;
	}
	error = EJUSTRETURN;

done:
	/*
	 * The filter is effectively detached when pass all from both sides
	 * or when the socket is closed and no more data is waiting
	 * to be delivered to the filter
	 */
	if (entry != NULL &&
		((entry->cfe_snd.cfe_pass_offset == CFM_MAX_OFFSET &&
		entry->cfe_rcv.cfe_pass_offset == CFM_MAX_OFFSET) ||
		((so->so_cfil->cfi_flags & CFIF_CLOSE_WAIT) &&
		cfil_queue_empty(&entry->cfe_snd.cfe_ctl_q) &&
		cfil_queue_empty(&entry->cfe_rcv.cfe_ctl_q)))) {
		entry->cfe_flags |= CFEF_CFIL_DETACHED;
		CFIL_LOG(LOG_INFO, "so %llx detached %u",
			(uint64_t)VM_KERNEL_ADDRPERM(so), kcunit);
		if ((so->so_cfil->cfi_flags & CFIF_CLOSE_WAIT) &&
			cfil_filters_attached(so) == 0) {
			CFIL_LOG(LOG_INFO, "so %llx waking",
				(uint64_t)VM_KERNEL_ADDRPERM(so));
			wakeup((caddr_t)&so->so_cfil);
		}
	}
	CFIL_INFO_VERIFY(so->so_cfil);
	CFIL_LOG(LOG_INFO, "return %d", error);
	return (error);
}
/*
 * Update pass offset for socket when no data is pending
 */
static int
cfil_set_socket_pass_offset(struct socket *so, int outgoing)
{
	struct cfi_buf *cfi_buf;
	struct cfil_entry *entry;
	struct cfe_buf *entrybuf;
	uint32_t kcunit;
	uint64_t pass_offset = 0;

	if (so->so_cfil == NULL)
		return (0);

	CFIL_LOG(LOG_INFO, "so %llx outgoing %d",
		(uint64_t)VM_KERNEL_ADDRPERM(so), outgoing);

	socket_lock_assert_owned(so);

	if (outgoing)
		cfi_buf = &so->so_cfil->cfi_snd;
	else
		cfi_buf = &so->so_cfil->cfi_rcv;

	if (cfi_buf->cfi_pending_last - cfi_buf->cfi_pending_first == 0) {
		for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
			entry = &so->so_cfil->cfi_entries[kcunit - 1];

			/* Are we attached to a filter? */
			if (entry->cfe_filter == NULL)
				continue;

			if (outgoing)
				entrybuf = &entry->cfe_snd;
			else
				entrybuf = &entry->cfe_rcv;

			if (pass_offset == 0 ||
				entrybuf->cfe_pass_offset < pass_offset)
				pass_offset = entrybuf->cfe_pass_offset;
		}
		cfi_buf->cfi_pass_offset = pass_offset;
	}

	return (0);
}
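
/*
 * cfil_action_data_pass()
 *
 * Descriptive note (added): entry point for the filter agent's data
 * update action: update the pass and peek offsets, re-inject any data
 * that may now pass, and recompute the per-socket pass offset.
 */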
int
cfil_action_data_pass(struct socket *so, uint32_t kcunit, int outgoing,
	uint64_t pass_offset, uint64_t peek_offset)
{
	errno_t error = 0;

	CFIL_LOG(LOG_INFO, "");

	socket_lock_assert_owned(so);

	error = cfil_acquire_sockbuf(so, outgoing);
	if (error != 0) {
		CFIL_LOG(LOG_INFO, "so %llx %s dropped",
			(uint64_t)VM_KERNEL_ADDRPERM(so),
			outgoing ? "out" : "in");
		goto release;
	}

	error = cfil_update_data_offsets(so, kcunit, outgoing,
		pass_offset, peek_offset);

	cfil_service_inject_queue(so, outgoing);

	cfil_set_socket_pass_offset(so, outgoing);
release:
	CFIL_INFO_VERIFY(so->so_cfil);
	cfil_release_sockbuf(so, outgoing);

	return (error);
}
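
/*
 * cfil_flush_queues()
 *
 * Descriptive note (added): drain the control, pending and inject
 * queues of both directions, typically when the socket is dropped or
 * closed and the held data can no longer be delivered.
 */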
static void
cfil_flush_queues(struct socket *so)
{
	struct cfil_entry *entry;
	int kcunit;
	uint64_t drained;

	if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL)
		goto done;

	socket_lock_assert_owned(so);

	/*
	 * Flush the output queues and ignore errors as long as
	 * we are attached
	 */
	(void) cfil_acquire_sockbuf(so, 1);
	if (so->so_cfil != NULL) {
		drained = 0;
		for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
			entry = &so->so_cfil->cfi_entries[kcunit - 1];

			drained += cfil_queue_drain(&entry->cfe_snd.cfe_ctl_q);
			drained += cfil_queue_drain(
				&entry->cfe_snd.cfe_pending_q);
		}
		drained += cfil_queue_drain(&so->so_cfil->cfi_snd.cfi_inject_q);
		if (drained) {
			if (so->so_cfil->cfi_flags & CFIF_DROP)
				OSIncrementAtomic(
					&cfil_stats.cfs_flush_out_drop);
			else
				OSIncrementAtomic(
					&cfil_stats.cfs_flush_out_close);
		}
	}
	cfil_release_sockbuf(so, 1);

	/*
	 * Flush the input queues
	 */
	(void) cfil_acquire_sockbuf(so, 0);
	if (so->so_cfil != NULL) {
		drained = 0;
		for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
			entry = &so->so_cfil->cfi_entries[kcunit - 1];

			drained += cfil_queue_drain(
				&entry->cfe_rcv.cfe_ctl_q);
			drained += cfil_queue_drain(
				&entry->cfe_rcv.cfe_pending_q);
		}
		drained += cfil_queue_drain(&so->so_cfil->cfi_rcv.cfi_inject_q);
		if (drained) {
			if (so->so_cfil->cfi_flags & CFIF_DROP)
				OSIncrementAtomic(
					&cfil_stats.cfs_flush_in_drop);
			else
				OSIncrementAtomic(
					&cfil_stats.cfs_flush_in_close);
		}
	}
	cfil_release_sockbuf(so, 0);
done:
	CFIL_INFO_VERIFY(so->so_cfil);
}
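
/*
 * cfil_action_drop()
 *
 * Descriptive note (added): handle a drop verdict from the user space
 * filter agent: mark the socket with CFIF_DROP, defunct and disconnect
 * it, mark the entry detached and flush any data still held by the
 * content filter.
 */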
int
cfil_action_drop(struct socket *so, uint32_t kcunit)
{
	errno_t error = 0;
	struct cfil_entry *entry;
	struct proc *p;

	if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL)
		goto done;

	socket_lock_assert_owned(so);

	entry = &so->so_cfil->cfi_entries[kcunit - 1];

	/* Are we attached to the filter? */
	if (entry->cfe_filter == NULL)
		goto done;

	so->so_cfil->cfi_flags |= CFIF_DROP;

	p = current_proc();

	/*
	 * Force the socket to be marked defunct
	 * (forcing fixed along with rdar://19391339)
	 */
	error = sosetdefunct(p, so,
		SHUTDOWN_SOCKET_LEVEL_CONTENT_FILTER | SHUTDOWN_SOCKET_LEVEL_DISCONNECT_ALL,
		FALSE);

	/* Flush the socket buffer and disconnect */
	if (error == 0)
		error = sodefunct(p, so,
			SHUTDOWN_SOCKET_LEVEL_CONTENT_FILTER | SHUTDOWN_SOCKET_LEVEL_DISCONNECT_ALL);

	/* The filter is done, mark as detached */
	entry->cfe_flags |= CFEF_CFIL_DETACHED;
	CFIL_LOG(LOG_INFO, "so %llx detached %u",
		(uint64_t)VM_KERNEL_ADDRPERM(so), kcunit);

	/* Pending data needs to go */
	cfil_flush_queues(so);

	if (so->so_cfil && (so->so_cfil->cfi_flags & CFIF_CLOSE_WAIT)) {
		if (cfil_filters_attached(so) == 0) {
			CFIL_LOG(LOG_INFO, "so %llx waking",
				(uint64_t)VM_KERNEL_ADDRPERM(so));
			wakeup((caddr_t)&so->so_cfil);
		}
	}
done:
	return (error);
}
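
/*
 * cfil_update_entry_offsets()
 *
 * Descriptive note (added): fast path bookkeeping that advances the
 * queue and pass/peek offsets of every attached entry when data is
 * allowed through without being queued for filtering.
 */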
static int
cfil_update_entry_offsets(struct socket *so, int outgoing, unsigned int datalen)
{
	struct cfil_entry *entry;
	struct cfe_buf *entrybuf;
	uint32_t kcunit;

	CFIL_LOG(LOG_INFO, "so %llx outgoing %d datalen %u",
		(uint64_t)VM_KERNEL_ADDRPERM(so), outgoing, datalen);

	for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
		entry = &so->so_cfil->cfi_entries[kcunit - 1];

		/* Are we attached to the filter? */
		if (entry->cfe_filter == NULL)
			continue;

		if (outgoing)
			entrybuf = &entry->cfe_snd;
		else
			entrybuf = &entry->cfe_rcv;

		entrybuf->cfe_ctl_q.q_start += datalen;
		entrybuf->cfe_pass_offset = entrybuf->cfe_ctl_q.q_start;
		entrybuf->cfe_peeked = entrybuf->cfe_ctl_q.q_start;
		if (entrybuf->cfe_peek_offset < entrybuf->cfe_pass_offset)
			entrybuf->cfe_peek_offset = entrybuf->cfe_pass_offset;

		entrybuf->cfe_ctl_q.q_end += datalen;

		entrybuf->cfe_pending_q.q_start += datalen;
		entrybuf->cfe_pending_q.q_end += datalen;
	}
	CFIL_INFO_VERIFY(so->so_cfil);
	return (0);
}
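
/*
 * cfil_data_common()
 *
 * Descriptive note (added): common handling for outgoing and incoming
 * data: account for the data in the per-direction cfi_buf, then either
 * take the fast path when the data is entirely below the pass offset or
 * run it through each attached content filter in kernel control unit
 * order.
 */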
int
cfil_data_common(struct socket *so, int outgoing, struct sockaddr *to,
	struct mbuf *data, struct mbuf *control, uint32_t flags)
{
#pragma unused(to, control, flags)
	errno_t error = 0;
	unsigned int datalen;
	int mbcnt;
	int kcunit;
	struct cfi_buf *cfi_buf;

	if (so->so_cfil == NULL) {
		CFIL_LOG(LOG_ERR, "so %llx cfil detached",
			(uint64_t)VM_KERNEL_ADDRPERM(so));
		error = 0;
		goto done;
	} else if (so->so_cfil->cfi_flags & CFIF_DROP) {
		CFIL_LOG(LOG_ERR, "so %llx drop set",
			(uint64_t)VM_KERNEL_ADDRPERM(so));
		error = EPIPE;
		goto done;
	}

	datalen = cfil_data_length(data, &mbcnt);

	CFIL_LOG(LOG_INFO, "so %llx %s m %llx len %u flags 0x%x nextpkt %llx",
		(uint64_t)VM_KERNEL_ADDRPERM(so),
		outgoing ? "out" : "in",
		(uint64_t)VM_KERNEL_ADDRPERM(data), datalen, data->m_flags,
		(uint64_t)VM_KERNEL_ADDRPERM(data->m_nextpkt));

	if (outgoing)
		cfi_buf = &so->so_cfil->cfi_snd;
	else
		cfi_buf = &so->so_cfil->cfi_rcv;

	cfi_buf->cfi_pending_last += datalen;
	cfi_buf->cfi_pending_mbcnt += mbcnt;
	cfil_info_buf_verify(cfi_buf);

	CFIL_LOG(LOG_INFO, "so %llx cfi_pending_last %llu cfi_pass_offset %llu",
		(uint64_t)VM_KERNEL_ADDRPERM(so),
		cfi_buf->cfi_pending_last,
		cfi_buf->cfi_pass_offset);

	/* Fast path when below pass offset */
	if (cfi_buf->cfi_pending_last <= cfi_buf->cfi_pass_offset) {
		cfil_update_entry_offsets(so, outgoing, datalen);
	} else {
		for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
			error = cfil_data_filter(so, kcunit, outgoing, data,
				datalen);
			/* 0 means passed so continue with next filter */
			if (error != 0)
				break;
		}
	}

	/* Move cursor if no filter claimed the data */
	if (error == 0) {
		cfi_buf->cfi_pending_first += datalen;
		cfi_buf->cfi_pending_mbcnt -= mbcnt;
		cfil_info_buf_verify(cfi_buf);
	}
done:
	CFIL_INFO_VERIFY(so->so_cfil);

	return (error);
}
/*
 * Callback from socket layer sosendxxx()
 */
int
cfil_sock_data_out(struct socket *so, struct sockaddr *to,
	struct mbuf *data, struct mbuf *control, uint32_t flags)
{
	int error = 0;

	if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL)
		return (0);

	socket_lock_assert_owned(so);

	if (so->so_cfil->cfi_flags & CFIF_DROP) {
		CFIL_LOG(LOG_ERR, "so %llx drop set",
			(uint64_t)VM_KERNEL_ADDRPERM(so));
		return (EPIPE);
	}
	if (control != NULL) {
		CFIL_LOG(LOG_ERR, "so %llx control",
			(uint64_t)VM_KERNEL_ADDRPERM(so));
		OSIncrementAtomic(&cfil_stats.cfs_data_out_control);
	}
	if ((flags & MSG_OOB)) {
		CFIL_LOG(LOG_ERR, "so %llx MSG_OOB",
			(uint64_t)VM_KERNEL_ADDRPERM(so));
		OSIncrementAtomic(&cfil_stats.cfs_data_out_oob);
	}
	if ((so->so_snd.sb_flags & SB_LOCK) == 0)
		panic("so %p SB_LOCK not set", so);

	if (so->so_snd.sb_cfil_thread != NULL)
		panic("%s sb_cfil_thread %p not NULL", __func__,
			so->so_snd.sb_cfil_thread);

	error = cfil_data_common(so, 1, to, data, control, flags);

	return (error);
}
/*
 * Callback from socket layer sbappendxxx()
 */
int
cfil_sock_data_in(struct socket *so, struct sockaddr *from,
	struct mbuf *data, struct mbuf *control, uint32_t flags)
{
	int error = 0;

	if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL)
		return (0);

	socket_lock_assert_owned(so);

	if (so->so_cfil->cfi_flags & CFIF_DROP) {
		CFIL_LOG(LOG_ERR, "so %llx drop set",
			(uint64_t)VM_KERNEL_ADDRPERM(so));
		return (EPIPE);
	}
	if (control != NULL) {
		CFIL_LOG(LOG_ERR, "so %llx control",
			(uint64_t)VM_KERNEL_ADDRPERM(so));
		OSIncrementAtomic(&cfil_stats.cfs_data_in_control);
	}
	if (data->m_type == MT_OOBDATA) {
		CFIL_LOG(LOG_ERR, "so %llx MSG_OOB",
			(uint64_t)VM_KERNEL_ADDRPERM(so));
		OSIncrementAtomic(&cfil_stats.cfs_data_in_oob);
	}
	error = cfil_data_common(so, 0, from, data, control, flags);

	return (error);
}
/*
 * Callback from socket layer soshutdownxxx()
 *
 * We may delay the shutdown write if there's outgoing data in process.
 *
 * There is no point in delaying the shutdown read because the process
 * indicated that it does not want to read any more data.
 */
int
cfil_sock_shutdown(struct socket *so, int *how)
{
	int error = 0;

	if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL)
		goto done;

	socket_lock_assert_owned(so);

	CFIL_LOG(LOG_INFO, "so %llx how %d",
		(uint64_t)VM_KERNEL_ADDRPERM(so), *how);

	/*
	 * Check the state of the socket before the content filter
	 */
	if (*how != SHUT_WR && (so->so_state & SS_CANTRCVMORE) != 0) {
		/* read already shut down */
		error = ENOTCONN;
		goto done;
	}
	if (*how != SHUT_RD && (so->so_state & SS_CANTSENDMORE) != 0) {
		/* write already shut down */
		error = ENOTCONN;
		goto done;
	}

	if ((so->so_cfil->cfi_flags & CFIF_DROP) != 0) {
		CFIL_LOG(LOG_ERR, "so %llx drop set",
			(uint64_t)VM_KERNEL_ADDRPERM(so));
		goto done;
	}

	/*
	 * shutdown read: SHUT_RD or SHUT_RDWR
	 */
	if (*how != SHUT_WR) {
		if (so->so_cfil->cfi_flags & CFIF_SHUT_RD) {
			goto done;
		}
		so->so_cfil->cfi_flags |= CFIF_SHUT_RD;
		cfil_sock_notify_shutdown(so, SHUT_RD);
	}
	/*
	 * shutdown write: SHUT_WR or SHUT_RDWR
	 */
	if (*how != SHUT_RD) {
		if (so->so_cfil->cfi_flags & CFIF_SHUT_WR) {
			goto done;
		}
		so->so_cfil->cfi_flags |= CFIF_SHUT_WR;
		cfil_sock_notify_shutdown(so, SHUT_WR);
		/*
		 * When outgoing data is pending, we delay the shutdown at the
		 * protocol level until the content filters give the final
		 * verdict on the pending data.
		 */
		if (cfil_sock_data_pending(&so->so_snd) != 0) {
			/*
			 * When shutting down the read and write sides at once
			 * we can proceed to the final shutdown of the read
			 * side. Otherwise, we just return.
			 */
			if (*how == SHUT_WR) {
				error = EJUSTRETURN;
			} else if (*how == SHUT_RDWR) {
				*how = SHUT_RD;
			}
		}
	}
done:
	return (error);
}
/*
 * This is called when the socket is closed and there is no more
 * opportunity for filtering
 */
void
cfil_sock_is_closed(struct socket *so)
{
	errno_t error = 0;
	int kcunit;

	if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL)
		return;

	CFIL_LOG(LOG_INFO, "so %llx", (uint64_t)VM_KERNEL_ADDRPERM(so));

	socket_lock_assert_owned(so);

	for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
		/* Let the filters know of the closing */
		error = cfil_dispatch_closed_event(so, kcunit);
	}

	/* Last chance to push passed data out */
	error = cfil_acquire_sockbuf(so, 1);
	if (error == 0)
		cfil_service_inject_queue(so, 1);
	cfil_release_sockbuf(so, 1);

	so->so_cfil->cfi_flags |= CFIF_SOCK_CLOSED;

	/* Pending data needs to go */
	cfil_flush_queues(so);

	CFIL_INFO_VERIFY(so->so_cfil);
}
/*
 * This is called when the socket is disconnected so let the filters
 * know about the disconnection and that no more data will come
 *
 * The how parameter has the same values as soshutdown()
 */
void
cfil_sock_notify_shutdown(struct socket *so, int how)
{
	errno_t error = 0;
	int kcunit;

	if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL)
		return;

	CFIL_LOG(LOG_INFO, "so %llx how %d",
		(uint64_t)VM_KERNEL_ADDRPERM(so), how);

	socket_lock_assert_owned(so);

	for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
		/* Disconnect incoming side */
		if (how != SHUT_WR)
			error = cfil_dispatch_disconnect_event(so, kcunit, 0);
		/* Disconnect outgoing side */
		if (how != SHUT_RD)
			error = cfil_dispatch_disconnect_event(so, kcunit, 1);
	}
}
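
/*
 * cfil_filters_attached()
 *
 * Descriptive note (added): return non zero when at least one content
 * filter is still attached to the socket, i.e. an attached event was
 * sent and no detach has happened yet.
 */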
static int
cfil_filters_attached(struct socket *so)
{
	struct cfil_entry *entry;
	uint32_t kcunit;
	int attached = 0;

	if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL)
		return (0);

	socket_lock_assert_owned(so);

	for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
		entry = &so->so_cfil->cfi_entries[kcunit - 1];

		/* Are we attached to the filter? */
		if (entry->cfe_filter == NULL)
			continue;
		if ((entry->cfe_flags & CFEF_SENT_SOCK_ATTACHED) == 0)
			continue;
		if ((entry->cfe_flags & CFEF_CFIL_DETACHED) != 0)
			continue;
		attached = 1;
		break;
	}

	return (attached);
}
/*
 * This is called when the socket is closed and we are waiting for
 * the filters to give the final pass or drop verdict
 */
void
cfil_sock_close_wait(struct socket *so)
{
	lck_mtx_t *mutex_held;
	struct timespec ts;
	int error;

	if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL)
		return;

	CFIL_LOG(LOG_INFO, "so %llx", (uint64_t)VM_KERNEL_ADDRPERM(so));

	if (so->so_proto->pr_getlock != NULL)
		mutex_held = (*so->so_proto->pr_getlock)(so, 0);
	else
		mutex_held = so->so_proto->pr_domain->dom_mtx;
	lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);

	while (cfil_filters_attached(so)) {
		/*
		 * Notify the filters we are going away so they can detach
		 */
		cfil_sock_notify_shutdown(so, SHUT_RDWR);

		/*
		 * Make sure we still need to wait after the filters are
		 * notified of the disconnection
		 */
		if (cfil_filters_attached(so) == 0)
			break;

		CFIL_LOG(LOG_INFO, "so %llx waiting",
			(uint64_t)VM_KERNEL_ADDRPERM(so));

		ts.tv_sec = cfil_close_wait_timeout / 1000;
		ts.tv_nsec = (cfil_close_wait_timeout % 1000) *
			NSEC_PER_USEC * 1000;

		OSIncrementAtomic(&cfil_stats.cfs_close_wait);
		so->so_cfil->cfi_flags |= CFIF_CLOSE_WAIT;
		error = msleep((caddr_t)&so->so_cfil, mutex_held,
			PSOCK | PCATCH, "cfil_sock_close_wait", &ts);
		so->so_cfil->cfi_flags &= ~CFIF_CLOSE_WAIT;

		CFIL_LOG(LOG_NOTICE, "so %llx timed out %d",
			(uint64_t)VM_KERNEL_ADDRPERM(so), (error != 0));

		/*
		 * Force close in case of timeout
		 */
		if (error != 0) {
			OSIncrementAtomic(&cfil_stats.cfs_close_wait_timeout);
			break;
		}
	}
}
/*
 * Returns the size of the data held by the content filter
 */
int32_t
cfil_sock_data_pending(struct sockbuf *sb)
{
	struct socket *so = sb->sb_so;
	uint64_t pending = 0;

	if ((so->so_flags & SOF_CONTENT_FILTER) != 0 && so->so_cfil != NULL) {
		struct cfi_buf *cfi_buf;

		socket_lock_assert_owned(so);

		if ((sb->sb_flags & SB_RECV) == 0)
			cfi_buf = &so->so_cfil->cfi_snd;
		else
			cfi_buf = &so->so_cfil->cfi_rcv;

		pending = cfi_buf->cfi_pending_last -
			cfi_buf->cfi_pending_first;

		/*
		 * If we are limited by the "chars of mbufs used" roughly
		 * adjust so we won't overcommit
		 */
		if (pending > (uint64_t)cfi_buf->cfi_pending_mbcnt)
			pending = cfi_buf->cfi_pending_mbcnt;
	}

	VERIFY(pending < INT32_MAX);

	return (int32_t)(pending);
}
/*
 * Return the socket buffer space used by data being held by content filters
 * so processes won't clog the socket buffer
 */
int32_t
cfil_sock_data_space(struct sockbuf *sb)
{
	struct socket *so = sb->sb_so;
	uint64_t pending = 0;

	if ((so->so_flags & SOF_CONTENT_FILTER) != 0 && so->so_cfil != NULL &&
		so->so_snd.sb_cfil_thread != current_thread()) {
		struct cfi_buf *cfi_buf;

		socket_lock_assert_owned(so);

		if ((sb->sb_flags & SB_RECV) == 0)
			cfi_buf = &so->so_cfil->cfi_snd;
		else
			cfi_buf = &so->so_cfil->cfi_rcv;

		pending = cfi_buf->cfi_pending_last -
			cfi_buf->cfi_pending_first;

		/*
		 * If we are limited by the "chars of mbufs used" roughly
		 * adjust so we won't overcommit
		 */
		if ((uint64_t)cfi_buf->cfi_pending_mbcnt > pending)
			pending = cfi_buf->cfi_pending_mbcnt;
	}

	VERIFY(pending < INT32_MAX);

	return (int32_t)(pending);
}
/*
 * A callback from the socket and protocol layer when data becomes
 * available in the socket buffer to give a chance for the content filter
 * to re-inject data that was held back
 */
void
cfil_sock_buf_update(struct sockbuf *sb)
{
	int outgoing;
	int error;
	struct socket *so = sb->sb_so;

	if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL)
		return;

	socket_lock_assert_owned(so);

	if ((sb->sb_flags & SB_RECV) == 0) {
		if ((so->so_cfil->cfi_flags & CFIF_RETRY_INJECT_OUT) == 0)
			return;
		outgoing = 1;
		OSIncrementAtomic(&cfil_stats.cfs_inject_q_out_retry);
	} else {
		if ((so->so_cfil->cfi_flags & CFIF_RETRY_INJECT_IN) == 0)
			return;
		outgoing = 0;
		OSIncrementAtomic(&cfil_stats.cfs_inject_q_in_retry);
	}

	CFIL_LOG(LOG_NOTICE, "so %llx outgoing %d",
		(uint64_t)VM_KERNEL_ADDRPERM(so), outgoing);

	error = cfil_acquire_sockbuf(so, outgoing);
	if (error == 0)
		cfil_service_inject_queue(so, outgoing);
	cfil_release_sockbuf(so, outgoing);
}
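
/*
 * sysctl_cfil_filter_list()
 *
 * Descriptive note (added): read-only sysctl handler reporting one
 * cfil_filter_stat per active content filter.  A user space tool would
 * typically size its buffer first, e.g. (hypothetical sketch, OID name
 * illustrative only):
 *
 *	size_t len = 0;
 *	sysctlbyname("net.cfil.filter_list", NULL, &len, NULL, 0);
 *	void *buf = malloc(len);
 *	sysctlbyname("net.cfil.filter_list", buf, &len, NULL, 0);
 */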
static int
sysctl_cfil_filter_list(struct sysctl_oid *oidp, void *arg1, int arg2,
	struct sysctl_req *req)
{
#pragma unused(oidp, arg1, arg2)
	int error = 0;
	size_t len = 0;
	u_int32_t i;

	/* Read only */
	if (req->newptr != USER_ADDR_NULL)
		return (EPERM);

	cfil_rw_lock_shared(&cfil_lck_rw);

	for (i = 0; content_filters != NULL && i < MAX_CONTENT_FILTER; i++) {
		struct cfil_filter_stat filter_stat;
		struct content_filter *cfc = content_filters[i];

		if (cfc == NULL)
			continue;

		/* If just asking for the size */
		if (req->oldptr == USER_ADDR_NULL) {
			len += sizeof(struct cfil_filter_stat);
			continue;
		}

		bzero(&filter_stat, sizeof(struct cfil_filter_stat));
		filter_stat.cfs_len = sizeof(struct cfil_filter_stat);
		filter_stat.cfs_filter_id = cfc->cf_kcunit;
		filter_stat.cfs_flags = cfc->cf_flags;
		filter_stat.cfs_sock_count = cfc->cf_sock_count;
		filter_stat.cfs_necp_control_unit = cfc->cf_necp_control_unit;

		error = SYSCTL_OUT(req, &filter_stat,
			sizeof (struct cfil_filter_stat));
		if (error != 0)
			break;
	}
	/* If just asking for the size */
	if (req->oldptr == USER_ADDR_NULL)
		req->oldidx = len;

	cfil_rw_unlock_shared(&cfil_lck_rw);

	return (error);
}
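
/*
 * sysctl_cfil_sock_list()
 *
 * Descriptive note (added): read-only sysctl handler reporting one
 * cfil_sock_stat per attached socket, including the per-entry queue
 * offsets for each content filter.  When only the size is requested,
 * the estimate is padded by one eighth to leave room for sockets that
 * attach while the copy out is in progress.
 */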
static int sysctl_cfil_sock_list(struct sysctl_oid *oidp, void *arg1, int arg2,
	struct sysctl_req *req)
{
#pragma unused(oidp, arg1, arg2)
	int error = 0;
	u_int32_t i;
	struct cfil_info *cfi;

	/* Read only */
	if (req->newptr != USER_ADDR_NULL)
		return (EPERM);

	cfil_rw_lock_shared(&cfil_lck_rw);

	/*
	 * If just asking for the size
	 */
	if (req->oldptr == USER_ADDR_NULL) {
		req->oldidx = cfil_sock_attached_count *
			sizeof(struct cfil_sock_stat);
		/* Bump the length in case new sockets get attached */
		req->oldidx += req->oldidx >> 3;
		goto done;
	}

	TAILQ_FOREACH(cfi, &cfil_sock_head, cfi_link) {
		struct cfil_entry *entry;
		struct cfil_sock_stat stat;
		struct socket *so = cfi->cfi_so;

		bzero(&stat, sizeof(struct cfil_sock_stat));
		stat.cfs_len = sizeof(struct cfil_sock_stat);
		stat.cfs_sock_id = cfi->cfi_sock_id;
		stat.cfs_flags = cfi->cfi_flags;

		if (so != NULL) {
			stat.cfs_pid = so->last_pid;
			memcpy(stat.cfs_uuid, so->last_uuid,
				sizeof(uuid_t));
			if (so->so_flags & SOF_DELEGATED) {
				stat.cfs_e_pid = so->e_pid;
				memcpy(stat.cfs_e_uuid, so->e_uuid,
					sizeof(uuid_t));
			} else {
				stat.cfs_e_pid = so->last_pid;
				memcpy(stat.cfs_e_uuid, so->last_uuid,
					sizeof(uuid_t));
			}
		}

		stat.cfs_snd.cbs_pending_first =
			cfi->cfi_snd.cfi_pending_first;
		stat.cfs_snd.cbs_pending_last =
			cfi->cfi_snd.cfi_pending_last;
		stat.cfs_snd.cbs_inject_q_len =
			cfil_queue_len(&cfi->cfi_snd.cfi_inject_q);
		stat.cfs_snd.cbs_pass_offset =
			cfi->cfi_snd.cfi_pass_offset;

		stat.cfs_rcv.cbs_pending_first =
			cfi->cfi_rcv.cfi_pending_first;
		stat.cfs_rcv.cbs_pending_last =
			cfi->cfi_rcv.cfi_pending_last;
		stat.cfs_rcv.cbs_inject_q_len =
			cfil_queue_len(&cfi->cfi_rcv.cfi_inject_q);
		stat.cfs_rcv.cbs_pass_offset =
			cfi->cfi_rcv.cfi_pass_offset;

		for (i = 0; i < MAX_CONTENT_FILTER; i++) {
			struct cfil_entry_stat *estat;
			struct cfe_buf *ebuf;
			struct cfe_buf_stat *sbuf;

			entry = &cfi->cfi_entries[i];

			estat = &stat.ces_entries[i];

			estat->ces_len = sizeof(struct cfil_entry_stat);
			estat->ces_filter_id = entry->cfe_filter ?
				entry->cfe_filter->cf_kcunit : 0;
			estat->ces_flags = entry->cfe_flags;
			estat->ces_necp_control_unit =
				entry->cfe_necp_control_unit;

			estat->ces_last_event.tv_sec =
				(int64_t)entry->cfe_last_event.tv_sec;
			estat->ces_last_event.tv_usec =
				(int64_t)entry->cfe_last_event.tv_usec;

			estat->ces_last_action.tv_sec =
				(int64_t)entry->cfe_last_action.tv_sec;
			estat->ces_last_action.tv_usec =
				(int64_t)entry->cfe_last_action.tv_usec;

			ebuf = &entry->cfe_snd;
			sbuf = &estat->ces_snd;
			sbuf->cbs_pending_first =
				cfil_queue_offset_first(&ebuf->cfe_pending_q);
			sbuf->cbs_pending_last =
				cfil_queue_offset_last(&ebuf->cfe_pending_q);
			sbuf->cbs_ctl_first =
				cfil_queue_offset_first(&ebuf->cfe_ctl_q);
			sbuf->cbs_ctl_last =
				cfil_queue_offset_last(&ebuf->cfe_ctl_q);
			sbuf->cbs_pass_offset = ebuf->cfe_pass_offset;
			sbuf->cbs_peek_offset = ebuf->cfe_peek_offset;
			sbuf->cbs_peeked = ebuf->cfe_peeked;

			ebuf = &entry->cfe_rcv;
			sbuf = &estat->ces_rcv;
			sbuf->cbs_pending_first =
				cfil_queue_offset_first(&ebuf->cfe_pending_q);
			sbuf->cbs_pending_last =
				cfil_queue_offset_last(&ebuf->cfe_pending_q);
			sbuf->cbs_ctl_first =
				cfil_queue_offset_first(&ebuf->cfe_ctl_q);
			sbuf->cbs_ctl_last =
				cfil_queue_offset_last(&ebuf->cfe_ctl_q);
			sbuf->cbs_pass_offset = ebuf->cfe_pass_offset;
			sbuf->cbs_peek_offset = ebuf->cfe_peek_offset;
			sbuf->cbs_peeked = ebuf->cfe_peeked;
		}
		error = SYSCTL_OUT(req, &stat,
			sizeof (struct cfil_sock_stat));
		if (error != 0)
			break;
	}
done:
	cfil_rw_unlock_shared(&cfil_lck_rw);

	return (error);
}