 * Copyright (c) 2013-2014 Apple Inc. All rights reserved.
 *
 * @APPLE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this
 * file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_LICENSE_HEADER_END@
 */

/*
 * The socket content filter subsystem provides a way for user space agents to
 * make filtering decisions based on the content of the data being sent and
 * received by TCP/IP sockets.
 *
 * A content filter user space agent gets a copy of the data, and the data is
 * also kept in a kernel buffer until the user space agent makes a pass or
 * drop decision. This unidirectional flow of content avoids unnecessary data
 * copies.
 *
 * A user space filter agent opens a kernel control socket with the name
 * CONTENT_FILTER_CONTROL_NAME to attach to the socket content filter subsystem.
 * When connected, a "struct content_filter" is created and set as the
 * "unitinfo" of the corresponding kernel control socket instance.
 *
 * The socket content filter subsystem exchanges messages with the user space
 * filter agent until an ultimate pass or drop decision is made by the
 * user space filter agent.
 *
 * It should be noted that messages about many TCP/IP sockets can be multiplexed
 * over a single kernel control socket.
 *
 * Note:
 * - The current implementation is limited to TCP sockets.
 * - The current implementation supports up to two simultaneous content filters
 *   for the sake of simplicity of the implementation.
 *
 *
 * NECP FILTER CONTROL UNIT
 *
 * A user space filter agent uses the Network Extension Control Policy (NECP)
 * database to specify which TCP/IP sockets need to be filtered. The NECP
 * criteria may be based on a variety of properties like user ID or proc UUID.
 *
 * The NECP "filter control unit" is used by the socket content filter subsystem
 * to deliver the relevant TCP/IP content information to the appropriate
 * user space filter agent via its kernel control socket instance.
 * This works as follows:
 *
 * 1) The user space filter agent specifies an NECP filter control unit when
 *    it adds its filtering rules to the NECP database.
 *
 * 2) The user space filter agent also sets its NECP filter control unit on the
 *    content filter kernel control socket via the socket option
 *    CFIL_OPT_NECP_CONTROL_UNIT.
 *
 * 3) The NECP database is consulted to find out if a given TCP/IP socket
 *    needs to be subjected to content filtering and returns the corresponding
 *    NECP filter control unit -- the NECP filter control unit is actually
 *    stored in the TCP/IP socket structure so the NECP lookup is really simple.
 *
 * 4) The NECP filter control unit is then used to find the corresponding
 *    kernel control socket instance.
 *
 * Note: NECP currently supports a single filter control unit per TCP/IP socket
 * but this restriction may soon be lifted.
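 *
 * For illustration only (this is a sketch, not kernel code; error handling is
 * omitted and the control unit value 1 is just an example that must match the
 * unit used in the NECP rules), a user space agent typically attaches along
 * these lines:
 *
 *	int fd;
 *	struct ctl_info info;
 *	struct sockaddr_ctl addr;
 *	uint32_t control_unit = 1;
 *
 *	fd = socket(PF_SYSTEM, SOCK_DGRAM, SYSPROTO_CONTROL);
 *	bzero(&info, sizeof(info));
 *	strlcpy(info.ctl_name, CONTENT_FILTER_CONTROL_NAME,
 *	    sizeof(info.ctl_name));
 *	ioctl(fd, CTLIOCGINFO, &info);
 *	bzero(&addr, sizeof(addr));
 *	addr.sc_len = sizeof(addr);
 *	addr.sc_family = AF_SYSTEM;
 *	addr.ss_sysaddr = AF_SYS_CONTROL;
 *	addr.sc_id = info.ctl_id;
 *	addr.sc_unit = 0;
 *	connect(fd, (struct sockaddr *)&addr, sizeof(addr));
 *	setsockopt(fd, SYSPROTO_CONTROL, CFIL_OPT_NECP_CONTROL_UNIT,
 *	    &control_unit, sizeof(control_unit));
 *
 * With sc_unit left at 0 the kernel control framework typically picks the
 * kcunit; an explicit unit in the range 1..CFIL_MAX_FILTER_COUNT may also be
 * requested.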
 *
 *
 * THE MESSAGING PROTOCOL
 *
 * The socket content filter subsystem and a user space filter agent
 * communicate over the kernel control socket via an asynchronous
 * messaging protocol (this is not a request-response protocol).
 * The socket content filter subsystem sends event messages to the user
 * space filter agent about the TCP/IP sockets it is interested in filtering.
 * The user space filter agent sends action messages to either allow
 * data to pass or to disallow the data flow (and drop the connection).
 *
 * All messages over a content filter kernel control socket share the same
 * common header of type "struct cfil_msg_hdr". The message type tells if
 * it's an event message "CFM_TYPE_EVENT" or an action message "CFM_TYPE_ACTION".
 * The message header field "cfm_sock_id" identifies a given TCP/IP socket.
 * Note the message header length field may be padded for alignment and can
 * be larger than the actual content of the message.
 * The field "cfm_op" describes the kind of event or action.
 *
 * Here are the kinds of content filter events:
 * - CFM_OP_SOCKET_ATTACHED: a new TCP/IP socket is being filtered
 * - CFM_OP_SOCKET_CLOSED: a TCP/IP socket is closed
 * - CFM_OP_DATA_OUT: a span of data is being sent on a TCP/IP socket
 * - CFM_OP_DATA_IN: a span of data is being received on a TCP/IP socket
 *
 * The CFM_OP_DATA_OUT and CFM_OP_DATA_IN event messages contain a span of
 * data that is being sent or received. The position of this span of data
 * in the data flow is described by a set of start and end offsets. These
 * are absolute 64-bit offsets. The first byte sent (or received) starts
 * at offset 0 and ends at offset 1. The length of the content data
 * is given by the difference between the end offset and the start offset.
 *
 * After a CFM_OP_SOCKET_ATTACHED is delivered, CFM_OP_DATA_OUT and
 * CFM_OP_DATA_IN events are not delivered until a CFM_OP_DATA_UPDATE
 * action message is sent by the user space filter agent.
 *
 * Note: absolute 64-bit offsets should be large enough for the foreseeable
 * future. A 64-bit counter will wrap after roughly 468 years at 10 Gbit/sec:
 * 2^64 / ((10^10 / 8) * 60 * 60 * 24 * 365.25) = 467.63
 *
 * There are two kinds of content filter actions:
 * - CFM_OP_DATA_UPDATE: to update the pass or peek offsets for each direction.
 * - CFM_OP_DROP: to shut down the socket and disallow further data flow.
 *
 * The CFM_OP_DATA_UPDATE action messages let the user space filter
 * agent allow data to flow up to the specified pass offset -- there
 * is a pass offset for outgoing data and a pass offset for incoming data.
 * When a new TCP/IP socket is attached to the content filter, each pass offset
 * is initially set to 0 so no data is allowed to pass by default.
 * When a pass offset is set to CFM_MAX_OFFSET via a CFM_OP_DATA_UPDATE
 * message, the data flow becomes unrestricted.
 *
 * Note that pass offsets can only be incremented. A CFM_OP_DATA_UPDATE message
 * with a pass offset smaller than the pass offset of a previous
 * CFM_OP_DATA_UPDATE message is silently ignored.
 *
 * A user space filter agent also uses CFM_OP_DATA_UPDATE action messages
 * to tell the kernel how much data it wants to see by using the peek offsets.
 * Just like pass offsets, there is a peek offset for each direction.
 * When a new TCP/IP socket is attached to the content filter, each peek offset
 * is initially set to 0 so no CFM_OP_DATA_OUT and CFM_OP_DATA_IN event
 * messages are dispatched by default until a CFM_OP_DATA_UPDATE action message
 * with a peek offset greater than 0 is sent by the user space filter agent.
 * When a peek offset is set to CFM_MAX_OFFSET via a CFM_OP_DATA_UPDATE
 * message, the flow of data events becomes unrestricted.
 *
 * Note that peek offsets cannot be smaller than the corresponding pass offset.
 * Also, a peek offset cannot be smaller than the corresponding end offset
 * of the last CFM_OP_DATA_OUT/CFM_OP_DATA_IN message dispatched. Trying
 * to set too small a peek value is silently ignored.
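 *
 * As a hedged illustration (field names per "struct cfil_msg_action" in
 * <net/content_filter.h>; the offsets below are example values only), an
 * agent that wants to pass the first 1024 outgoing bytes while peeking at
 * the next 512 could send:
 *
 *	struct cfil_msg_action action;
 *
 *	bzero(&action, sizeof(action));
 *	action.cfa_msghdr.cfm_len = sizeof(action);
 *	action.cfa_msghdr.cfm_version = CFM_VERSION_CURRENT;
 *	action.cfa_msghdr.cfm_type = CFM_TYPE_ACTION;
 *	action.cfa_msghdr.cfm_op = CFM_OP_DATA_UPDATE;
 *	action.cfa_msghdr.cfm_sock_id = sock_id;
 *	action.cfa_out_pass_offset = 1024;
 *	action.cfa_out_peek_offset = 1024 + 512;
 *	action.cfa_in_pass_offset = 0;
 *	action.cfa_in_peek_offset = 0;
 *	send(fd, &action, sizeof(action), 0);
 *
 * where sock_id is the cfm_sock_id taken from the CFM_OP_SOCKET_ATTACHED
 * event and fd is the kernel control socket. Leaving the incoming offsets
 * at 0 leaves that direction unchanged.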
 *
 *
 * PER SOCKET "struct cfil_info"
 *
 * As soon as a TCP/IP socket gets attached to a content filter, a
 * "struct cfil_info" is created to hold the content filtering state for this
 * socket.
 *
 * The content filtering state is made of the following information
 * for each direction:
 * - The current pass offset;
 * - The first and last offsets of the data pending, waiting for a filtering
 *   decision;
 * - The inject queue for data that passed the filters and that needs
 *   to be re-injected into the socket;
 * - A content filter specific state in a set of "struct cfil_entry"
 *
 *
 * CONTENT FILTER STATE "struct cfil_entry"
 *
 * The "struct cfil_entry" maintains the information most relevant to the
 * message handling over a kernel control socket with a user space filter agent.
 *
 * The "struct cfil_entry" holds the NECP filter control unit of the kernel
 * control socket instance it corresponds to and also has a pointer
 * to the corresponding "struct content_filter".
 *
 * For each direction, "struct cfil_entry" maintains the following information:
 * - The offset of the last data peeked at by the filter
 * - A queue of data that's waiting to be delivered to the user space filter
 *   agent on the kernel control socket
 * - A queue of data for which event messages have been sent on the kernel
 *   control socket and are pending for a filtering decision.
 *
 *
 * CONTENT FILTER QUEUES
 *
 * Data that is being filtered is steered away from the TCP/IP socket buffer
 * and instead will sit in one of three content filter queues until the data
 * can be re-injected into the TCP/IP socket buffer.
 *
 * A content filter queue is represented by "struct cfil_queue" that contains
 * a list of mbufs and the start and end offsets of the data span of the
 * queued mbufs.
 *
 * The data moves into the three content filter queues according to this
 * sequence:
 * a) The "cfe_ctl_q" of "struct cfil_entry"
 * b) The "cfe_pending_q" of "struct cfil_entry"
 * c) The "cfi_inject_q" of "struct cfil_info"
 *
 * Note: the sequence (a),(b) may be repeated several times if more than one
 * content filter is attached to the TCP/IP socket.
 *
 * The "cfe_ctl_q" queue holds data that cannot be delivered to the
 * kernel control socket for two reasons:
 * - The peek offset is less than the end offset of the mbuf data
 * - The kernel control socket is flow controlled
 *
 * The "cfe_pending_q" queue holds data for which CFM_OP_DATA_OUT or
 * CFM_OP_DATA_IN have been successfully dispatched to the kernel control
 * socket and that is waiting for a pass action message from the user space
 * filter agent. The full length of an mbuf must be allowed to pass before
 * it can be removed from the cfe_pending_q.
 *
 * The "cfi_inject_q" queue holds data that has been fully allowed to pass
 * by the user space filter agent and that needs to be re-injected into the
 * TCP/IP socket buffer.
 *
 *
 * IMPACT ON FLOW CONTROL
 *
 * An essential aspect of the content filter subsystem is to minimize the
 * impact on flow control of the TCP/IP sockets being filtered.
 *
 * The processing overhead of the content filtering may have an effect on
 * flow control by adding noticeable delays and cannot be eliminated --
 * care must be taken by the user space filter agent to minimize the
 * processing overhead.
 *
 * The amount of data being filtered is kept in buffers while waiting for
 * a decision by the user space filter agent. This amount of pending data
 * needs to be subtracted from the amount of data available in the
 * corresponding TCP/IP socket buffer. This is done by modifying
 * sbspace() and tcp_sbspace() to account for the amount of data pending
 * in the content filter.
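 *
 * Conceptually (this is a simplification, not the exact code), the
 * adjustment amounts to:
 *
 *	pending = cfi_pending_last - cfi_pending_first;
 *	space = sbspace(sb) - pending;
 *
 * so that data still held by the content filter does not show up as
 * available room in the TCP/IP socket buffer.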
 *
 *
 * LOCKING STRATEGY
 *
 * The global state of the content filter subsystem is protected by a single
 * read-write lock "cfil_lck_rw". The data flow can be done with the
 * cfil read-write lock held as shared so it can be re-entered from multiple
 * threads.
 *
 * The per TCP/IP socket content filter state -- "struct cfil_info" -- is
 * protected by the socket lock.
 *
 * A TCP/IP socket lock cannot be taken while the cfil read-write lock
 * is held. That's why we have some sequences where we drop the cfil read-write
 * lock before taking the TCP/IP socket lock.
 *
 * It is also important to lock the TCP/IP socket buffer while the content
 * filter is modifying the amount of pending data. Otherwise the calculations
 * in sbspace() and tcp_sbspace() could be wrong.
 *
 * The "cfil_lck_rw" protects "struct content_filter" and also the fields
 * "cfe_link" and "cfe_filter" of "struct cfil_entry".
 *
 * Actually "cfe_link" and "cfe_filter" are protected both by
 * "cfil_lck_rw" and the socket lock: they may be modified only when
 * "cfil_lck_rw" is held exclusive and the socket is locked.
 *
 * To read the other fields of "struct content_filter" we have to take
 * "cfil_lck_rw" in shared mode.
 *
 * - For TCP sockets only
 *
 * - Does not support TCP unordered messages
 *
 * - If datagram support is added, control and address mbufs will need to be
 *   enqueued as well
 */

#include <sys/types.h>
#include <sys/kern_control.h>
#include <sys/queue.h>
#include <sys/domain.h>
#include <sys/protosw.h>
#include <sys/syslog.h>

#include <kern/locks.h>
#include <kern/zalloc.h>
#include <kern/debug.h>

#include <net/content_filter.h>

#include <netinet/in_pcb.h>
#include <netinet/tcp.h>
#include <netinet/tcp_var.h>

#include <libkern/libkern.h>

#define MAX_CONTENT_FILTER 2

/*
 * The structure content_filter represents a user space content filter.
 * It's created and associated with a kernel control socket instance.
 */
struct content_filter {
	kern_ctl_ref		cf_kcref;
	u_int32_t		cf_kcunit;
	uint32_t		cf_flags;

	uint32_t		cf_necp_control_unit;

	uint32_t		cf_sock_count;
	TAILQ_HEAD(, cfil_entry) cf_sock_entries;
};

#define CFF_ACTIVE		0x01
#define CFF_DETACHING		0x02
#define CFF_FLOW_CONTROLLED	0x04

struct content_filter **content_filters = NULL;
uint32_t cfil_active_count = 0;	/* Number of active content filters */
uint32_t cfil_sock_attached_count = 0;	/* Number of socket attachments */
uint32_t cfil_close_wait_timeout = 1000; /* in milliseconds */

static kern_ctl_ref cfil_kctlref = NULL;

static lck_grp_attr_t *cfil_lck_grp_attr = NULL;
static lck_attr_t *cfil_lck_attr = NULL;
static lck_grp_t *cfil_lck_grp = NULL;
decl_lck_rw_data(static, cfil_lck_rw);

#define CFIL_RW_LCK_MAX 8

int cfil_rw_nxt_lck = 0;
void* cfil_rw_lock_history[CFIL_RW_LCK_MAX];

int cfil_rw_nxt_unlck = 0;
void* cfil_rw_unlock_history[CFIL_RW_LCK_MAX];

#define CONTENT_FILTER_ZONE_NAME	"content_filter"
#define CONTENT_FILTER_ZONE_MAX		10
static struct zone *content_filter_zone = NULL;	/* zone for content_filter */

#define CFIL_INFO_ZONE_NAME	"cfil_info"
#define CFIL_INFO_ZONE_MAX	1024
static struct zone *cfil_info_zone = NULL;	/* zone for cfil_info */

MBUFQ_HEAD(cfil_mqhead);

struct cfil_queue {
	uint64_t		q_start; /* offset of first byte in queue */
	uint64_t		q_end;	 /* offset of last byte in queue */
	struct cfil_mqhead	q_mq;
};

/*
 * There is one entry per content filter.
 */
struct cfil_entry {
	TAILQ_ENTRY(cfil_entry)	cfe_link;
	struct content_filter	*cfe_filter;

	struct cfil_info	*cfe_cfil_info;
	uint32_t		cfe_flags;
	uint32_t		cfe_necp_control_unit;
	struct timeval		cfe_last_event;		/* To user space */
	struct timeval		cfe_last_action;	/* From user space */

	struct cfe_buf {
		/*
		 * cfe_pending_q holds data that has been delivered to
		 * the filter and for which we are waiting for an action.
		 */
		struct cfil_queue	cfe_pending_q;
		/*
		 * This queue is for data that has not been delivered to
		 * the content filter (new data, pass peek or flow control).
		 */
		struct cfil_queue	cfe_ctl_q;

		uint64_t		cfe_pass_offset;
		uint64_t		cfe_peek_offset;
		uint64_t		cfe_peeked;
	} cfe_snd, cfe_rcv;
};

#define CFEF_CFIL_ATTACHED		0x0001	/* was attached to filter */
#define CFEF_SENT_SOCK_ATTACHED		0x0002	/* sock attach event was sent */
#define CFEF_DATA_START			0x0004	/* can send data event */
#define CFEF_FLOW_CONTROLLED		0x0008	/* wait for flow control lift */
#define CFEF_SENT_DISCONNECT_IN		0x0010	/* event was sent */
#define CFEF_SENT_DISCONNECT_OUT	0x0020	/* event was sent */
#define CFEF_SENT_SOCK_CLOSED		0x0040	/* closed event was sent */
#define CFEF_CFIL_DETACHED		0x0080	/* filter was detached */

/*
 * There is a struct cfil_info per socket.
 */
struct cfil_info {
	TAILQ_ENTRY(cfil_info)	cfi_link;
	struct socket		*cfi_so;
	uint32_t		cfi_flags;
	uint64_t		cfi_sock_id;

	struct cfi_buf {
		/*
		 * cfi_pending_first and cfi_pending_last describe the total
		 * amount of data outstanding for all the filters on
		 * this socket and data in the flow queue.
		 * cfi_pending_mbcnt counts in sballoc() "chars of mbufs used"
		 */
		uint64_t		cfi_pending_first;
		uint64_t		cfi_pending_last;
		int			cfi_pending_mbcnt;
		/*
		 * cfi_pass_offset is the minimum of all the filters
		 */
		uint64_t		cfi_pass_offset;
		/*
		 * cfi_inject_q holds data that needs to be re-injected
		 * into the socket after filtering and that can
		 * be queued because of flow control
		 */
		struct cfil_queue	cfi_inject_q;
	} cfi_snd, cfi_rcv;

	struct cfil_entry	cfi_entries[MAX_CONTENT_FILTER];
};

#define CFIF_DROP		0x0001	/* drop action applied */
#define CFIF_CLOSE_WAIT		0x0002	/* waiting for filter to close */
#define CFIF_SOCK_CLOSED	0x0004	/* socket is closed */
#define CFIF_RETRY_INJECT_IN	0x0010	/* inject in failed */
#define CFIF_RETRY_INJECT_OUT	0x0020	/* inject out failed */
#define CFIF_SHUT_WR		0x0040	/* shutdown write */
#define CFIF_SHUT_RD		0x0080	/* shutdown read */

#define CFI_MASK_GENCNT		0xFFFFFFFF00000000	/* upper 32 bits */
#define CFI_SHIFT_GENCNT	32
#define CFI_MASK_FLOWHASH	0x00000000FFFFFFFF	/* lower 32 bits */
#define CFI_SHIFT_FLOWHASH	0
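
/*
 * Illustrative note: a cfil_sock_id packs the socket generation count in the
 * upper 32 bits and the inp flow hash in the lower 32 bits, roughly:
 *
 *	sock_id = ((so->so_gencnt << CFI_SHIFT_GENCNT) & CFI_MASK_GENCNT) |
 *	    ((inp->inp_flowhash << CFI_SHIFT_FLOWHASH) & CFI_MASK_FLOWHASH);
 *
 * See cfil_info_alloc() and cfil_socket_from_sock_id() for the actual code.
 */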

TAILQ_HEAD(cfil_sock_head, cfil_info) cfil_sock_head;

#define CFIL_QUEUE_VERIFY(x) if (cfil_debug) cfil_queue_verify(x)
#define CFIL_INFO_VERIFY(x) if (cfil_debug) cfil_info_verify(x)

struct cfil_stats cfil_stats;

/*
 * For troubleshooting
 */
int cfil_log_level = LOG_ERR;
int cfil_debug = 0;

/*
 * Sysctls for logs and statistics
 */
static int sysctl_cfil_filter_list(struct sysctl_oid *, void *, int,
	struct sysctl_req *);
static int sysctl_cfil_sock_list(struct sysctl_oid *, void *, int,
	struct sysctl_req *);

SYSCTL_NODE(_net, OID_AUTO, cfil, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "cfil");

SYSCTL_INT(_net_cfil, OID_AUTO, log, CTLFLAG_RW|CTLFLAG_LOCKED,
	&cfil_log_level, 0, "");

SYSCTL_INT(_net_cfil, OID_AUTO, debug, CTLFLAG_RW|CTLFLAG_LOCKED,
	&cfil_debug, 0, "");

SYSCTL_UINT(_net_cfil, OID_AUTO, sock_attached_count, CTLFLAG_RD|CTLFLAG_LOCKED,
	&cfil_sock_attached_count, 0, "");

SYSCTL_UINT(_net_cfil, OID_AUTO, active_count, CTLFLAG_RD|CTLFLAG_LOCKED,
	&cfil_active_count, 0, "");

SYSCTL_UINT(_net_cfil, OID_AUTO, close_wait_timeout, CTLFLAG_RW|CTLFLAG_LOCKED,
	&cfil_close_wait_timeout, 0, "");

static int cfil_sbtrim = 1;
SYSCTL_UINT(_net_cfil, OID_AUTO, sbtrim, CTLFLAG_RW|CTLFLAG_LOCKED,
	&cfil_sbtrim, 0, "");

SYSCTL_PROC(_net_cfil, OID_AUTO, filter_list, CTLFLAG_RD|CTLFLAG_LOCKED,
	0, 0, sysctl_cfil_filter_list, "S,cfil_filter_stat", "");

SYSCTL_PROC(_net_cfil, OID_AUTO, sock_list, CTLFLAG_RD|CTLFLAG_LOCKED,
	0, 0, sysctl_cfil_sock_list, "S,cfil_sock_stat", "");

SYSCTL_STRUCT(_net_cfil, OID_AUTO, stats, CTLFLAG_RD|CTLFLAG_LOCKED,
	&cfil_stats, cfil_stats, "");

/*
 * Forward declaration to appease the compiler
 */
static int cfil_action_data_pass(struct socket *, uint32_t, int,
	uint64_t, uint64_t);
static int cfil_action_drop(struct socket *, uint32_t);
static int cfil_dispatch_closed_event(struct socket *, int);
static int cfil_data_common(struct socket *, int, struct sockaddr *,
	struct mbuf *, struct mbuf *, uint32_t);
static int cfil_data_filter(struct socket *, uint32_t, int,
	struct mbuf *, uint64_t);
static void fill_ip_sockaddr_4_6(union sockaddr_in_4_6 *,
	struct in_addr, u_int16_t);
static void fill_ip6_sockaddr_4_6(union sockaddr_in_4_6 *,
	struct in6_addr *, u_int16_t);
static int cfil_dispatch_attach_event(struct socket *, uint32_t);
static void cfil_info_free(struct socket *, struct cfil_info *);
static struct cfil_info * cfil_info_alloc(struct socket *);
static int cfil_info_attach_unit(struct socket *, uint32_t);
static struct socket * cfil_socket_from_sock_id(cfil_sock_id_t);
static int cfil_service_pending_queue(struct socket *, uint32_t, int);
static int cfil_data_service_ctl_q(struct socket *, uint32_t, int);
static void cfil_info_verify(struct cfil_info *);
static int cfil_update_data_offsets(struct socket *, uint32_t, int,
	uint64_t, uint64_t);
static int cfil_acquire_sockbuf(struct socket *, int);
static void cfil_release_sockbuf(struct socket *, int);
static int cfil_filters_attached(struct socket *);

static void cfil_rw_lock_exclusive(lck_rw_t *);
static void cfil_rw_unlock_exclusive(lck_rw_t *);
static void cfil_rw_lock_shared(lck_rw_t *);
static void cfil_rw_unlock_shared(lck_rw_t *);
static boolean_t cfil_rw_lock_shared_to_exclusive(lck_rw_t *);
static void cfil_rw_lock_exclusive_to_shared(lck_rw_t *);

static unsigned int cfil_data_length(struct mbuf *, int *);

/*
 * Content filter global read write lock
 */

static void
cfil_rw_lock_exclusive(lck_rw_t *lck)
{
	void *lr_saved;

	lr_saved = __builtin_return_address(0);

	lck_rw_lock_exclusive(lck);

	cfil_rw_lock_history[cfil_rw_nxt_lck] = lr_saved;
	cfil_rw_nxt_lck = (cfil_rw_nxt_lck + 1) % CFIL_RW_LCK_MAX;
}

static void
cfil_rw_unlock_exclusive(lck_rw_t *lck)
{
	void *lr_saved;

	lr_saved = __builtin_return_address(0);

	lck_rw_unlock_exclusive(lck);

	cfil_rw_unlock_history[cfil_rw_nxt_unlck] = lr_saved;
	cfil_rw_nxt_unlck = (cfil_rw_nxt_unlck + 1) % CFIL_RW_LCK_MAX;
}

static void
cfil_rw_lock_shared(lck_rw_t *lck)
{
	void *lr_saved;

	lr_saved = __builtin_return_address(0);

	lck_rw_lock_shared(lck);

	cfil_rw_lock_history[cfil_rw_nxt_lck] = lr_saved;
	cfil_rw_nxt_lck = (cfil_rw_nxt_lck + 1) % CFIL_RW_LCK_MAX;
}

static void
cfil_rw_unlock_shared(lck_rw_t *lck)
{
	void *lr_saved;

	lr_saved = __builtin_return_address(0);

	lck_rw_unlock_shared(lck);

	cfil_rw_unlock_history[cfil_rw_nxt_unlck] = lr_saved;
	cfil_rw_nxt_unlck = (cfil_rw_nxt_unlck + 1) % CFIL_RW_LCK_MAX;
}

static boolean_t
cfil_rw_lock_shared_to_exclusive(lck_rw_t *lck)
{
	void *lr_saved;
	boolean_t upgraded;

	lr_saved = __builtin_return_address(0);

	upgraded = lck_rw_lock_shared_to_exclusive(lck);
	if (upgraded) {
		cfil_rw_unlock_history[cfil_rw_nxt_unlck] = lr_saved;
		cfil_rw_nxt_unlck = (cfil_rw_nxt_unlck + 1) % CFIL_RW_LCK_MAX;
	}
	return (upgraded);
}

static void
cfil_rw_lock_exclusive_to_shared(lck_rw_t *lck)
{
	void *lr_saved;

	lr_saved = __builtin_return_address(0);

	lck_rw_lock_exclusive_to_shared(lck);

	cfil_rw_lock_history[cfil_rw_nxt_lck] = lr_saved;
	cfil_rw_nxt_lck = (cfil_rw_nxt_lck + 1) % CFIL_RW_LCK_MAX;
}

static void
cfil_rw_lock_assert_held(lck_rw_t *lck, int exclusive)
{
	lck_rw_assert(lck,
	    exclusive ? LCK_RW_ASSERT_EXCLUSIVE : LCK_RW_ASSERT_HELD);
}

static void
socket_lock_assert_owned(struct socket *so)
{
	lck_mtx_t *mutex_held;

	if (so->so_proto->pr_getlock != NULL)
		mutex_held = (*so->so_proto->pr_getlock)(so, 0);
	else
		mutex_held = so->so_proto->pr_domain->dom_mtx;

	lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);
}

/*
 * Return the number of bytes in the mbuf chain using the same
 * method as m_length() or sballoc()
 */
static unsigned int
cfil_data_length(struct mbuf *m, int *retmbcnt)
{
	struct mbuf *m0;
	unsigned int pktlen;
	int mbcnt;

	if (retmbcnt == NULL)
		return (m_length(m));

	pktlen = 0;
	mbcnt = 0;
	for (m0 = m; m0 != NULL; m0 = m0->m_next) {
		pktlen += m0->m_len;
		mbcnt += MSIZE;
		if (m0->m_flags & M_EXT)
			mbcnt += m0->m_ext.ext_size;
	}
	*retmbcnt = mbcnt;
	return (pktlen);
}

/*
 * Common mbuf queue utilities
 */

static inline void
cfil_queue_init(struct cfil_queue *cfq)
{
	cfq->q_start = 0;
	cfq->q_end = 0;
	MBUFQ_INIT(&cfq->q_mq);
}

static inline uint64_t
cfil_queue_drain(struct cfil_queue *cfq)
{
	uint64_t drained = cfq->q_start - cfq->q_end;

	cfq->q_start = 0;
	cfq->q_end = 0;
	MBUFQ_DRAIN(&cfq->q_mq);

	return (drained);
}

/* Return 1 when empty, 0 otherwise */
static inline int
cfil_queue_empty(struct cfil_queue *cfq)
{
	return (MBUFQ_EMPTY(&cfq->q_mq));
}

static inline uint64_t
cfil_queue_offset_first(struct cfil_queue *cfq)
{
	return (cfq->q_start);
}

static inline uint64_t
cfil_queue_offset_last(struct cfil_queue *cfq)
{
	return (cfq->q_end);
}

static inline uint64_t
cfil_queue_len(struct cfil_queue *cfq)
{
	return (cfq->q_end - cfq->q_start);
}

/*
 * Routines to verify some fundamental assumptions
 */

static void
cfil_queue_verify(struct cfil_queue *cfq)
{
	mbuf_t m;
	mbuf_t n;
	uint64_t queuesize = 0;

	/* Verify offsets are ordered */
	VERIFY(cfq->q_start <= cfq->q_end);

	/*
	 * When the queue is empty, the offsets are equal, otherwise the
	 * offsets are different
	 */
	VERIFY((MBUFQ_EMPTY(&cfq->q_mq) && cfq->q_start == cfq->q_end) ||
	    (!MBUFQ_EMPTY(&cfq->q_mq) &&
	    cfq->q_start != cfq->q_end));

	MBUFQ_FOREACH(m, &cfq->q_mq) {
		size_t chainsize = 0;
		unsigned int mlen = m_length(m);

		if (m == (void *)M_TAG_FREE_PATTERN ||
		    m->m_next == (void *)M_TAG_FREE_PATTERN ||
		    m->m_nextpkt == (void *)M_TAG_FREE_PATTERN)
			panic("%s - mq %p is free at %p", __func__,
			    &cfq->q_mq, m);
		for (n = m; n != NULL; n = n->m_next) {
			if (n->m_type != MT_DATA &&
			    n->m_type != MT_HEADER &&
			    n->m_type != MT_OOBDATA)
				panic("%s - %p unsupported type %u", __func__,
				    n, n->m_type);
			chainsize += n->m_len;
		}
		if (mlen != chainsize)
			panic("%s - %p m_length() %u != chainsize %lu",
			    __func__, m, mlen, chainsize);
		queuesize += chainsize;
	}
	if (queuesize != cfq->q_end - cfq->q_start)
		panic("%s - %p queuesize %llu != offsetdiffs %llu", __func__,
		    m, queuesize, cfq->q_end - cfq->q_start);
}

static void
cfil_queue_enqueue(struct cfil_queue *cfq, mbuf_t m, size_t len)
{
	CFIL_QUEUE_VERIFY(cfq);

	MBUFQ_ENQUEUE(&cfq->q_mq, m);
	cfq->q_end += len;

	CFIL_QUEUE_VERIFY(cfq);
}

static void
cfil_queue_remove(struct cfil_queue *cfq, mbuf_t m, size_t len)
{
	CFIL_QUEUE_VERIFY(cfq);

	VERIFY(m_length(m) == len);

	MBUFQ_REMOVE(&cfq->q_mq, m);
	MBUFQ_NEXT(m) = NULL;
	cfq->q_start += len;

	CFIL_QUEUE_VERIFY(cfq);
}

static mbuf_t
cfil_queue_first(struct cfil_queue *cfq)
{
	return (MBUFQ_FIRST(&cfq->q_mq));
}

static mbuf_t
cfil_queue_next(struct cfil_queue *cfq, mbuf_t m)
{
#pragma unused(cfq)
	return (MBUFQ_NEXT(m));
}

static void
cfil_entry_buf_verify(struct cfe_buf *cfe_buf)
{
	CFIL_QUEUE_VERIFY(&cfe_buf->cfe_ctl_q);
	CFIL_QUEUE_VERIFY(&cfe_buf->cfe_pending_q);

	/* Verify the queues are ordered so that pending is before ctl */
	VERIFY(cfe_buf->cfe_ctl_q.q_start >= cfe_buf->cfe_pending_q.q_end);

	/* The peek offset cannot be less than the pass offset */
	VERIFY(cfe_buf->cfe_peek_offset >= cfe_buf->cfe_pass_offset);

	/* Make sure we've updated the offset we peeked at */
	VERIFY(cfe_buf->cfe_ctl_q.q_start <= cfe_buf->cfe_peeked);
}

static void
cfil_entry_verify(struct cfil_entry *entry)
{
	cfil_entry_buf_verify(&entry->cfe_snd);
	cfil_entry_buf_verify(&entry->cfe_rcv);
}

static void
cfil_info_buf_verify(struct cfi_buf *cfi_buf)
{
	CFIL_QUEUE_VERIFY(&cfi_buf->cfi_inject_q);

	VERIFY(cfi_buf->cfi_pending_first <= cfi_buf->cfi_pending_last);
	VERIFY(cfi_buf->cfi_pending_mbcnt >= 0);
}

static void
cfil_info_verify(struct cfil_info *cfil_info)
{
	int i;

	if (cfil_info == NULL)
		return;

	cfil_info_buf_verify(&cfil_info->cfi_snd);
	cfil_info_buf_verify(&cfil_info->cfi_rcv);

	for (i = 0; i < MAX_CONTENT_FILTER; i++)
		cfil_entry_verify(&cfil_info->cfi_entries[i]);
}

static void
verify_content_filter(struct content_filter *cfc)
{
	struct cfil_entry *entry;
	uint32_t count = 0;

	VERIFY(cfc->cf_sock_count >= 0);

	TAILQ_FOREACH(entry, &cfc->cf_sock_entries, cfe_link) {
		count++;
		VERIFY(cfc == entry->cfe_filter);
	}
	VERIFY(count == cfc->cf_sock_count);
}
891 * Kernel control socket callbacks
894 cfil_ctl_connect(kern_ctl_ref kctlref
, struct sockaddr_ctl
*sac
,
898 struct content_filter
*cfc
= NULL
;
900 CFIL_LOG(LOG_NOTICE
, "");
902 cfc
= zalloc(content_filter_zone
);
904 CFIL_LOG(LOG_ERR
, "zalloc failed");
908 bzero(cfc
, sizeof(struct content_filter
));
910 cfil_rw_lock_exclusive(&cfil_lck_rw
);
911 if (content_filters
== NULL
) {
912 struct content_filter
**tmp
;
914 cfil_rw_unlock_exclusive(&cfil_lck_rw
);
917 struct content_filter
**,
918 MAX_CONTENT_FILTER
* sizeof(struct content_filter
*),
922 cfil_rw_lock_exclusive(&cfil_lck_rw
);
924 if (tmp
== NULL
&& content_filters
== NULL
) {
926 cfil_rw_unlock_exclusive(&cfil_lck_rw
);
929 /* Another thread may have won the race */
930 if (content_filters
!= NULL
)
933 content_filters
= tmp
;
936 if (sac
->sc_unit
== 0 || sac
->sc_unit
> MAX_CONTENT_FILTER
) {
937 CFIL_LOG(LOG_ERR
, "bad sc_unit %u", sac
->sc_unit
);
939 } else if (content_filters
[sac
->sc_unit
- 1] != NULL
) {
940 CFIL_LOG(LOG_ERR
, "sc_unit %u in use", sac
->sc_unit
);
944 * kernel control socket kcunit numbers start at 1
946 content_filters
[sac
->sc_unit
- 1] = cfc
;
948 cfc
->cf_kcref
= kctlref
;
949 cfc
->cf_kcunit
= sac
->sc_unit
;
950 TAILQ_INIT(&cfc
->cf_sock_entries
);
955 cfil_rw_unlock_exclusive(&cfil_lck_rw
);
957 if (error
!= 0 && cfc
!= NULL
)
958 zfree(content_filter_zone
, cfc
);
961 OSIncrementAtomic(&cfil_stats
.cfs_ctl_connect_ok
);
963 OSIncrementAtomic(&cfil_stats
.cfs_ctl_connect_fail
);
965 CFIL_LOG(LOG_INFO
, "return %d cfil_active_count %u kcunit %u",
966 error
, cfil_active_count
, sac
->sc_unit
);
972 cfil_ctl_disconnect(kern_ctl_ref kctlref
, u_int32_t kcunit
, void *unitinfo
)
974 #pragma unused(kctlref)
976 struct content_filter
*cfc
;
977 struct cfil_entry
*entry
;
979 CFIL_LOG(LOG_NOTICE
, "");
981 if (content_filters
== NULL
) {
982 CFIL_LOG(LOG_ERR
, "no content filter");
986 if (kcunit
> MAX_CONTENT_FILTER
) {
987 CFIL_LOG(LOG_ERR
, "kcunit %u > MAX_CONTENT_FILTER (%d)",
988 kcunit
, MAX_CONTENT_FILTER
);
993 cfc
= (struct content_filter
*)unitinfo
;
997 cfil_rw_lock_exclusive(&cfil_lck_rw
);
998 if (content_filters
[kcunit
- 1] != cfc
|| cfc
->cf_kcunit
!= kcunit
) {
999 CFIL_LOG(LOG_ERR
, "bad unit info %u)",
1001 cfil_rw_unlock_exclusive(&cfil_lck_rw
);
1004 cfc
->cf_flags
|= CFF_DETACHING
;
1006 * Remove all sockets from the filter
1008 while ((entry
= TAILQ_FIRST(&cfc
->cf_sock_entries
)) != NULL
) {
1009 cfil_rw_lock_assert_held(&cfil_lck_rw
, 1);
1011 verify_content_filter(cfc
);
1013 * Accept all outstanding data by pushing to next filter
1016 * TBD: Actually we should make sure all data has been pushed
1019 if (entry
->cfe_cfil_info
&& entry
->cfe_cfil_info
->cfi_so
) {
1020 struct cfil_info
*cfil_info
= entry
->cfe_cfil_info
;
1021 struct socket
*so
= cfil_info
->cfi_so
;
1023 /* Need to let data flow immediately */
1024 entry
->cfe_flags
|= CFEF_SENT_SOCK_ATTACHED
|
1028 * Respect locking hierarchy
1030 cfil_rw_unlock_exclusive(&cfil_lck_rw
);
1035 * When cfe_filter is NULL the filter is detached
1036 * and the entry has been removed from cf_sock_entries
1038 if (so
->so_cfil
== NULL
|| entry
->cfe_filter
== NULL
) {
1039 cfil_rw_lock_exclusive(&cfil_lck_rw
);
1042 (void) cfil_action_data_pass(so
, kcunit
, 1,
1046 (void) cfil_action_data_pass(so
, kcunit
, 0,
1050 cfil_rw_lock_exclusive(&cfil_lck_rw
);
1053 * Check again as the socket may have been unlocked
* when calling cfil_acquire_sockbuf()
1056 if (so
->so_cfil
== NULL
|| entry
->cfe_filter
== NULL
)
1059 /* The filter is now detached */
1060 entry
->cfe_flags
|= CFEF_CFIL_DETACHED
;
1061 CFIL_LOG(LOG_NOTICE
, "so %llx detached %u",
1062 (uint64_t)VM_KERNEL_ADDRPERM(so
), kcunit
);
1064 if ((so
->so_cfil
->cfi_flags
& CFIF_CLOSE_WAIT
) &&
1065 cfil_filters_attached(so
) == 0) {
1066 CFIL_LOG(LOG_NOTICE
, "so %llx waking",
1067 (uint64_t)VM_KERNEL_ADDRPERM(so
));
1068 wakeup((caddr_t
)&so
->so_cfil
);
1072 * Remove the filter entry from the content filter
1073 * but leave the rest of the state intact as the queues
1074 * may not be empty yet
1076 entry
->cfe_filter
= NULL
;
1077 entry
->cfe_necp_control_unit
= 0;
1079 TAILQ_REMOVE(&cfc
->cf_sock_entries
, entry
, cfe_link
);
1080 cfc
->cf_sock_count
--;
1082 socket_unlock(so
, 1);
1085 verify_content_filter(cfc
);
1087 VERIFY(cfc
->cf_sock_count
== 0);
1090 * Make filter inactive
1092 content_filters
[kcunit
- 1] = NULL
;
1093 cfil_active_count
--;
1094 cfil_rw_unlock_exclusive(&cfil_lck_rw
);
1096 zfree(content_filter_zone
, cfc
);
1099 OSIncrementAtomic(&cfil_stats
.cfs_ctl_disconnect_ok
);
1101 OSIncrementAtomic(&cfil_stats
.cfs_ctl_disconnect_fail
);
1103 CFIL_LOG(LOG_INFO
, "return %d cfil_active_count %u kcunit %u",
1104 error
, cfil_active_count
, kcunit
);
1110 * cfil_acquire_sockbuf()
1112 * Prevent any other thread from acquiring the sockbuf
1113 * We use sb_cfil_thread as a semaphore to prevent other threads from
1114 * messing with the sockbuf -- see sblock()
1115 * Note: We do not set SB_LOCK here because the thread may check or modify
1116 * SB_LOCK several times until it calls cfil_release_sockbuf() -- currently
1117 * sblock(), sbunlock() or sodefunct()
1120 cfil_acquire_sockbuf(struct socket
*so
, int outgoing
)
1122 thread_t tp
= current_thread();
1123 struct sockbuf
*sb
= outgoing
? &so
->so_snd
: &so
->so_rcv
;
1124 lck_mtx_t
*mutex_held
;
1128 * Wait until no thread is holding the sockbuf and other content
1129 * filter threads have released the sockbuf
1131 while ((sb
->sb_flags
& SB_LOCK
) ||
1132 (sb
->sb_cfil_thread
!= NULL
&& sb
->sb_cfil_thread
!= tp
)) {
1133 if (so
->so_proto
->pr_getlock
!= NULL
)
1134 mutex_held
= (*so
->so_proto
->pr_getlock
)(so
, 0);
1136 mutex_held
= so
->so_proto
->pr_domain
->dom_mtx
;
1138 lck_mtx_assert(mutex_held
, LCK_MTX_ASSERT_OWNED
);
1141 VERIFY(sb
->sb_wantlock
!= 0);
1143 msleep(&sb
->sb_flags
, mutex_held
, PSOCK
, "cfil_acquire_sockbuf",
1146 VERIFY(sb
->sb_wantlock
!= 0);
1150 * Use reference count for repetitive calls on same thread
1152 if (sb
->sb_cfil_refs
== 0) {
1153 VERIFY(sb
->sb_cfil_thread
== NULL
);
1154 VERIFY((sb
->sb_flags
& SB_LOCK
) == 0);
1156 sb
->sb_cfil_thread
= tp
;
1157 sb
->sb_flags
|= SB_LOCK
;
1161 /* We acquire the socket buffer when we need to cleanup */
1162 if (so
->so_cfil
== NULL
) {
1163 CFIL_LOG(LOG_ERR
, "so %llx cfil detached",
1164 (uint64_t)VM_KERNEL_ADDRPERM(so
));
1166 } else if (so
->so_cfil
->cfi_flags
& CFIF_DROP
) {
1167 CFIL_LOG(LOG_ERR
, "so %llx drop set",
1168 (uint64_t)VM_KERNEL_ADDRPERM(so
));
1176 cfil_release_sockbuf(struct socket
*so
, int outgoing
)
1178 struct sockbuf
*sb
= outgoing
? &so
->so_snd
: &so
->so_rcv
;
1179 thread_t tp
= current_thread();
1181 socket_lock_assert_owned(so
);
1183 if (sb
->sb_cfil_thread
!= NULL
&& sb
->sb_cfil_thread
!= tp
)
1184 panic("%s sb_cfil_thread %p not current %p", __func__
,
1185 sb
->sb_cfil_thread
, tp
);
1187 * Don't panic if we are defunct because SB_LOCK has
1188 * been cleared by sodefunct()
1190 if (!(so
->so_flags
& SOF_DEFUNCT
) && !(sb
->sb_flags
& SB_LOCK
))
1191 panic("%s SB_LOCK not set on %p", __func__
,
1194 * We can unlock when the thread unwinds to the last reference
1197 if (sb
->sb_cfil_refs
== 0) {
1198 sb
->sb_cfil_thread
= NULL
;
1199 sb
->sb_flags
&= ~SB_LOCK
;
1201 if (sb
->sb_wantlock
> 0)
1202 wakeup(&sb
->sb_flags
);
1207 cfil_sock_id_from_socket(struct socket
*so
)
1209 if ((so
->so_flags
& SOF_CONTENT_FILTER
) && so
->so_cfil
)
1210 return (so
->so_cfil
->cfi_sock_id
);
1212 return (CFIL_SOCK_ID_NONE
);
1215 static struct socket
*
1216 cfil_socket_from_sock_id(cfil_sock_id_t cfil_sock_id
)
1218 struct socket
*so
= NULL
;
1219 u_int64_t gencnt
= cfil_sock_id
>> 32;
1220 u_int32_t flowhash
= (u_int32_t
)(cfil_sock_id
& 0x0ffffffff);
1221 struct inpcb
*inp
= NULL
;
1222 struct inpcbinfo
*pcbinfo
= &tcbinfo
;
1224 lck_rw_lock_shared(pcbinfo
->ipi_lock
);
1225 LIST_FOREACH(inp
, pcbinfo
->ipi_listhead
, inp_list
) {
1226 if (inp
->inp_state
!= INPCB_STATE_DEAD
&&
1227 inp
->inp_socket
!= NULL
&&
1228 inp
->inp_flowhash
== flowhash
&&
1229 (inp
->inp_socket
->so_gencnt
& 0x0ffffffff) == gencnt
&&
1230 inp
->inp_socket
->so_cfil
!= NULL
) {
1231 so
= inp
->inp_socket
;
1235 lck_rw_done(pcbinfo
->ipi_lock
);
1238 OSIncrementAtomic(&cfil_stats
.cfs_sock_id_not_found
);
1240 "no socket for sock_id %llx gencnt %llx flowhash %x",
1241 cfil_sock_id
, gencnt
, flowhash
);
1248 cfil_ctl_send(kern_ctl_ref kctlref
, u_int32_t kcunit
, void *unitinfo
, mbuf_t m
,
1251 #pragma unused(kctlref, flags)
1253 struct cfil_msg_hdr
*msghdr
;
1254 struct content_filter
*cfc
= (struct content_filter
*)unitinfo
;
1256 struct cfil_msg_action
*action_msg
;
1257 struct cfil_entry
*entry
;
1259 CFIL_LOG(LOG_INFO
, "");
1261 if (content_filters
== NULL
) {
1262 CFIL_LOG(LOG_ERR
, "no content filter");
1266 if (kcunit
> MAX_CONTENT_FILTER
) {
1267 CFIL_LOG(LOG_ERR
, "kcunit %u > MAX_CONTENT_FILTER (%d)",
1268 kcunit
, MAX_CONTENT_FILTER
);
1273 if (m_length(m
) < sizeof(struct cfil_msg_hdr
)) {
1274 CFIL_LOG(LOG_ERR
, "too short %u", m_length(m
));
1278 msghdr
= (struct cfil_msg_hdr
*)mbuf_data(m
);
1279 if (msghdr
->cfm_version
!= CFM_VERSION_CURRENT
) {
1280 CFIL_LOG(LOG_ERR
, "bad version %u", msghdr
->cfm_version
);
1284 if (msghdr
->cfm_type
!= CFM_TYPE_ACTION
) {
1285 CFIL_LOG(LOG_ERR
, "bad type %u", msghdr
->cfm_type
);
1289 /* Validate action operation */
1290 switch (msghdr
->cfm_op
) {
1291 case CFM_OP_DATA_UPDATE
:
1293 &cfil_stats
.cfs_ctl_action_data_update
);
1296 OSIncrementAtomic(&cfil_stats
.cfs_ctl_action_drop
);
1299 OSIncrementAtomic(&cfil_stats
.cfs_ctl_action_bad_op
);
1300 CFIL_LOG(LOG_ERR
, "bad op %u", msghdr
->cfm_op
);
1304 if (msghdr
->cfm_len
!= sizeof(struct cfil_msg_action
)) {
1305 OSIncrementAtomic(&cfil_stats
.cfs_ctl_action_bad_len
);
1307 CFIL_LOG(LOG_ERR
, "bad len: %u for op %u",
1312 cfil_rw_lock_shared(&cfil_lck_rw
);
1313 if (cfc
!= (void *)content_filters
[kcunit
- 1]) {
1314 CFIL_LOG(LOG_ERR
, "unitinfo does not match for kcunit %u",
1317 cfil_rw_unlock_shared(&cfil_lck_rw
);
1321 so
= cfil_socket_from_sock_id(msghdr
->cfm_sock_id
);
1323 CFIL_LOG(LOG_NOTICE
, "bad sock_id %llx",
1324 msghdr
->cfm_sock_id
);
1326 cfil_rw_unlock_shared(&cfil_lck_rw
);
1329 cfil_rw_unlock_shared(&cfil_lck_rw
);
1333 if (so
->so_cfil
== NULL
) {
1334 CFIL_LOG(LOG_NOTICE
, "so %llx not attached",
1335 (uint64_t)VM_KERNEL_ADDRPERM(so
));
1338 } else if (so
->so_cfil
->cfi_flags
& CFIF_DROP
) {
1339 CFIL_LOG(LOG_NOTICE
, "so %llx drop set",
1340 (uint64_t)VM_KERNEL_ADDRPERM(so
));
1344 entry
= &so
->so_cfil
->cfi_entries
[kcunit
- 1];
1345 if (entry
->cfe_filter
== NULL
) {
1346 CFIL_LOG(LOG_NOTICE
, "so %llx no filter",
1347 (uint64_t)VM_KERNEL_ADDRPERM(so
));
1352 if (entry
->cfe_flags
& CFEF_SENT_SOCK_ATTACHED
)
1353 entry
->cfe_flags
|= CFEF_DATA_START
;
1356 "so %llx attached not sent for %u",
1357 (uint64_t)VM_KERNEL_ADDRPERM(so
), kcunit
);
1362 microuptime(&entry
->cfe_last_action
);
1364 action_msg
= (struct cfil_msg_action
*)msghdr
;
1366 switch (msghdr
->cfm_op
) {
1367 case CFM_OP_DATA_UPDATE
:
1368 if (action_msg
->cfa_out_peek_offset
!= 0 ||
1369 action_msg
->cfa_out_pass_offset
!= 0)
1370 error
= cfil_action_data_pass(so
, kcunit
, 1,
1371 action_msg
->cfa_out_pass_offset
,
1372 action_msg
->cfa_out_peek_offset
);
1373 if (error
== EJUSTRETURN
)
1377 if (action_msg
->cfa_in_peek_offset
!= 0 ||
1378 action_msg
->cfa_in_pass_offset
!= 0)
1379 error
= cfil_action_data_pass(so
, kcunit
, 0,
1380 action_msg
->cfa_in_pass_offset
,
1381 action_msg
->cfa_in_peek_offset
);
1382 if (error
== EJUSTRETURN
)
1387 error
= cfil_action_drop(so
, kcunit
);
1395 socket_unlock(so
, 1);
1400 OSIncrementAtomic(&cfil_stats
.cfs_ctl_send_ok
);
1402 OSIncrementAtomic(&cfil_stats
.cfs_ctl_send_bad
);
1408 cfil_ctl_getopt(kern_ctl_ref kctlref
, u_int32_t kcunit
, void *unitinfo
,
1409 int opt
, void *data
, size_t *len
)
1411 #pragma unused(kctlref, opt)
1413 struct content_filter
*cfc
= (struct content_filter
*)unitinfo
;
1415 CFIL_LOG(LOG_NOTICE
, "");
1417 cfil_rw_lock_shared(&cfil_lck_rw
);
1419 if (content_filters
== NULL
) {
1420 CFIL_LOG(LOG_ERR
, "no content filter");
1424 if (kcunit
> MAX_CONTENT_FILTER
) {
1425 CFIL_LOG(LOG_ERR
, "kcunit %u > MAX_CONTENT_FILTER (%d)",
1426 kcunit
, MAX_CONTENT_FILTER
);
1430 if (cfc
!= (void *)content_filters
[kcunit
- 1]) {
1431 CFIL_LOG(LOG_ERR
, "unitinfo does not match for kcunit %u",
1437 case CFIL_OPT_NECP_CONTROL_UNIT
:
1438 if (*len
< sizeof(uint32_t)) {
1439 CFIL_LOG(LOG_ERR
, "len too small %lu", *len
);
1444 *(uint32_t *)data
= cfc
->cf_necp_control_unit
;
1447 error
= ENOPROTOOPT
;
1451 cfil_rw_unlock_shared(&cfil_lck_rw
);
1457 cfil_ctl_setopt(kern_ctl_ref kctlref
, u_int32_t kcunit
, void *unitinfo
,
1458 int opt
, void *data
, size_t len
)
1460 #pragma unused(kctlref, opt)
1462 struct content_filter
*cfc
= (struct content_filter
*)unitinfo
;
1464 CFIL_LOG(LOG_NOTICE
, "");
1466 cfil_rw_lock_exclusive(&cfil_lck_rw
);
1468 if (content_filters
== NULL
) {
1469 CFIL_LOG(LOG_ERR
, "no content filter");
1473 if (kcunit
> MAX_CONTENT_FILTER
) {
1474 CFIL_LOG(LOG_ERR
, "kcunit %u > MAX_CONTENT_FILTER (%d)",
1475 kcunit
, MAX_CONTENT_FILTER
);
1479 if (cfc
!= (void *)content_filters
[kcunit
- 1]) {
1480 CFIL_LOG(LOG_ERR
, "unitinfo does not match for kcunit %u",
1486 case CFIL_OPT_NECP_CONTROL_UNIT
:
1487 if (len
< sizeof(uint32_t)) {
1488 CFIL_LOG(LOG_ERR
, "CFIL_OPT_NECP_CONTROL_UNIT "
1489 "len too small %lu", len
);
1493 if (cfc
->cf_necp_control_unit
!= 0) {
1494 CFIL_LOG(LOG_ERR
, "CFIL_OPT_NECP_CONTROL_UNIT "
1496 cfc
->cf_necp_control_unit
);
1500 cfc
->cf_necp_control_unit
= *(uint32_t *)data
;
1503 error
= ENOPROTOOPT
;
1507 cfil_rw_unlock_exclusive(&cfil_lck_rw
);
1514 cfil_ctl_rcvd(kern_ctl_ref kctlref
, u_int32_t kcunit
, void *unitinfo
, int flags
)
1516 #pragma unused(kctlref, flags)
1517 struct content_filter
*cfc
= (struct content_filter
*)unitinfo
;
1518 struct socket
*so
= NULL
;
1520 struct cfil_entry
*entry
;
1522 CFIL_LOG(LOG_INFO
, "");
1524 if (content_filters
== NULL
) {
1525 CFIL_LOG(LOG_ERR
, "no content filter");
1526 OSIncrementAtomic(&cfil_stats
.cfs_ctl_rcvd_bad
);
1529 if (kcunit
> MAX_CONTENT_FILTER
) {
1530 CFIL_LOG(LOG_ERR
, "kcunit %u > MAX_CONTENT_FILTER (%d)",
1531 kcunit
, MAX_CONTENT_FILTER
);
1532 OSIncrementAtomic(&cfil_stats
.cfs_ctl_rcvd_bad
);
1535 cfil_rw_lock_shared(&cfil_lck_rw
);
1536 if (cfc
!= (void *)content_filters
[kcunit
- 1]) {
1537 CFIL_LOG(LOG_ERR
, "unitinfo does not match for kcunit %u",
1539 OSIncrementAtomic(&cfil_stats
.cfs_ctl_rcvd_bad
);
1542 /* Let's assume the flow control is lifted */
1543 if (cfc
->cf_flags
& CFF_FLOW_CONTROLLED
) {
1544 if (!cfil_rw_lock_shared_to_exclusive(&cfil_lck_rw
))
1545 cfil_rw_lock_exclusive(&cfil_lck_rw
);
1547 cfc
->cf_flags
&= ~CFF_FLOW_CONTROLLED
;
1549 cfil_rw_lock_exclusive_to_shared(&cfil_lck_rw
);
1550 lck_rw_assert(&cfil_lck_rw
, LCK_RW_ASSERT_SHARED
);
1553 * Flow control will be raised again as soon as an entry cannot enqueue
1554 * to the kernel control socket
1556 while ((cfc
->cf_flags
& CFF_FLOW_CONTROLLED
) == 0) {
1557 verify_content_filter(cfc
);
1559 cfil_rw_lock_assert_held(&cfil_lck_rw
, 0);
1561 /* Find an entry that is flow controlled */
1562 TAILQ_FOREACH(entry
, &cfc
->cf_sock_entries
, cfe_link
) {
1563 if (entry
->cfe_cfil_info
== NULL
||
1564 entry
->cfe_cfil_info
->cfi_so
== NULL
)
1566 if ((entry
->cfe_flags
& CFEF_FLOW_CONTROLLED
) == 0)
1572 OSIncrementAtomic(&cfil_stats
.cfs_ctl_rcvd_flow_lift
);
1574 so
= entry
->cfe_cfil_info
->cfi_so
;
1576 cfil_rw_unlock_shared(&cfil_lck_rw
);
1580 error
= cfil_acquire_sockbuf(so
, 1);
1582 error
= cfil_data_service_ctl_q(so
, kcunit
, 1);
1583 cfil_release_sockbuf(so
, 1);
1587 error
= cfil_acquire_sockbuf(so
, 0);
1589 error
= cfil_data_service_ctl_q(so
, kcunit
, 0);
1590 cfil_release_sockbuf(so
, 0);
1593 socket_lock_assert_owned(so
);
1594 socket_unlock(so
, 1);
1596 cfil_rw_lock_shared(&cfil_lck_rw
);
1599 cfil_rw_unlock_shared(&cfil_lck_rw
);
1605 struct kern_ctl_reg kern_ctl
;
1607 vm_size_t content_filter_size
= 0; /* size of content_filter */
1608 vm_size_t cfil_info_size
= 0; /* size of cfil_info */
1610 CFIL_LOG(LOG_NOTICE
, "");
1613 * Compile time verifications
1615 _CASSERT(CFIL_MAX_FILTER_COUNT
== MAX_CONTENT_FILTER
);
1616 _CASSERT(sizeof(struct cfil_filter_stat
) % sizeof(uint32_t) == 0);
1617 _CASSERT(sizeof(struct cfil_entry_stat
) % sizeof(uint32_t) == 0);
1618 _CASSERT(sizeof(struct cfil_sock_stat
) % sizeof(uint32_t) == 0);
1621 * Runtime time verifications
1623 VERIFY(IS_P2ALIGNED(&cfil_stats
.cfs_ctl_q_in_enqueued
,
1625 VERIFY(IS_P2ALIGNED(&cfil_stats
.cfs_ctl_q_out_enqueued
,
1627 VERIFY(IS_P2ALIGNED(&cfil_stats
.cfs_ctl_q_in_peeked
,
1629 VERIFY(IS_P2ALIGNED(&cfil_stats
.cfs_ctl_q_out_peeked
,
1632 VERIFY(IS_P2ALIGNED(&cfil_stats
.cfs_pending_q_in_enqueued
,
1634 VERIFY(IS_P2ALIGNED(&cfil_stats
.cfs_pending_q_out_enqueued
,
1637 VERIFY(IS_P2ALIGNED(&cfil_stats
.cfs_inject_q_in_enqueued
,
1639 VERIFY(IS_P2ALIGNED(&cfil_stats
.cfs_inject_q_out_enqueued
,
1641 VERIFY(IS_P2ALIGNED(&cfil_stats
.cfs_inject_q_in_passed
,
1643 VERIFY(IS_P2ALIGNED(&cfil_stats
.cfs_inject_q_out_passed
,
1647 * Zone for content filters kernel control sockets
1649 content_filter_size
= sizeof(struct content_filter
);
1650 content_filter_zone
= zinit(content_filter_size
,
1651 CONTENT_FILTER_ZONE_MAX
* content_filter_size
,
1653 CONTENT_FILTER_ZONE_NAME
);
1654 if (content_filter_zone
== NULL
) {
1655 panic("%s: zinit(%s) failed", __func__
,
1656 CONTENT_FILTER_ZONE_NAME
);
1659 zone_change(content_filter_zone
, Z_CALLERACCT
, FALSE
);
1660 zone_change(content_filter_zone
, Z_EXPAND
, TRUE
);
1663 * Zone for per socket content filters
1665 cfil_info_size
= sizeof(struct cfil_info
);
1666 cfil_info_zone
= zinit(cfil_info_size
,
1667 CFIL_INFO_ZONE_MAX
* cfil_info_size
,
1669 CFIL_INFO_ZONE_NAME
);
1670 if (cfil_info_zone
== NULL
) {
1671 panic("%s: zinit(%s) failed", __func__
, CFIL_INFO_ZONE_NAME
);
1674 zone_change(cfil_info_zone
, Z_CALLERACCT
, FALSE
);
1675 zone_change(cfil_info_zone
, Z_EXPAND
, TRUE
);
1680 cfil_lck_grp_attr
= lck_grp_attr_alloc_init();
1681 if (cfil_lck_grp_attr
== NULL
) {
1682 panic("%s: lck_grp_attr_alloc_init failed", __func__
);
1685 cfil_lck_grp
= lck_grp_alloc_init("content filter",
1687 if (cfil_lck_grp
== NULL
) {
1688 panic("%s: lck_grp_alloc_init failed", __func__
);
1691 cfil_lck_attr
= lck_attr_alloc_init();
1692 if (cfil_lck_attr
== NULL
) {
1693 panic("%s: lck_attr_alloc_init failed", __func__
);
1696 lck_rw_init(&cfil_lck_rw
, cfil_lck_grp
, cfil_lck_attr
);
1698 TAILQ_INIT(&cfil_sock_head
);
1701 * Register kernel control
1703 bzero(&kern_ctl
, sizeof(kern_ctl
));
1704 strlcpy(kern_ctl
.ctl_name
, CONTENT_FILTER_CONTROL_NAME
,
1705 sizeof(kern_ctl
.ctl_name
));
1706 kern_ctl
.ctl_flags
= CTL_FLAG_PRIVILEGED
| CTL_FLAG_REG_EXTENDED
;
1707 kern_ctl
.ctl_sendsize
= 512 * 1024; /* enough? */
1708 kern_ctl
.ctl_recvsize
= 512 * 1024; /* enough? */
1709 kern_ctl
.ctl_connect
= cfil_ctl_connect
;
1710 kern_ctl
.ctl_disconnect
= cfil_ctl_disconnect
;
1711 kern_ctl
.ctl_send
= cfil_ctl_send
;
1712 kern_ctl
.ctl_getopt
= cfil_ctl_getopt
;
1713 kern_ctl
.ctl_setopt
= cfil_ctl_setopt
;
1714 kern_ctl
.ctl_rcvd
= cfil_ctl_rcvd
;
1715 error
= ctl_register(&kern_ctl
, &cfil_kctlref
);
1717 CFIL_LOG(LOG_ERR
, "ctl_register failed: %d", error
);
1723 cfil_info_alloc(struct socket
*so
)
1726 struct cfil_info
*cfil_info
= NULL
;
1727 struct inpcb
*inp
= sotoinpcb(so
);
1729 CFIL_LOG(LOG_INFO
, "");
1731 socket_lock_assert_owned(so
);
1733 cfil_info
= zalloc(cfil_info_zone
);
1734 if (cfil_info
== NULL
)
1736 bzero(cfil_info
, sizeof(struct cfil_info
));
1738 cfil_queue_init(&cfil_info
->cfi_snd
.cfi_inject_q
);
1739 cfil_queue_init(&cfil_info
->cfi_rcv
.cfi_inject_q
);
1741 for (kcunit
= 1; kcunit
<= MAX_CONTENT_FILTER
; kcunit
++) {
1742 struct cfil_entry
*entry
;
1744 entry
= &cfil_info
->cfi_entries
[kcunit
- 1];
1745 entry
->cfe_cfil_info
= cfil_info
;
1747 /* Initialize the filter entry */
1748 entry
->cfe_filter
= NULL
;
1749 entry
->cfe_flags
= 0;
1750 entry
->cfe_necp_control_unit
= 0;
1751 entry
->cfe_snd
.cfe_pass_offset
= 0;
1752 entry
->cfe_snd
.cfe_peek_offset
= 0;
1753 entry
->cfe_snd
.cfe_peeked
= 0;
1754 entry
->cfe_rcv
.cfe_pass_offset
= 0;
1755 entry
->cfe_rcv
.cfe_peek_offset
= 0;
1756 entry
->cfe_rcv
.cfe_peeked
= 0;
1758 cfil_queue_init(&entry
->cfe_snd
.cfe_pending_q
);
1759 cfil_queue_init(&entry
->cfe_rcv
.cfe_pending_q
);
1760 cfil_queue_init(&entry
->cfe_snd
.cfe_ctl_q
);
1761 cfil_queue_init(&entry
->cfe_rcv
.cfe_ctl_q
);
1764 cfil_rw_lock_exclusive(&cfil_lck_rw
);
1766 so
->so_cfil
= cfil_info
;
1767 cfil_info
->cfi_so
= so
;
1769 * Create a cfi_sock_id that's not the socket pointer!
1771 if (inp
->inp_flowhash
== 0)
1772 inp
->inp_flowhash
= inp_calc_flowhash(inp
);
1773 cfil_info
->cfi_sock_id
=
1774 ((so
->so_gencnt
<< 32) | inp
->inp_flowhash
);
1776 TAILQ_INSERT_TAIL(&cfil_sock_head
, cfil_info
, cfi_link
);
1778 cfil_sock_attached_count
++;
1780 cfil_rw_unlock_exclusive(&cfil_lck_rw
);
1783 if (cfil_info
!= NULL
)
1784 OSIncrementAtomic(&cfil_stats
.cfs_cfi_alloc_ok
);
1786 OSIncrementAtomic(&cfil_stats
.cfs_cfi_alloc_fail
);
1792 cfil_info_attach_unit(struct socket
*so
, uint32_t filter_control_unit
)
1795 struct cfil_info
*cfil_info
= so
->so_cfil
;
1798 CFIL_LOG(LOG_INFO
, "");
1800 socket_lock_assert_owned(so
);
1802 cfil_rw_lock_exclusive(&cfil_lck_rw
);
1805 content_filters
!= NULL
&& kcunit
<= MAX_CONTENT_FILTER
;
1807 struct content_filter
*cfc
= content_filters
[kcunit
- 1];
1808 struct cfil_entry
*entry
;
1812 if (cfc
->cf_necp_control_unit
!= filter_control_unit
)
1815 entry
= &cfil_info
->cfi_entries
[kcunit
- 1];
1817 entry
->cfe_filter
= cfc
;
1818 entry
->cfe_necp_control_unit
= filter_control_unit
;
1819 TAILQ_INSERT_TAIL(&cfc
->cf_sock_entries
, entry
, cfe_link
);
1820 cfc
->cf_sock_count
++;
1821 verify_content_filter(cfc
);
1823 entry
->cfe_flags
|= CFEF_CFIL_ATTACHED
;
1827 cfil_rw_unlock_exclusive(&cfil_lck_rw
);
1833 cfil_info_free(struct socket
*so
, struct cfil_info
*cfil_info
)
1836 uint64_t in_drain
= 0;
1837 uint64_t out_drained
= 0;
1841 if (so
->so_flags
& SOF_CONTENT_FILTER
) {
1842 so
->so_flags
&= ~SOF_CONTENT_FILTER
;
1843 VERIFY(so
->so_usecount
> 0);
1846 if (cfil_info
== NULL
)
1849 CFIL_LOG(LOG_INFO
, "");
1851 cfil_rw_lock_exclusive(&cfil_lck_rw
);
1854 content_filters
!= NULL
&& kcunit
<= MAX_CONTENT_FILTER
;
1856 struct cfil_entry
*entry
;
1857 struct content_filter
*cfc
;
1859 entry
= &cfil_info
->cfi_entries
[kcunit
- 1];
1861 /* Don't be silly and try to detach twice */
1862 if (entry
->cfe_filter
== NULL
)
1865 cfc
= content_filters
[kcunit
- 1];
1867 VERIFY(cfc
== entry
->cfe_filter
);
1869 entry
->cfe_filter
= NULL
;
1870 entry
->cfe_necp_control_unit
= 0;
1871 TAILQ_REMOVE(&cfc
->cf_sock_entries
, entry
, cfe_link
);
1872 cfc
->cf_sock_count
--;
1874 verify_content_filter(cfc
);
1876 cfil_sock_attached_count
--;
1877 TAILQ_REMOVE(&cfil_sock_head
, cfil_info
, cfi_link
);
1879 out_drained
+= cfil_queue_drain(&cfil_info
->cfi_snd
.cfi_inject_q
);
1880 in_drain
+= cfil_queue_drain(&cfil_info
->cfi_rcv
.cfi_inject_q
);
1882 for (kcunit
= 1; kcunit
<= MAX_CONTENT_FILTER
; kcunit
++) {
1883 struct cfil_entry
*entry
;
1885 entry
= &cfil_info
->cfi_entries
[kcunit
- 1];
1886 out_drained
+= cfil_queue_drain(&entry
->cfe_snd
.cfe_pending_q
);
1887 in_drain
+= cfil_queue_drain(&entry
->cfe_rcv
.cfe_pending_q
);
1888 out_drained
+= cfil_queue_drain(&entry
->cfe_snd
.cfe_ctl_q
);
1889 in_drain
+= cfil_queue_drain(&entry
->cfe_rcv
.cfe_ctl_q
);
1891 cfil_rw_unlock_exclusive(&cfil_lck_rw
);
1894 OSIncrementAtomic(&cfil_stats
.cfs_flush_out_free
);
1896 OSIncrementAtomic(&cfil_stats
.cfs_flush_in_free
);
1898 zfree(cfil_info_zone
, cfil_info
);
1902 * Entry point from Sockets layer
1903 * The socket is locked.
1906 cfil_sock_attach(struct socket
*so
)
1909 uint32_t filter_control_unit
;
1911 socket_lock_assert_owned(so
);
1913 /* Limit ourselves to TCP */
1914 if ((so
->so_proto
->pr_domain
->dom_family
!= PF_INET
&&
1915 so
->so_proto
->pr_domain
->dom_family
!= PF_INET6
) ||
1916 so
->so_proto
->pr_type
!= SOCK_STREAM
||
1917 so
->so_proto
->pr_protocol
!= IPPROTO_TCP
)
1920 filter_control_unit
= necp_socket_get_content_filter_control_unit(so
);
1921 if (filter_control_unit
== 0)
1924 if ((filter_control_unit
& NECP_MASK_USERSPACE_ONLY
) != 0) {
1925 OSIncrementAtomic(&cfil_stats
.cfs_sock_userspace_only
);
1928 if (cfil_active_count
== 0) {
1929 OSIncrementAtomic(&cfil_stats
.cfs_sock_attach_in_vain
);
1932 if (so
->so_cfil
!= NULL
) {
1933 OSIncrementAtomic(&cfil_stats
.cfs_sock_attach_already
);
1934 CFIL_LOG(LOG_ERR
, "already attached");
1936 cfil_info_alloc(so
);
1937 if (so
->so_cfil
== NULL
) {
1939 OSIncrementAtomic(&cfil_stats
.cfs_sock_attach_no_mem
);
1943 if (cfil_info_attach_unit(so
, filter_control_unit
) == 0) {
1944 CFIL_LOG(LOG_ERR
, "cfil_info_attach_unit(%u) failed",
1945 filter_control_unit
);
1946 OSIncrementAtomic(&cfil_stats
.cfs_sock_attach_failed
);
1949 CFIL_LOG(LOG_INFO
, "so %llx filter_control_unit %u sockid %llx",
1950 (uint64_t)VM_KERNEL_ADDRPERM(so
),
1951 filter_control_unit
, so
->so_cfil
->cfi_sock_id
);
1953 so
->so_flags
|= SOF_CONTENT_FILTER
;
1954 OSIncrementAtomic(&cfil_stats
.cfs_sock_attached
);
1956 /* Hold a reference on the socket */
1959 error
= cfil_dispatch_attach_event(so
, filter_control_unit
);
1960 /* We can recover from flow control or out of memory errors */
1961 if (error
== ENOBUFS
|| error
== ENOMEM
)
1963 else if (error
!= 0)
1966 CFIL_INFO_VERIFY(so
->so_cfil
);
1972 * Entry point from Sockets layer
1973 * The socket is locked.
1976 cfil_sock_detach(struct socket
*so
)
1979 cfil_info_free(so
, so
->so_cfil
);
1980 OSIncrementAtomic(&cfil_stats
.cfs_sock_detached
);
1986 cfil_dispatch_attach_event(struct socket
*so
, uint32_t filter_control_unit
)
1989 struct cfil_entry
*entry
= NULL
;
1990 struct cfil_msg_sock_attached msg_attached
;
1992 struct content_filter
*cfc
;
1994 socket_lock_assert_owned(so
);
1996 cfil_rw_lock_shared(&cfil_lck_rw
);
1998 if (so
->so_proto
== NULL
|| so
->so_proto
->pr_domain
== NULL
) {
2003 * Find the matching filter unit
2005 for (kcunit
= 1; kcunit
<= MAX_CONTENT_FILTER
; kcunit
++) {
2006 cfc
= content_filters
[kcunit
- 1];
2010 if (cfc
->cf_necp_control_unit
!= filter_control_unit
)
2012 entry
= &so
->so_cfil
->cfi_entries
[kcunit
- 1];
2013 if (entry
->cfe_filter
== NULL
)
2016 VERIFY(cfc
== entry
->cfe_filter
);
2021 if (entry
== NULL
|| entry
->cfe_filter
== NULL
)
2024 if ((entry
->cfe_flags
& CFEF_SENT_SOCK_ATTACHED
))
2027 CFIL_LOG(LOG_INFO
, "so %llx filter_control_unit %u kcunit %u",
2028 (uint64_t)VM_KERNEL_ADDRPERM(so
), filter_control_unit
, kcunit
);
2030 /* Would be wasteful to try when flow controlled */
2031 if (cfc
->cf_flags
& CFF_FLOW_CONTROLLED
) {
2036 bzero(&msg_attached
, sizeof(struct cfil_msg_sock_attached
));
2037 msg_attached
.cfs_msghdr
.cfm_len
= sizeof(struct cfil_msg_sock_attached
);
2038 msg_attached
.cfs_msghdr
.cfm_version
= CFM_VERSION_CURRENT
;
2039 msg_attached
.cfs_msghdr
.cfm_type
= CFM_TYPE_EVENT
;
2040 msg_attached
.cfs_msghdr
.cfm_op
= CFM_OP_SOCKET_ATTACHED
;
2041 msg_attached
.cfs_msghdr
.cfm_sock_id
= entry
->cfe_cfil_info
->cfi_sock_id
;
2043 msg_attached
.cfs_sock_family
= so
->so_proto
->pr_domain
->dom_family
;
2044 msg_attached
.cfs_sock_type
= so
->so_proto
->pr_type
;
2045 msg_attached
.cfs_sock_protocol
= so
->so_proto
->pr_protocol
;
2046 msg_attached
.cfs_pid
= so
->last_pid
;
2047 memcpy(msg_attached
.cfs_uuid
, so
->last_uuid
, sizeof(uuid_t
));
2048 if (so
->so_flags
& SOF_DELEGATED
) {
2049 msg_attached
.cfs_e_pid
= so
->e_pid
;
2050 memcpy(msg_attached
.cfs_e_uuid
, so
->e_uuid
, sizeof(uuid_t
));
2052 msg_attached
.cfs_e_pid
= so
->last_pid
;
2053 memcpy(msg_attached
.cfs_e_uuid
, so
->last_uuid
, sizeof(uuid_t
));
2055 error
= ctl_enqueuedata(entry
->cfe_filter
->cf_kcref
,
2056 entry
->cfe_filter
->cf_kcunit
,
2058 sizeof(struct cfil_msg_sock_attached
),
2061 CFIL_LOG(LOG_ERR
, "ctl_enqueuedata() failed: %d", error
);
2064 microuptime(&entry
->cfe_last_event
);
2065 entry
->cfe_flags
|= CFEF_SENT_SOCK_ATTACHED
;
2066 OSIncrementAtomic(&cfil_stats
.cfs_attach_event_ok
);
2069 /* We can recover from flow control */
2070 if (error
== ENOBUFS
) {
2071 entry
->cfe_flags
|= CFEF_FLOW_CONTROLLED
;
2072 OSIncrementAtomic(&cfil_stats
.cfs_attach_event_flow_control
);
2074 if (!cfil_rw_lock_shared_to_exclusive(&cfil_lck_rw
))
2075 cfil_rw_lock_exclusive(&cfil_lck_rw
);
2077 cfc
->cf_flags
|= CFF_FLOW_CONTROLLED
;
2079 cfil_rw_unlock_exclusive(&cfil_lck_rw
);
2082 OSIncrementAtomic(&cfil_stats
.cfs_attach_event_fail
);
2084 cfil_rw_unlock_shared(&cfil_lck_rw
);
2090 cfil_dispatch_disconnect_event(struct socket
*so
, uint32_t kcunit
, int outgoing
)
2093 struct mbuf
*msg
= NULL
;
2094 struct cfil_entry
*entry
;
2095 struct cfe_buf
*entrybuf
;
2096 struct cfil_msg_hdr msg_disconnected
;
2097 struct content_filter
*cfc
;
2099 socket_lock_assert_owned(so
);
2101 cfil_rw_lock_shared(&cfil_lck_rw
);
2103 entry
= &so
->so_cfil
->cfi_entries
[kcunit
- 1];
2105 entrybuf
= &entry
->cfe_snd
;
2107 entrybuf
= &entry
->cfe_rcv
;
2109 cfc
= entry
->cfe_filter
;
2113 CFIL_LOG(LOG_INFO
, "so %llx kcunit %u outgoing %d",
2114 (uint64_t)VM_KERNEL_ADDRPERM(so
), kcunit
, outgoing
);
2117 * Send the disconnection event once
2119 if ((outgoing
&& (entry
->cfe_flags
& CFEF_SENT_DISCONNECT_OUT
)) ||
2120 (!outgoing
&& (entry
->cfe_flags
& CFEF_SENT_DISCONNECT_IN
))) {
2121 CFIL_LOG(LOG_INFO
, "so %llx disconnect already sent",
2122 (uint64_t)VM_KERNEL_ADDRPERM(so
));
2127 * We're not disconnected as long as some data is waiting
2128 * to be delivered to the filter
2130 if (outgoing
&& cfil_queue_empty(&entrybuf
->cfe_ctl_q
) == 0) {
2131 CFIL_LOG(LOG_INFO
, "so %llx control queue not empty",
2132 (uint64_t)VM_KERNEL_ADDRPERM(so
));
2136 /* Would be wasteful to try when flow controlled */
2137 if (cfc
->cf_flags
& CFF_FLOW_CONTROLLED
) {
2142 bzero(&msg_disconnected
, sizeof(struct cfil_msg_hdr
));
2143 msg_disconnected
.cfm_len
= sizeof(struct cfil_msg_hdr
);
2144 msg_disconnected
.cfm_version
= CFM_VERSION_CURRENT
;
2145 msg_disconnected
.cfm_type
= CFM_TYPE_EVENT
;
2146 msg_disconnected
.cfm_op
= outgoing
? CFM_OP_DISCONNECT_OUT
:
2147 CFM_OP_DISCONNECT_IN
;
2148 msg_disconnected
.cfm_sock_id
= entry
->cfe_cfil_info
->cfi_sock_id
;
2149 error
= ctl_enqueuedata(entry
->cfe_filter
->cf_kcref
,
2150 entry
->cfe_filter
->cf_kcunit
,
2152 sizeof(struct cfil_msg_hdr
),
2155 CFIL_LOG(LOG_ERR
, "ctl_enqueuembuf() failed: %d", error
);
2159 microuptime(&entry
->cfe_last_event
);
2161 /* Remember we have sent the disconnection message */
2163 entry
->cfe_flags
|= CFEF_SENT_DISCONNECT_OUT
;
2164 OSIncrementAtomic(&cfil_stats
.cfs_disconnect_out_event_ok
);
2166 entry
->cfe_flags
|= CFEF_SENT_DISCONNECT_IN
;
2167 OSIncrementAtomic(&cfil_stats
.cfs_disconnect_in_event_ok
);
2170 if (error
== ENOBUFS
) {
2171 entry
->cfe_flags
|= CFEF_FLOW_CONTROLLED
;
2173 &cfil_stats
.cfs_disconnect_event_flow_control
);
2175 if (!cfil_rw_lock_shared_to_exclusive(&cfil_lck_rw
))
2176 cfil_rw_lock_exclusive(&cfil_lck_rw
);
2178 cfc
->cf_flags
|= CFF_FLOW_CONTROLLED
;
2180 cfil_rw_unlock_exclusive(&cfil_lck_rw
);
2184 &cfil_stats
.cfs_disconnect_event_fail
);
2186 cfil_rw_unlock_shared(&cfil_lck_rw
);
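
/*
 * A disconnect event is sent at most once per direction (tracked with
 * CFEF_SENT_DISCONNECT_OUT / CFEF_SENT_DISCONNECT_IN) and, for the outgoing
 * side, only once the control queue has drained so the filter agent sees
 * all of the data before the disconnection.
 */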
static int
cfil_dispatch_closed_event(struct socket *so, int kcunit)
{
	struct cfil_entry *entry;
	struct cfil_msg_hdr msg_closed;
	errno_t error = 0;
	struct content_filter *cfc;

	socket_lock_assert_owned(so);

	cfil_rw_lock_shared(&cfil_lck_rw);

	entry = &so->so_cfil->cfi_entries[kcunit - 1];
	cfc = entry->cfe_filter;
	if (cfc == NULL)
		goto done;

	CFIL_LOG(LOG_INFO, "so %llx kcunit %d",
		(uint64_t)VM_KERNEL_ADDRPERM(so), kcunit);

	/* Would be wasteful to try when flow controlled */
	if (cfc->cf_flags & CFF_FLOW_CONTROLLED) {
		error = ENOBUFS;
		goto done;
	}
	/*
	 * Send a single closed message per filter
	 */
	if ((entry->cfe_flags & CFEF_SENT_SOCK_CLOSED) != 0)
		goto done;
	if ((entry->cfe_flags & CFEF_SENT_SOCK_ATTACHED) == 0)
		goto done;
	bzero(&msg_closed, sizeof(struct cfil_msg_hdr));
	msg_closed.cfm_len = sizeof(struct cfil_msg_hdr);
	msg_closed.cfm_version = CFM_VERSION_CURRENT;
	msg_closed.cfm_type = CFM_TYPE_EVENT;
	msg_closed.cfm_op = CFM_OP_SOCKET_CLOSED;
	msg_closed.cfm_sock_id = entry->cfe_cfil_info->cfi_sock_id;
	error = ctl_enqueuedata(entry->cfe_filter->cf_kcref,
		entry->cfe_filter->cf_kcunit,
		&msg_closed,
		sizeof(struct cfil_msg_hdr),
		CTL_DATA_EOR);
	if (error != 0) {
		CFIL_LOG(LOG_ERR, "ctl_enqueuedata() failed: %d",
			error);
		goto done;
	}
	microuptime(&entry->cfe_last_event);
	entry->cfe_flags |= CFEF_SENT_SOCK_CLOSED;
	OSIncrementAtomic(&cfil_stats.cfs_closed_event_ok);
done:
	/* We can recover from flow control */
	if (error == ENOBUFS) {
		entry->cfe_flags |= CFEF_FLOW_CONTROLLED;
		OSIncrementAtomic(&cfil_stats.cfs_closed_event_flow_control);

		if (!cfil_rw_lock_shared_to_exclusive(&cfil_lck_rw))
			cfil_rw_lock_exclusive(&cfil_lck_rw);

		cfc->cf_flags |= CFF_FLOW_CONTROLLED;

		cfil_rw_unlock_exclusive(&cfil_lck_rw);
	} else {
		if (error != 0)
			OSIncrementAtomic(&cfil_stats.cfs_closed_event_fail);

		cfil_rw_unlock_shared(&cfil_lck_rw);
	}

	return (error);
}
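
/*
 * Taken together, the dispatch routines above give each filter entry a
 * simple event lifecycle, gated by the CFEF_SENT_* flags:
 * CFM_OP_SOCKET_ATTACHED first, then CFM_OP_DATA_OUT / CFM_OP_DATA_IN
 * events, then CFM_OP_DISCONNECT_OUT / CFM_OP_DISCONNECT_IN, and finally
 * a single CFM_OP_SOCKET_CLOSED when no more filtering is possible.
 */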
static void
fill_ip6_sockaddr_4_6(union sockaddr_in_4_6 *sin46,
	struct in6_addr *ip6, u_int16_t port)
{
	struct sockaddr_in6 *sin6 = &sin46->sin6;

	sin6->sin6_family = AF_INET6;
	sin6->sin6_len = sizeof(*sin6);
	sin6->sin6_port = port;
	sin6->sin6_addr = *ip6;
	if (IN6_IS_SCOPE_EMBED(&sin6->sin6_addr)) {
		sin6->sin6_scope_id = ntohs(sin6->sin6_addr.s6_addr16[1]);
		sin6->sin6_addr.s6_addr16[1] = 0;
	}
}
static void
fill_ip_sockaddr_4_6(union sockaddr_in_4_6 *sin46,
	struct in_addr ip, u_int16_t port)
{
	struct sockaddr_in *sin = &sin46->sin;

	sin->sin_family = AF_INET;
	sin->sin_len = sizeof(*sin);
	sin->sin_port = port;
	sin->sin_addr.s_addr = ip.s_addr;
}
2294 cfil_dispatch_data_event(struct socket
*so
, uint32_t kcunit
, int outgoing
,
2295 struct mbuf
*data
, unsigned int copyoffset
, unsigned int copylen
)
2298 struct mbuf
*copy
= NULL
;
2299 struct mbuf
*msg
= NULL
;
2300 unsigned int one
= 1;
2301 struct cfil_msg_data_event
*data_req
;
2303 struct inpcb
*inp
= (struct inpcb
*)so
->so_pcb
;
2304 struct cfil_entry
*entry
;
2305 struct cfe_buf
*entrybuf
;
2306 struct content_filter
*cfc
;
2308 cfil_rw_lock_shared(&cfil_lck_rw
);
2310 entry
= &so
->so_cfil
->cfi_entries
[kcunit
- 1];
2312 entrybuf
= &entry
->cfe_snd
;
2314 entrybuf
= &entry
->cfe_rcv
;
2316 cfc
= entry
->cfe_filter
;
2320 CFIL_LOG(LOG_INFO
, "so %llx kcunit %u outgoing %d",
2321 (uint64_t)VM_KERNEL_ADDRPERM(so
), kcunit
, outgoing
);
2323 socket_lock_assert_owned(so
);
2325 /* Would be wasteful to try */
2326 if (cfc
->cf_flags
& CFF_FLOW_CONTROLLED
) {
2331 /* Make a copy of the data to pass to kernel control socket */
2332 copy
= m_copym_mode(data
, copyoffset
, copylen
, M_DONTWAIT
,
2335 CFIL_LOG(LOG_ERR
, "m_copym_mode() failed");
2340 /* We need an mbuf packet for the message header */
2341 hdrsize
= sizeof(struct cfil_msg_data_event
);
2342 error
= mbuf_allocpacket(MBUF_DONTWAIT
, hdrsize
, &one
, &msg
);
2344 CFIL_LOG(LOG_ERR
, "mbuf_allocpacket() failed");
2347 * ENOBUFS is to indicate flow control
2352 mbuf_setlen(msg
, hdrsize
);
2353 mbuf_pkthdr_setlen(msg
, hdrsize
+ copylen
);
2355 data_req
= (struct cfil_msg_data_event
*)mbuf_data(msg
);
2356 bzero(data_req
, hdrsize
);
2357 data_req
->cfd_msghdr
.cfm_len
= hdrsize
+ copylen
;
2358 data_req
->cfd_msghdr
.cfm_version
= 1;
2359 data_req
->cfd_msghdr
.cfm_type
= CFM_TYPE_EVENT
;
2360 data_req
->cfd_msghdr
.cfm_op
=
2361 outgoing
? CFM_OP_DATA_OUT
: CFM_OP_DATA_IN
;
2362 data_req
->cfd_msghdr
.cfm_sock_id
=
2363 entry
->cfe_cfil_info
->cfi_sock_id
;
2364 data_req
->cfd_start_offset
= entrybuf
->cfe_peeked
;
2365 data_req
->cfd_end_offset
= entrybuf
->cfe_peeked
+ copylen
;
* For non-connected sockets we need to copy the addresses from the passed parameters
2372 if (inp
->inp_vflag
& INP_IPV6
) {
2374 fill_ip6_sockaddr_4_6(&data_req
->cfc_src
,
2375 &inp
->in6p_laddr
, inp
->inp_lport
);
2376 fill_ip6_sockaddr_4_6(&data_req
->cfc_dst
,
2377 &inp
->in6p_faddr
, inp
->inp_fport
);
2379 fill_ip6_sockaddr_4_6(&data_req
->cfc_src
,
2380 &inp
->in6p_faddr
, inp
->inp_fport
);
2381 fill_ip6_sockaddr_4_6(&data_req
->cfc_dst
,
2382 &inp
->in6p_laddr
, inp
->inp_lport
);
2384 } else if (inp
->inp_vflag
& INP_IPV4
) {
2386 fill_ip_sockaddr_4_6(&data_req
->cfc_src
,
2387 inp
->inp_laddr
, inp
->inp_lport
);
2388 fill_ip_sockaddr_4_6(&data_req
->cfc_dst
,
2389 inp
->inp_faddr
, inp
->inp_fport
);
2391 fill_ip_sockaddr_4_6(&data_req
->cfc_src
,
2392 inp
->inp_faddr
, inp
->inp_fport
);
2393 fill_ip_sockaddr_4_6(&data_req
->cfc_dst
,
2394 inp
->inp_laddr
, inp
->inp_lport
);
2398 /* Pass the message to the content filter */
2399 error
= ctl_enqueuembuf(entry
->cfe_filter
->cf_kcref
,
2400 entry
->cfe_filter
->cf_kcunit
,
2403 CFIL_LOG(LOG_ERR
, "ctl_enqueuembuf() failed: %d", error
);
2407 entry
->cfe_flags
&= ~CFEF_FLOW_CONTROLLED
;
2408 OSIncrementAtomic(&cfil_stats
.cfs_data_event_ok
);
2410 if (error
== ENOBUFS
) {
2411 entry
->cfe_flags
|= CFEF_FLOW_CONTROLLED
;
2413 &cfil_stats
.cfs_data_event_flow_control
);
2415 if (!cfil_rw_lock_shared_to_exclusive(&cfil_lck_rw
))
2416 cfil_rw_lock_exclusive(&cfil_lck_rw
);
2418 cfc
->cf_flags
|= CFF_FLOW_CONTROLLED
;
2420 cfil_rw_unlock_exclusive(&cfil_lck_rw
);
2423 OSIncrementAtomic(&cfil_stats
.cfs_data_event_fail
);
2425 cfil_rw_unlock_shared(&cfil_lck_rw
);
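
/*
 * Note: the data event carries a copy of the span being peeked at; the
 * cfd_start_offset / cfd_end_offset fields are absolute offsets in the
 * socket byte stream (derived from cfe_peeked), so the agent can track how
 * much of the stream it has seen.  The original mbufs stay queued in the
 * kernel until the agent's verdict advances the pass offset.
 */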
2431 * Process the queue of data waiting to be delivered to content filter
2434 cfil_data_service_ctl_q(struct socket
*so
, uint32_t kcunit
, int outgoing
)
2437 struct mbuf
*data
, *tmp
= NULL
;
2438 unsigned int datalen
= 0, copylen
= 0, copyoffset
= 0;
2439 struct cfil_entry
*entry
;
2440 struct cfe_buf
*entrybuf
;
2441 uint64_t currentoffset
= 0;
2443 if (so
->so_cfil
== NULL
)
2446 CFIL_LOG(LOG_INFO
, "so %llx kcunit %u outgoing %d",
2447 (uint64_t)VM_KERNEL_ADDRPERM(so
), kcunit
, outgoing
);
2449 socket_lock_assert_owned(so
);
2451 entry
= &so
->so_cfil
->cfi_entries
[kcunit
- 1];
2453 entrybuf
= &entry
->cfe_snd
;
2455 entrybuf
= &entry
->cfe_rcv
;
2457 /* Send attached message if not yet done */
2458 if ((entry
->cfe_flags
& CFEF_SENT_SOCK_ATTACHED
) == 0) {
2459 error
= cfil_dispatch_attach_event(so
, kcunit
);
2461 /* We can recover from flow control */
2462 if (error
== ENOBUFS
|| error
== ENOMEM
)
2466 } else if ((entry
->cfe_flags
& CFEF_DATA_START
) == 0) {
2467 OSIncrementAtomic(&cfil_stats
.cfs_ctl_q_not_started
);
2470 CFIL_LOG(LOG_DEBUG
, "pass_offset %llu peeked %llu peek_offset %llu",
2471 entrybuf
->cfe_pass_offset
,
2472 entrybuf
->cfe_peeked
,
2473 entrybuf
->cfe_peek_offset
);
2475 /* Move all data that can pass */
2476 while ((data
= cfil_queue_first(&entrybuf
->cfe_ctl_q
)) != NULL
&&
2477 entrybuf
->cfe_ctl_q
.q_start
< entrybuf
->cfe_pass_offset
) {
2478 datalen
= cfil_data_length(data
, NULL
);
2481 if (entrybuf
->cfe_ctl_q
.q_start
+ datalen
<=
2482 entrybuf
->cfe_pass_offset
) {
2484 * The first mbuf can fully pass
2489 * The first mbuf can partially pass
2491 copylen
= entrybuf
->cfe_pass_offset
-
2492 entrybuf
->cfe_ctl_q
.q_start
;
2494 VERIFY(copylen
<= datalen
);
2497 "%llx first %llu peeked %llu pass %llu peek %llu"
2498 "datalen %u copylen %u",
2499 (uint64_t)VM_KERNEL_ADDRPERM(tmp
),
2500 entrybuf
->cfe_ctl_q
.q_start
,
2501 entrybuf
->cfe_peeked
,
2502 entrybuf
->cfe_pass_offset
,
2503 entrybuf
->cfe_peek_offset
,
* Data that passes has been peeked at explicitly or implicitly
2510 if (entrybuf
->cfe_ctl_q
.q_start
+ copylen
>
2511 entrybuf
->cfe_peeked
)
2512 entrybuf
->cfe_peeked
=
2513 entrybuf
->cfe_ctl_q
.q_start
+ copylen
;
2515 * Stop on partial pass
2517 if (copylen
< datalen
)
2520 /* All good, move full data from ctl queue to pending queue */
2521 cfil_queue_remove(&entrybuf
->cfe_ctl_q
, data
, datalen
);
2523 cfil_queue_enqueue(&entrybuf
->cfe_pending_q
, data
, datalen
);
2525 OSAddAtomic64(datalen
,
2526 &cfil_stats
.cfs_pending_q_out_enqueued
);
2528 OSAddAtomic64(datalen
,
2529 &cfil_stats
.cfs_pending_q_in_enqueued
);
2531 CFIL_INFO_VERIFY(so
->so_cfil
);
2534 "%llx first %llu peeked %llu pass %llu peek %llu"
2535 "datalen %u copylen %u",
2536 (uint64_t)VM_KERNEL_ADDRPERM(tmp
),
2537 entrybuf
->cfe_ctl_q
.q_start
,
2538 entrybuf
->cfe_peeked
,
2539 entrybuf
->cfe_pass_offset
,
2540 entrybuf
->cfe_peek_offset
,
2544 /* Now deal with remaining data the filter wants to peek at */
2545 for (data
= cfil_queue_first(&entrybuf
->cfe_ctl_q
),
2546 currentoffset
= entrybuf
->cfe_ctl_q
.q_start
;
2547 data
!= NULL
&& currentoffset
< entrybuf
->cfe_peek_offset
;
2548 data
= cfil_queue_next(&entrybuf
->cfe_ctl_q
, data
),
2549 currentoffset
+= datalen
) {
2550 datalen
= cfil_data_length(data
, NULL
);
2553 /* We've already peeked at this mbuf */
2554 if (currentoffset
+ datalen
<= entrybuf
->cfe_peeked
)
2557 * The data in the first mbuf may have been
2558 * partially peeked at
2560 copyoffset
= entrybuf
->cfe_peeked
- currentoffset
;
2561 VERIFY(copyoffset
< datalen
);
2562 copylen
= datalen
- copyoffset
;
2563 VERIFY(copylen
<= datalen
);
2565 * Do not copy more than needed
2567 if (currentoffset
+ copyoffset
+ copylen
>
2568 entrybuf
->cfe_peek_offset
) {
2569 copylen
= entrybuf
->cfe_peek_offset
-
2570 (currentoffset
+ copyoffset
);
2574 "%llx current %llu peeked %llu pass %llu peek %llu"
2575 "datalen %u copylen %u copyoffset %u",
2576 (uint64_t)VM_KERNEL_ADDRPERM(tmp
),
2578 entrybuf
->cfe_peeked
,
2579 entrybuf
->cfe_pass_offset
,
2580 entrybuf
->cfe_peek_offset
,
2581 datalen
, copylen
, copyoffset
);
2584 * Stop if there is nothing more to peek at
2589 * Let the filter get a peek at this span of data
2591 error
= cfil_dispatch_data_event(so
, kcunit
,
2592 outgoing
, data
, copyoffset
, copylen
);
2594 /* On error, leave data in ctl_q */
2597 entrybuf
->cfe_peeked
+= copylen
;
2599 OSAddAtomic64(copylen
,
2600 &cfil_stats
.cfs_ctl_q_out_peeked
);
2602 OSAddAtomic64(copylen
,
2603 &cfil_stats
.cfs_ctl_q_in_peeked
);
2605 /* Stop when data could not be fully peeked at */
2606 if (copylen
+ copyoffset
< datalen
)
2609 CFIL_INFO_VERIFY(so
->so_cfil
);
2612 "%llx first %llu peeked %llu pass %llu peek %llu"
2613 "datalen %u copylen %u copyoffset %u",
2614 (uint64_t)VM_KERNEL_ADDRPERM(tmp
),
2616 entrybuf
->cfe_peeked
,
2617 entrybuf
->cfe_pass_offset
,
2618 entrybuf
->cfe_peek_offset
,
2619 datalen
, copylen
, copyoffset
);
2622 * Process data that has passed the filter
2624 error
= cfil_service_pending_queue(so
, kcunit
, outgoing
);
2626 CFIL_LOG(LOG_ERR
, "cfil_service_pending_queue() error %d",
2632 * Dispatch disconnect events that could not be sent
2634 if (so
->so_cfil
== NULL
)
2636 else if (outgoing
) {
2637 if ((so
->so_cfil
->cfi_flags
& CFIF_SHUT_WR
) &&
2638 !(entry
->cfe_flags
& CFEF_SENT_DISCONNECT_OUT
))
2639 cfil_dispatch_disconnect_event(so
, kcunit
, 1);
2641 if ((so
->so_cfil
->cfi_flags
& CFIF_SHUT_RD
) &&
2642 !(entry
->cfe_flags
& CFEF_SENT_DISCONNECT_IN
))
2643 cfil_dispatch_disconnect_event(so
, kcunit
, 0);
2648 "first %llu peeked %llu pass %llu peek %llu",
2649 entrybuf
->cfe_ctl_q
.q_start
,
2650 entrybuf
->cfe_peeked
,
2651 entrybuf
->cfe_pass_offset
,
2652 entrybuf
->cfe_peek_offset
);
2654 CFIL_INFO_VERIFY(so
->so_cfil
);
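
/*
 * The per-entry bookkeeping used above, roughly: data first lands on the
 * control queue (cfe_ctl_q); bytes below cfe_pass_offset are moved to the
 * pending queue (cfe_pending_q) for the next stage, while bytes between
 * cfe_peeked and cfe_peek_offset are copied to the agent via
 * cfil_dispatch_data_event() and cfe_peeked is advanced.
 */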
/*
 * cfil_data_filter()
 *
 * Process data for a content filter installed on a socket
 */
static int
cfil_data_filter(struct socket *so, uint32_t kcunit, int outgoing,
	struct mbuf *data, uint64_t datalen)
{
	errno_t error = 0;
	struct cfil_entry *entry;
	struct cfe_buf *entrybuf;

	CFIL_LOG(LOG_INFO, "so %llx kcunit %u outgoing %d",
		(uint64_t)VM_KERNEL_ADDRPERM(so), kcunit, outgoing);

	socket_lock_assert_owned(so);

	entry = &so->so_cfil->cfi_entries[kcunit - 1];
	if (outgoing)
		entrybuf = &entry->cfe_snd;
	else
		entrybuf = &entry->cfe_rcv;

	/* Are we attached to the filter? */
	if (entry->cfe_filter == NULL) {
		error = 0;
		goto done;
	}

	/* Dispatch to filters */
	cfil_queue_enqueue(&entrybuf->cfe_ctl_q, data, datalen);
	if (outgoing)
		OSAddAtomic64(datalen,
			&cfil_stats.cfs_ctl_q_out_enqueued);
	else
		OSAddAtomic64(datalen,
			&cfil_stats.cfs_ctl_q_in_enqueued);

	error = cfil_data_service_ctl_q(so, kcunit, outgoing);
	if (error != 0) {
		CFIL_LOG(LOG_ERR, "cfil_data_service_ctl_q() error %d",
			error);
	}
	/*
	 * We have to return EJUSTRETURN in all cases to avoid double free
	 */
	error = EJUSTRETURN;
done:
	CFIL_INFO_VERIFY(so->so_cfil);

	CFIL_LOG(LOG_INFO, "return %d", error);
	return (error);
}
* cfil_service_inject_queue() re-injects data that has passed the filters
2719 cfil_service_inject_queue(struct socket
*so
, int outgoing
)
2722 unsigned int datalen
;
2724 unsigned int copylen
;
2726 struct mbuf
*copy
= NULL
;
2727 struct cfi_buf
*cfi_buf
;
2728 struct cfil_queue
*inject_q
;
2729 int need_rwakeup
= 0;
2731 if (so
->so_cfil
== NULL
)
2734 CFIL_LOG(LOG_INFO
, "so %llx outgoing %d",
2735 (uint64_t)VM_KERNEL_ADDRPERM(so
), outgoing
);
2737 socket_lock_assert_owned(so
);
2740 cfi_buf
= &so
->so_cfil
->cfi_snd
;
2741 so
->so_cfil
->cfi_flags
&= ~CFIF_RETRY_INJECT_OUT
;
2743 cfi_buf
= &so
->so_cfil
->cfi_rcv
;
2744 so
->so_cfil
->cfi_flags
&= ~CFIF_RETRY_INJECT_IN
;
2746 inject_q
= &cfi_buf
->cfi_inject_q
;
2748 while ((data
= cfil_queue_first(inject_q
)) != NULL
) {
2749 datalen
= cfil_data_length(data
, &mbcnt
);
2751 CFIL_LOG(LOG_INFO
, "data %llx datalen %u",
2752 (uint64_t)VM_KERNEL_ADDRPERM(data
), datalen
);
2754 /* Make a copy in case of injection error */
2755 copy
= m_copym_mode(data
, 0, M_COPYALL
, M_DONTWAIT
,
2758 CFIL_LOG(LOG_ERR
, "m_copym_mode() failed");
2763 if ((copylen
= m_length(copy
)) != datalen
)
2764 panic("%s so %p copylen %d != datalen %d",
2765 __func__
, so
, copylen
, datalen
);
2768 socket_unlock(so
, 0);
* Set both DONTWAIT and NBIO flags as we really
* do not want to block
2774 error
= sosend(so
, NULL
, NULL
,
2776 MSG_SKIPCFIL
| MSG_DONTWAIT
| MSG_NBIO
);
2781 CFIL_LOG(LOG_ERR
, "sosend() failed %d",
2785 copy
->m_flags
|= M_SKIPCFIL
;
* This works only because we support plain TCP.
* For UDP, RAWIP, MPTCP and message TCP we'll
* need to call the appropriate sbappendxxx()
* or fix sock_inject_data_in()
2794 if (sbappendstream(&so
->so_rcv
, copy
))
2798 /* Need to reassess if filter is still attached after unlock */
2799 if (so
->so_cfil
== NULL
) {
2800 CFIL_LOG(LOG_ERR
, "so %llx cfil detached",
2801 (uint64_t)VM_KERNEL_ADDRPERM(so
));
2802 OSIncrementAtomic(&cfil_stats
.cfs_inject_q_detached
);
2809 /* Injection successful */
2810 cfil_queue_remove(inject_q
, data
, datalen
);
2813 cfi_buf
->cfi_pending_first
+= datalen
;
2814 cfi_buf
->cfi_pending_mbcnt
-= mbcnt
;
2815 cfil_info_buf_verify(cfi_buf
);
2818 OSAddAtomic64(datalen
,
2819 &cfil_stats
.cfs_inject_q_out_passed
);
2821 OSAddAtomic64(datalen
,
2822 &cfil_stats
.cfs_inject_q_in_passed
);
/* A single wakeup for several packets is more efficient */
2829 if (error
!= 0 && so
->so_cfil
) {
2830 if (error
== ENOBUFS
)
2831 OSIncrementAtomic(&cfil_stats
.cfs_inject_q_nobufs
);
2832 if (error
== ENOMEM
)
2833 OSIncrementAtomic(&cfil_stats
.cfs_inject_q_nomem
);
2836 so
->so_cfil
->cfi_flags
|= CFIF_RETRY_INJECT_OUT
;
2837 OSIncrementAtomic(&cfil_stats
.cfs_inject_q_out_fail
);
2839 so
->so_cfil
->cfi_flags
|= CFIF_RETRY_INJECT_IN
;
2840 OSIncrementAtomic(&cfil_stats
.cfs_inject_q_in_fail
);
2847 if (so
->so_cfil
&& (so
->so_cfil
->cfi_flags
& CFIF_SHUT_WR
)) {
2848 cfil_sock_notify_shutdown(so
, SHUT_WR
);
2849 if (cfil_sock_data_pending(&so
->so_snd
) == 0)
2850 soshutdownlock_final(so
, SHUT_WR
);
2852 if (so
->so_cfil
&& (so
->so_cfil
->cfi_flags
& CFIF_CLOSE_WAIT
)) {
2853 if (cfil_filters_attached(so
) == 0) {
2854 CFIL_LOG(LOG_INFO
, "so %llx waking",
2855 (uint64_t)VM_KERNEL_ADDRPERM(so
));
2856 wakeup((caddr_t
)&so
->so_cfil
);
2860 CFIL_INFO_VERIFY(so
->so_cfil
);
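
/*
 * Re-injection bypasses content filtering so the same bytes are not
 * filtered twice: outgoing data is resent with sosend() using MSG_SKIPCFIL,
 * and incoming data is appended to the receive buffer with sbappendstream()
 * after setting M_SKIPCFIL on the copy.
 */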
2866 cfil_service_pending_queue(struct socket
*so
, uint32_t kcunit
, int outgoing
)
2868 uint64_t passlen
, curlen
;
2870 unsigned int datalen
;
2872 struct cfil_entry
*entry
;
2873 struct cfe_buf
*entrybuf
;
2874 struct cfil_queue
*pending_q
;
2876 CFIL_LOG(LOG_INFO
, "so %llx kcunit %u outgoing %d",
2877 (uint64_t)VM_KERNEL_ADDRPERM(so
), kcunit
, outgoing
);
2879 socket_lock_assert_owned(so
);
2881 entry
= &so
->so_cfil
->cfi_entries
[kcunit
- 1];
2883 entrybuf
= &entry
->cfe_snd
;
2885 entrybuf
= &entry
->cfe_rcv
;
2887 pending_q
= &entrybuf
->cfe_pending_q
;
2889 passlen
= entrybuf
->cfe_pass_offset
- pending_q
->q_start
;
2892 * Locate the chunks of data that we can pass to the next filter
2893 * A data chunk must be on mbuf boundaries
2896 while ((data
= cfil_queue_first(pending_q
)) != NULL
) {
2897 datalen
= cfil_data_length(data
, NULL
);
2900 "data %llx datalen %u passlen %llu curlen %llu",
2901 (uint64_t)VM_KERNEL_ADDRPERM(data
), datalen
,
2904 if (curlen
+ datalen
> passlen
)
2907 cfil_queue_remove(pending_q
, data
, datalen
);
2912 kcunit
<= MAX_CONTENT_FILTER
;
2914 error
= cfil_data_filter(so
, kcunit
, outgoing
,
2916 /* 0 means passed so we can continue */
2920 /* When data has passed all filters, re-inject */
2924 &so
->so_cfil
->cfi_snd
.cfi_inject_q
,
2926 OSAddAtomic64(datalen
,
2927 &cfil_stats
.cfs_inject_q_out_enqueued
);
2930 &so
->so_cfil
->cfi_rcv
.cfi_inject_q
,
2932 OSAddAtomic64(datalen
,
2933 &cfil_stats
.cfs_inject_q_in_enqueued
);
2938 CFIL_INFO_VERIFY(so
->so_cfil
);
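
/*
 * When a chunk leaves a filter's pending queue it is handed to the next
 * kernel control unit via cfil_data_filter(); only after the last content
 * filter has passed it is the data placed on the inject queue
 * (cfi_snd/cfi_rcv cfi_inject_q) for re-injection into the socket.
 */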
2944 cfil_update_data_offsets(struct socket
*so
, uint32_t kcunit
, int outgoing
,
2945 uint64_t pass_offset
, uint64_t peek_offset
)
2948 struct cfil_entry
*entry
= NULL
;
2949 struct cfe_buf
*entrybuf
;
2952 CFIL_LOG(LOG_INFO
, "pass %llu peek %llu", pass_offset
, peek_offset
);
2954 socket_lock_assert_owned(so
);
2956 if (so
->so_cfil
== NULL
) {
2957 CFIL_LOG(LOG_ERR
, "so %llx cfil detached",
2958 (uint64_t)VM_KERNEL_ADDRPERM(so
));
2961 } else if (so
->so_cfil
->cfi_flags
& CFIF_DROP
) {
2962 CFIL_LOG(LOG_ERR
, "so %llx drop set",
2963 (uint64_t)VM_KERNEL_ADDRPERM(so
));
2968 entry
= &so
->so_cfil
->cfi_entries
[kcunit
- 1];
2970 entrybuf
= &entry
->cfe_snd
;
2972 entrybuf
= &entry
->cfe_rcv
;
2974 /* Record updated offsets for this content filter */
2975 if (pass_offset
> entrybuf
->cfe_pass_offset
) {
2976 entrybuf
->cfe_pass_offset
= pass_offset
;
2978 if (entrybuf
->cfe_peek_offset
< entrybuf
->cfe_pass_offset
)
2979 entrybuf
->cfe_peek_offset
= entrybuf
->cfe_pass_offset
;
2982 CFIL_LOG(LOG_INFO
, "pass_offset %llu <= cfe_pass_offset %llu",
2983 pass_offset
, entrybuf
->cfe_pass_offset
);
2985 /* Filter does not want or need to see data that's allowed to pass */
2986 if (peek_offset
> entrybuf
->cfe_pass_offset
&&
2987 peek_offset
> entrybuf
->cfe_peek_offset
) {
2988 entrybuf
->cfe_peek_offset
= peek_offset
;
2995 /* Move data held in control queue to pending queue if needed */
2996 error
= cfil_data_service_ctl_q(so
, kcunit
, outgoing
);
2998 CFIL_LOG(LOG_ERR
, "cfil_data_service_ctl_q() error %d",
3002 error
= EJUSTRETURN
;
* The filter is effectively detached when it has passed everything from both sides
3007 * or when the socket is closed and no more data is waiting
3008 * to be delivered to the filter
3010 if (entry
!= NULL
&&
3011 ((entry
->cfe_snd
.cfe_pass_offset
== CFM_MAX_OFFSET
&&
3012 entry
->cfe_rcv
.cfe_pass_offset
== CFM_MAX_OFFSET
) ||
3013 ((so
->so_cfil
->cfi_flags
& CFIF_CLOSE_WAIT
) &&
3014 cfil_queue_empty(&entry
->cfe_snd
.cfe_ctl_q
) &&
3015 cfil_queue_empty(&entry
->cfe_rcv
.cfe_ctl_q
)))) {
3016 entry
->cfe_flags
|= CFEF_CFIL_DETACHED
;
3017 CFIL_LOG(LOG_INFO
, "so %llx detached %u",
3018 (uint64_t)VM_KERNEL_ADDRPERM(so
), kcunit
);
3019 if ((so
->so_cfil
->cfi_flags
& CFIF_CLOSE_WAIT
) &&
3020 cfil_filters_attached(so
) == 0) {
3021 CFIL_LOG(LOG_INFO
, "so %llx waking",
3022 (uint64_t)VM_KERNEL_ADDRPERM(so
));
3023 wakeup((caddr_t
)&so
->so_cfil
);
3026 CFIL_INFO_VERIFY(so
->so_cfil
);
3027 CFIL_LOG(LOG_INFO
, "return %d", error
);
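
/*
 * Rough example of the offset arithmetic above: if a filter has already
 * peeked at bytes [0, 1000) (cfe_peeked == 1000) and the agent replies with
 * pass_offset 600 and peek_offset 2000, then bytes up to offset 600 may
 * move from the control queue to the pending queue, cfe_peek_offset becomes
 * 2000 so the next 1000 queued bytes will be peeked as they arrive, and
 * cfe_pass_offset stays at 600 until a later verdict.
 */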
/*
 * Update pass offset for socket when no data is pending
 */
static int
cfil_set_socket_pass_offset(struct socket *so, int outgoing)
{
	struct cfi_buf *cfi_buf;
	struct cfil_entry *entry;
	struct cfe_buf *entrybuf;
	uint32_t kcunit;
	uint64_t pass_offset = 0;

	if (so->so_cfil == NULL)
		return (0);

	CFIL_LOG(LOG_INFO, "so %llx outgoing %d",
		(uint64_t)VM_KERNEL_ADDRPERM(so), outgoing);

	socket_lock_assert_owned(so);

	if (outgoing)
		cfi_buf = &so->so_cfil->cfi_snd;
	else
		cfi_buf = &so->so_cfil->cfi_rcv;

	if (cfi_buf->cfi_pending_last - cfi_buf->cfi_pending_first == 0) {
		for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
			entry = &so->so_cfil->cfi_entries[kcunit - 1];

			/* Are we attached to a filter? */
			if (entry->cfe_filter == NULL)
				continue;

			if (outgoing)
				entrybuf = &entry->cfe_snd;
			else
				entrybuf = &entry->cfe_rcv;

			if (pass_offset == 0 ||
			    entrybuf->cfe_pass_offset < pass_offset)
				pass_offset = entrybuf->cfe_pass_offset;
		}
		cfi_buf->cfi_pass_offset = pass_offset;
	}

	return (0);
}

static int
cfil_action_data_pass(struct socket *so, uint32_t kcunit, int outgoing,
	uint64_t pass_offset, uint64_t peek_offset)
{
	errno_t error = 0;

	CFIL_LOG(LOG_INFO, "");

	socket_lock_assert_owned(so);

	error = cfil_acquire_sockbuf(so, outgoing);
	if (error != 0) {
		CFIL_LOG(LOG_INFO, "so %llx %s dropped",
			(uint64_t)VM_KERNEL_ADDRPERM(so),
			outgoing ? "out" : "in");
		goto release;
	}

	error = cfil_update_data_offsets(so, kcunit, outgoing,
		pass_offset, peek_offset);

	cfil_service_inject_queue(so, outgoing);

	cfil_set_socket_pass_offset(so, outgoing);
release:
	CFIL_INFO_VERIFY(so->so_cfil);
	cfil_release_sockbuf(so, outgoing);

	return (error);
}
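
/*
 * cfil_action_data_pass() is the kernel side of a pass/update verdict from
 * the agent: it records the new pass/peek offsets, re-injects any data that
 * is now allowed through, and recomputes the socket-level pass offset once
 * nothing is pending.
 */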
3112 cfil_flush_queues(struct socket
*so
)
3114 struct cfil_entry
*entry
;
3118 if ((so
->so_flags
& SOF_CONTENT_FILTER
) == 0 || so
->so_cfil
== NULL
)
3121 socket_lock_assert_owned(so
);
3124 * Flush the output queues and ignore errors as long as
3127 (void) cfil_acquire_sockbuf(so
, 1);
3128 if (so
->so_cfil
!= NULL
) {
3130 for (kcunit
= 1; kcunit
<= MAX_CONTENT_FILTER
; kcunit
++) {
3131 entry
= &so
->so_cfil
->cfi_entries
[kcunit
- 1];
3133 drained
+= cfil_queue_drain(&entry
->cfe_snd
.cfe_ctl_q
);
3134 drained
+= cfil_queue_drain(
3135 &entry
->cfe_snd
.cfe_pending_q
);
3137 drained
+= cfil_queue_drain(&so
->so_cfil
->cfi_snd
.cfi_inject_q
);
3139 if (so
->so_cfil
->cfi_flags
& CFIF_DROP
)
3141 &cfil_stats
.cfs_flush_out_drop
);
3144 &cfil_stats
.cfs_flush_out_close
);
3147 cfil_release_sockbuf(so
, 1);
3150 * Flush the input queues
3152 (void) cfil_acquire_sockbuf(so
, 0);
3153 if (so
->so_cfil
!= NULL
) {
3155 for (kcunit
= 1; kcunit
<= MAX_CONTENT_FILTER
; kcunit
++) {
3156 entry
= &so
->so_cfil
->cfi_entries
[kcunit
- 1];
3158 drained
+= cfil_queue_drain(
3159 &entry
->cfe_rcv
.cfe_ctl_q
);
3160 drained
+= cfil_queue_drain(
3161 &entry
->cfe_rcv
.cfe_pending_q
);
3163 drained
+= cfil_queue_drain(&so
->so_cfil
->cfi_rcv
.cfi_inject_q
);
3165 if (so
->so_cfil
->cfi_flags
& CFIF_DROP
)
3167 &cfil_stats
.cfs_flush_in_drop
);
3170 &cfil_stats
.cfs_flush_in_close
);
3173 cfil_release_sockbuf(so
, 0);
3175 CFIL_INFO_VERIFY(so
->so_cfil
);
3179 cfil_action_drop(struct socket
*so
, uint32_t kcunit
)
3182 struct cfil_entry
*entry
;
3185 if ((so
->so_flags
& SOF_CONTENT_FILTER
) == 0 || so
->so_cfil
== NULL
)
3188 socket_lock_assert_owned(so
);
3190 entry
= &so
->so_cfil
->cfi_entries
[kcunit
- 1];
3192 /* Are we attached to the filter? */
3193 if (entry
->cfe_filter
== NULL
)
3196 so
->so_cfil
->cfi_flags
|= CFIF_DROP
;
3201 * Force the socket to be marked defunct
3202 * (forcing fixed along with rdar://19391339)
3204 error
= sosetdefunct(p
, so
,
3205 SHUTDOWN_SOCKET_LEVEL_CONTENT_FILTER
| SHUTDOWN_SOCKET_LEVEL_DISCONNECT_ALL
,
3208 /* Flush the socket buffer and disconnect */
3210 error
= sodefunct(p
, so
,
3211 SHUTDOWN_SOCKET_LEVEL_CONTENT_FILTER
| SHUTDOWN_SOCKET_LEVEL_DISCONNECT_ALL
);
3213 /* The filter is done, mark as detached */
3214 entry
->cfe_flags
|= CFEF_CFIL_DETACHED
;
3215 CFIL_LOG(LOG_INFO
, "so %llx detached %u",
3216 (uint64_t)VM_KERNEL_ADDRPERM(so
), kcunit
);
3218 /* Pending data needs to go */
3219 cfil_flush_queues(so
);
3221 if (so
->so_cfil
&& (so
->so_cfil
->cfi_flags
& CFIF_CLOSE_WAIT
)) {
3222 if (cfil_filters_attached(so
) == 0) {
3223 CFIL_LOG(LOG_INFO
, "so %llx waking",
3224 (uint64_t)VM_KERNEL_ADDRPERM(so
));
3225 wakeup((caddr_t
)&so
->so_cfil
);
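
/*
 * A drop verdict is final: the socket is marked CFIF_DROP and made defunct
 * (sosetdefunct()/sodefunct()), the entry is marked detached, all data held
 * in the filter queues is flushed, and any thread parked in
 * cfil_sock_close_wait() is woken up.
 */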
3233 cfil_update_entry_offsets(struct socket
*so
, int outgoing
, unsigned int datalen
)
3235 struct cfil_entry
*entry
;
3236 struct cfe_buf
*entrybuf
;
3239 CFIL_LOG(LOG_INFO
, "so %llx outgoing %d datalen %u",
3240 (uint64_t)VM_KERNEL_ADDRPERM(so
), outgoing
, datalen
);
3242 for (kcunit
= 1; kcunit
<= MAX_CONTENT_FILTER
; kcunit
++) {
3243 entry
= &so
->so_cfil
->cfi_entries
[kcunit
- 1];
3245 /* Are we attached to the filter? */
3246 if (entry
->cfe_filter
== NULL
)
3250 entrybuf
= &entry
->cfe_snd
;
3252 entrybuf
= &entry
->cfe_rcv
;
3254 entrybuf
->cfe_ctl_q
.q_start
+= datalen
;
3255 entrybuf
->cfe_pass_offset
= entrybuf
->cfe_ctl_q
.q_start
;
3256 entrybuf
->cfe_peeked
= entrybuf
->cfe_ctl_q
.q_start
;
3257 if (entrybuf
->cfe_peek_offset
< entrybuf
->cfe_pass_offset
)
3258 entrybuf
->cfe_peek_offset
= entrybuf
->cfe_pass_offset
;
3260 entrybuf
->cfe_ctl_q
.q_end
+= datalen
;
3262 entrybuf
->cfe_pending_q
.q_start
+= datalen
;
3263 entrybuf
->cfe_pending_q
.q_end
+= datalen
;
3265 CFIL_INFO_VERIFY(so
->so_cfil
);
3270 cfil_data_common(struct socket
*so
, int outgoing
, struct sockaddr
*to
,
3271 struct mbuf
*data
, struct mbuf
*control
, uint32_t flags
)
3273 #pragma unused(to, control, flags)
3275 unsigned int datalen
;
3278 struct cfi_buf
*cfi_buf
;
3280 if (so
->so_cfil
== NULL
) {
3281 CFIL_LOG(LOG_ERR
, "so %llx cfil detached",
3282 (uint64_t)VM_KERNEL_ADDRPERM(so
));
3285 } else if (so
->so_cfil
->cfi_flags
& CFIF_DROP
) {
3286 CFIL_LOG(LOG_ERR
, "so %llx drop set",
3287 (uint64_t)VM_KERNEL_ADDRPERM(so
));
3292 datalen
= cfil_data_length(data
, &mbcnt
);
3294 CFIL_LOG(LOG_INFO
, "so %llx %s m %llx len %u flags 0x%x nextpkt %llx",
3295 (uint64_t)VM_KERNEL_ADDRPERM(so
),
3296 outgoing
? "out" : "in",
3297 (uint64_t)VM_KERNEL_ADDRPERM(data
), datalen
, data
->m_flags
,
3298 (uint64_t)VM_KERNEL_ADDRPERM(data
->m_nextpkt
));
3301 cfi_buf
= &so
->so_cfil
->cfi_snd
;
3303 cfi_buf
= &so
->so_cfil
->cfi_rcv
;
3305 cfi_buf
->cfi_pending_last
+= datalen
;
3306 cfi_buf
->cfi_pending_mbcnt
+= mbcnt
;
3307 cfil_info_buf_verify(cfi_buf
);
3309 CFIL_LOG(LOG_INFO
, "so %llx cfi_pending_last %llu cfi_pass_offset %llu",
3310 (uint64_t)VM_KERNEL_ADDRPERM(so
),
3311 cfi_buf
->cfi_pending_last
,
3312 cfi_buf
->cfi_pass_offset
);
3314 /* Fast path when below pass offset */
3315 if (cfi_buf
->cfi_pending_last
<= cfi_buf
->cfi_pass_offset
) {
3316 cfil_update_entry_offsets(so
, outgoing
, datalen
);
3318 for (kcunit
= 1; kcunit
<= MAX_CONTENT_FILTER
; kcunit
++) {
3319 error
= cfil_data_filter(so
, kcunit
, outgoing
, data
,
3321 /* 0 means passed so continue with next filter */
3327 /* Move cursor if no filter claimed the data */
3329 cfi_buf
->cfi_pending_first
+= datalen
;
3330 cfi_buf
->cfi_pending_mbcnt
-= mbcnt
;
3331 cfil_info_buf_verify(cfi_buf
);
3334 CFIL_INFO_VERIFY(so
->so_cfil
);
3340 * Callback from socket layer sosendxxx()
3343 cfil_sock_data_out(struct socket
*so
, struct sockaddr
*to
,
3344 struct mbuf
*data
, struct mbuf
*control
, uint32_t flags
)
3348 if ((so
->so_flags
& SOF_CONTENT_FILTER
) == 0 || so
->so_cfil
== NULL
)
3351 socket_lock_assert_owned(so
);
3353 if (so
->so_cfil
->cfi_flags
& CFIF_DROP
) {
3354 CFIL_LOG(LOG_ERR
, "so %llx drop set",
3355 (uint64_t)VM_KERNEL_ADDRPERM(so
));
3358 if (control
!= NULL
) {
3359 CFIL_LOG(LOG_ERR
, "so %llx control",
3360 (uint64_t)VM_KERNEL_ADDRPERM(so
));
3361 OSIncrementAtomic(&cfil_stats
.cfs_data_out_control
);
3363 if ((flags
& MSG_OOB
)) {
3364 CFIL_LOG(LOG_ERR
, "so %llx MSG_OOB",
3365 (uint64_t)VM_KERNEL_ADDRPERM(so
));
3366 OSIncrementAtomic(&cfil_stats
.cfs_data_out_oob
);
3368 if ((so
->so_snd
.sb_flags
& SB_LOCK
) == 0)
3369 panic("so %p SB_LOCK not set", so
);
3371 if (so
->so_snd
.sb_cfil_thread
!= NULL
)
3372 panic("%s sb_cfil_thread %p not NULL", __func__
,
3373 so
->so_snd
.sb_cfil_thread
);
3375 error
= cfil_data_common(so
, 1, to
, data
, control
, flags
);
3381 * Callback from socket layer sbappendxxx()
3384 cfil_sock_data_in(struct socket
*so
, struct sockaddr
*from
,
3385 struct mbuf
*data
, struct mbuf
*control
, uint32_t flags
)
3389 if ((so
->so_flags
& SOF_CONTENT_FILTER
) == 0 || so
->so_cfil
== NULL
)
3392 socket_lock_assert_owned(so
);
3394 if (so
->so_cfil
->cfi_flags
& CFIF_DROP
) {
3395 CFIL_LOG(LOG_ERR
, "so %llx drop set",
3396 (uint64_t)VM_KERNEL_ADDRPERM(so
));
3399 if (control
!= NULL
) {
3400 CFIL_LOG(LOG_ERR
, "so %llx control",
3401 (uint64_t)VM_KERNEL_ADDRPERM(so
));
3402 OSIncrementAtomic(&cfil_stats
.cfs_data_in_control
);
3404 if (data
->m_type
== MT_OOBDATA
) {
3405 CFIL_LOG(LOG_ERR
, "so %llx MSG_OOB",
3406 (uint64_t)VM_KERNEL_ADDRPERM(so
));
3407 OSIncrementAtomic(&cfil_stats
.cfs_data_in_oob
);
3409 error
= cfil_data_common(so
, 0, from
, data
, control
, flags
);
3415 * Callback from socket layer soshutdownxxx()
* We may delay the shutdown write while outgoing data is still being processed.
3419 * There is no point in delaying the shutdown read because the process
* indicated that it does not want to read any more data.
3423 cfil_sock_shutdown(struct socket
*so
, int *how
)
3427 if ((so
->so_flags
& SOF_CONTENT_FILTER
) == 0 || so
->so_cfil
== NULL
)
3430 socket_lock_assert_owned(so
);
3432 CFIL_LOG(LOG_INFO
, "so %llx how %d",
3433 (uint64_t)VM_KERNEL_ADDRPERM(so
), *how
);
3436 * Check the state of the socket before the content filter
3438 if (*how
!= SHUT_WR
&& (so
->so_state
& SS_CANTRCVMORE
) != 0) {
3439 /* read already shut down */
3443 if (*how
!= SHUT_RD
&& (so
->so_state
& SS_CANTSENDMORE
) != 0) {
3444 /* write already shut down */
3449 if ((so
->so_cfil
->cfi_flags
& CFIF_DROP
) != 0) {
3450 CFIL_LOG(LOG_ERR
, "so %llx drop set",
3451 (uint64_t)VM_KERNEL_ADDRPERM(so
));
3456 * shutdown read: SHUT_RD or SHUT_RDWR
3458 if (*how
!= SHUT_WR
) {
3459 if (so
->so_cfil
->cfi_flags
& CFIF_SHUT_RD
) {
3463 so
->so_cfil
->cfi_flags
|= CFIF_SHUT_RD
;
3464 cfil_sock_notify_shutdown(so
, SHUT_RD
);
3467 * shutdown write: SHUT_WR or SHUT_RDWR
3469 if (*how
!= SHUT_RD
) {
3470 if (so
->so_cfil
->cfi_flags
& CFIF_SHUT_WR
) {
3474 so
->so_cfil
->cfi_flags
|= CFIF_SHUT_WR
;
3475 cfil_sock_notify_shutdown(so
, SHUT_WR
);
3477 * When outgoing data is pending, we delay the shutdown at the
3478 * protocol level until the content filters give the final
3479 * verdict on the pending data.
3481 if (cfil_sock_data_pending(&so
->so_snd
) != 0) {
3483 * When shutting down the read and write sides at once
3484 * we can proceed to the final shutdown of the read
3485 * side. Otherwise, we just return.
3487 if (*how
== SHUT_WR
) {
3488 error
= EJUSTRETURN
;
3489 } else if (*how
== SHUT_RDWR
) {
/*
 * This is called when the socket is closed and there is no more
 * opportunity for filtering
 */
void
cfil_sock_is_closed(struct socket *so)
{
	errno_t error = 0;
	int kcunit;

	if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL)
		return;

	CFIL_LOG(LOG_INFO, "so %llx", (uint64_t)VM_KERNEL_ADDRPERM(so));

	socket_lock_assert_owned(so);

	for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
		/* Let the filters know of the closing */
		error = cfil_dispatch_closed_event(so, kcunit);
	}

	/* Last chance to push passed data out */
	error = cfil_acquire_sockbuf(so, 1);
	if (error == 0)
		cfil_service_inject_queue(so, 1);
	cfil_release_sockbuf(so, 1);

	so->so_cfil->cfi_flags |= CFIF_SOCK_CLOSED;

	/* Pending data needs to go */
	cfil_flush_queues(so);

	CFIL_INFO_VERIFY(so->so_cfil);
}

/*
 * This is called when the socket is disconnected so let the filters
 * know about the disconnection and that no more data will come
 *
 * The how parameter has the same values as soshutdown()
 */
static void
cfil_sock_notify_shutdown(struct socket *so, int how)
{
	errno_t error = 0;
	int kcunit;

	if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL)
		return;

	CFIL_LOG(LOG_INFO, "so %llx how %d",
		(uint64_t)VM_KERNEL_ADDRPERM(so), how);

	socket_lock_assert_owned(so);

	for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
		/* Disconnect incoming side */
		if (how != SHUT_WR)
			error = cfil_dispatch_disconnect_event(so, kcunit, 0);
		/* Disconnect outgoing side */
		if (how != SHUT_RD)
			error = cfil_dispatch_disconnect_event(so, kcunit, 1);
	}
}

static int
cfil_filters_attached(struct socket *so)
{
	struct cfil_entry *entry;
	uint32_t kcunit;
	int attached = 0;

	if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL)
		return (0);

	socket_lock_assert_owned(so);

	for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
		entry = &so->so_cfil->cfi_entries[kcunit - 1];

		/* Are we attached to the filter? */
		if (entry->cfe_filter == NULL)
			continue;
		if ((entry->cfe_flags & CFEF_SENT_SOCK_ATTACHED) == 0)
			continue;
		if ((entry->cfe_flags & CFEF_CFIL_DETACHED) != 0)
			continue;
		attached = 1;
		break;
	}

	return (attached);
}

/*
 * This is called when the socket is closed and we are waiting for
 * the filters to give the final pass or drop
 */
void
cfil_sock_close_wait(struct socket *so)
{
	lck_mtx_t *mutex_held;
	struct timespec ts;
	int error;

	if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL)
		return;

	CFIL_LOG(LOG_INFO, "so %llx", (uint64_t)VM_KERNEL_ADDRPERM(so));

	if (so->so_proto->pr_getlock != NULL)
		mutex_held = (*so->so_proto->pr_getlock)(so, 0);
	else
		mutex_held = so->so_proto->pr_domain->dom_mtx;
	lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);

	while (cfil_filters_attached(so)) {
		/*
		 * Notify the filters we are going away so they can detach
		 */
		cfil_sock_notify_shutdown(so, SHUT_RDWR);

		/*
		 * Make sure we need to wait after the filters are notified
		 * of the disconnection
		 */
		if (cfil_filters_attached(so) == 0)
			break;

		CFIL_LOG(LOG_INFO, "so %llx waiting",
			(uint64_t)VM_KERNEL_ADDRPERM(so));

		ts.tv_sec = cfil_close_wait_timeout / 1000;
		ts.tv_nsec = (cfil_close_wait_timeout % 1000) *
			NSEC_PER_USEC * 1000;

		OSIncrementAtomic(&cfil_stats.cfs_close_wait);
		so->so_cfil->cfi_flags |= CFIF_CLOSE_WAIT;
		error = msleep((caddr_t)&so->so_cfil, mutex_held,
			PSOCK | PCATCH, "cfil_sock_close_wait", &ts);
		so->so_cfil->cfi_flags &= ~CFIF_CLOSE_WAIT;

		CFIL_LOG(LOG_NOTICE, "so %llx timed out %d",
			(uint64_t)VM_KERNEL_ADDRPERM(so), (error != 0));

		/*
		 * Force close in case of timeout
		 */
		if (error != 0) {
			OSIncrementAtomic(&cfil_stats.cfs_close_wait_timeout);
			break;
		}
	}
}
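
/*
 * Note: given the conversion above (tv_sec = timeout / 1000 and
 * tv_nsec = (timeout % 1000) * NSEC_PER_USEC * 1000),
 * cfil_close_wait_timeout is expressed in milliseconds.
 */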
/*
 * Return the size of the data held by the content filter.
 */
int32_t
cfil_sock_data_pending(struct sockbuf *sb)
{
	struct socket *so = sb->sb_so;
	uint64_t pending = 0;

	if ((so->so_flags & SOF_CONTENT_FILTER) != 0 && so->so_cfil != NULL) {
		struct cfi_buf *cfi_buf;

		socket_lock_assert_owned(so);

		if ((sb->sb_flags & SB_RECV) == 0)
			cfi_buf = &so->so_cfil->cfi_snd;
		else
			cfi_buf = &so->so_cfil->cfi_rcv;

		pending = cfi_buf->cfi_pending_last -
			cfi_buf->cfi_pending_first;

		/*
		 * If we are limited by the "chars of mbufs used" roughly
		 * adjust so we won't overcommit
		 */
		if (pending > (uint64_t)cfi_buf->cfi_pending_mbcnt)
			pending = cfi_buf->cfi_pending_mbcnt;
	}

	VERIFY(pending < INT32_MAX);

	return (int32_t)(pending);
}

/*
 * Return the socket buffer space used by data being held by content filters
 * so processes won't clog the socket buffer
 */
int32_t
cfil_sock_data_space(struct sockbuf *sb)
{
	struct socket *so = sb->sb_so;
	uint64_t pending = 0;

	if ((so->so_flags & SOF_CONTENT_FILTER) != 0 && so->so_cfil != NULL &&
	    so->so_snd.sb_cfil_thread != current_thread()) {
		struct cfi_buf *cfi_buf;

		socket_lock_assert_owned(so);

		if ((sb->sb_flags & SB_RECV) == 0)
			cfi_buf = &so->so_cfil->cfi_snd;
		else
			cfi_buf = &so->so_cfil->cfi_rcv;

		pending = cfi_buf->cfi_pending_last -
			cfi_buf->cfi_pending_first;

		/*
		 * If we are limited by the "chars of mbufs used" roughly
		 * adjust so we won't overcommit
		 */
		if ((uint64_t)cfi_buf->cfi_pending_mbcnt > pending)
			pending = cfi_buf->cfi_pending_mbcnt;
	}

	VERIFY(pending < INT32_MAX);

	return (int32_t)(pending);
}
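
/*
 * Unlike cfil_sock_data_pending(), the space accounting above is skipped
 * when the caller is the thread currently re-injecting on behalf of the
 * content filter (sb_cfil_thread), presumably so that held data does not
 * count against the send buffer space of its own re-injection.
 */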
3728 * A callback from the socket and protocol layer when data becomes
3729 * available in the socket buffer to give a chance for the content filter
3730 * to re-inject data that was held back
3733 cfil_sock_buf_update(struct sockbuf
*sb
)
3737 struct socket
*so
= sb
->sb_so
;
3739 if ((so
->so_flags
& SOF_CONTENT_FILTER
) == 0 || so
->so_cfil
== NULL
)
3745 socket_lock_assert_owned(so
);
3747 if ((sb
->sb_flags
& SB_RECV
) == 0) {
3748 if ((so
->so_cfil
->cfi_flags
& CFIF_RETRY_INJECT_OUT
) == 0)
3751 OSIncrementAtomic(&cfil_stats
.cfs_inject_q_out_retry
);
3753 if ((so
->so_cfil
->cfi_flags
& CFIF_RETRY_INJECT_IN
) == 0)
3756 OSIncrementAtomic(&cfil_stats
.cfs_inject_q_in_retry
);
3759 CFIL_LOG(LOG_NOTICE
, "so %llx outgoing %d",
3760 (uint64_t)VM_KERNEL_ADDRPERM(so
), outgoing
);
3762 error
= cfil_acquire_sockbuf(so
, outgoing
);
3764 cfil_service_inject_queue(so
, outgoing
);
3765 cfil_release_sockbuf(so
, outgoing
);
3769 sysctl_cfil_filter_list(struct sysctl_oid
*oidp
, void *arg1
, int arg2
,
3770 struct sysctl_req
*req
)
3772 #pragma unused(oidp, arg1, arg2)
3778 if (req
->newptr
!= USER_ADDR_NULL
)
3781 cfil_rw_lock_shared(&cfil_lck_rw
);
3783 for (i
= 0; content_filters
!= NULL
&& i
< MAX_CONTENT_FILTER
; i
++) {
3784 struct cfil_filter_stat filter_stat
;
3785 struct content_filter
*cfc
= content_filters
[i
];
3790 /* If just asking for the size */
3791 if (req
->oldptr
== USER_ADDR_NULL
) {
3792 len
+= sizeof(struct cfil_filter_stat
);
3796 bzero(&filter_stat
, sizeof(struct cfil_filter_stat
));
3797 filter_stat
.cfs_len
= sizeof(struct cfil_filter_stat
);
3798 filter_stat
.cfs_filter_id
= cfc
->cf_kcunit
;
3799 filter_stat
.cfs_flags
= cfc
->cf_flags
;
3800 filter_stat
.cfs_sock_count
= cfc
->cf_sock_count
;
3801 filter_stat
.cfs_necp_control_unit
= cfc
->cf_necp_control_unit
;
3803 error
= SYSCTL_OUT(req
, &filter_stat
,
3804 sizeof (struct cfil_filter_stat
));
3808 /* If just asking for the size */
3809 if (req
->oldptr
== USER_ADDR_NULL
)
3812 cfil_rw_unlock_shared(&cfil_lck_rw
);
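
/*
 * A minimal user-space sketch of how a list sysctl like the one above is
 * typically consumed: probe with a NULL buffer to learn the size, allocate,
 * then fetch.  The MIB name "net.cfil.filter_list" below is a hypothetical
 * placeholder for illustration only and is not defined by this file.
 */
#if 0	/* illustrative user-space code, not built as part of the kernel */
#include <sys/types.h>
#include <sys/sysctl.h>
#include <stdio.h>
#include <stdlib.h>

static void
dump_filter_list(void)
{
	const char *mib = "net.cfil.filter_list";	/* hypothetical name */
	size_t len = 0;
	void *buf;

	/* First call with a NULL buffer returns the required size */
	if (sysctlbyname(mib, NULL, &len, NULL, 0) != 0 || len == 0)
		return;

	buf = malloc(len);
	if (buf == NULL)
		return;

	if (sysctlbyname(mib, buf, &len, NULL, 0) == 0)
		printf("%zu bytes of cfil_filter_stat records\n", len);

	free(buf);
}
#endif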
3817 static int sysctl_cfil_sock_list(struct sysctl_oid
*oidp
, void *arg1
, int arg2
,
3818 struct sysctl_req
*req
)
3820 #pragma unused(oidp, arg1, arg2)
3823 struct cfil_info
*cfi
;
3826 if (req
->newptr
!= USER_ADDR_NULL
)
3829 cfil_rw_lock_shared(&cfil_lck_rw
);
3832 * If just asking for the size,
3834 if (req
->oldptr
== USER_ADDR_NULL
) {
3835 req
->oldidx
= cfil_sock_attached_count
*
3836 sizeof(struct cfil_sock_stat
);
/* Bump the length in case new sockets get attached */
3838 req
->oldidx
+= req
->oldidx
>> 3;
3842 TAILQ_FOREACH(cfi
, &cfil_sock_head
, cfi_link
) {
3843 struct cfil_entry
*entry
;
3844 struct cfil_sock_stat stat
;
3845 struct socket
*so
= cfi
->cfi_so
;
3847 bzero(&stat
, sizeof(struct cfil_sock_stat
));
3848 stat
.cfs_len
= sizeof(struct cfil_sock_stat
);
3849 stat
.cfs_sock_id
= cfi
->cfi_sock_id
;
3850 stat
.cfs_flags
= cfi
->cfi_flags
;
3853 stat
.cfs_pid
= so
->last_pid
;
3854 memcpy(stat
.cfs_uuid
, so
->last_uuid
,
3856 if (so
->so_flags
& SOF_DELEGATED
) {
3857 stat
.cfs_e_pid
= so
->e_pid
;
3858 memcpy(stat
.cfs_e_uuid
, so
->e_uuid
,
3861 stat
.cfs_e_pid
= so
->last_pid
;
3862 memcpy(stat
.cfs_e_uuid
, so
->last_uuid
,
3867 stat
.cfs_snd
.cbs_pending_first
=
3868 cfi
->cfi_snd
.cfi_pending_first
;
3869 stat
.cfs_snd
.cbs_pending_last
=
3870 cfi
->cfi_snd
.cfi_pending_last
;
3871 stat
.cfs_snd
.cbs_inject_q_len
=
3872 cfil_queue_len(&cfi
->cfi_snd
.cfi_inject_q
);
3873 stat
.cfs_snd
.cbs_pass_offset
=
3874 cfi
->cfi_snd
.cfi_pass_offset
;
3876 stat
.cfs_rcv
.cbs_pending_first
=
3877 cfi
->cfi_rcv
.cfi_pending_first
;
3878 stat
.cfs_rcv
.cbs_pending_last
=
3879 cfi
->cfi_rcv
.cfi_pending_last
;
3880 stat
.cfs_rcv
.cbs_inject_q_len
=
3881 cfil_queue_len(&cfi
->cfi_rcv
.cfi_inject_q
);
3882 stat
.cfs_rcv
.cbs_pass_offset
=
3883 cfi
->cfi_rcv
.cfi_pass_offset
;
3885 for (i
= 0; i
< MAX_CONTENT_FILTER
; i
++) {
3886 struct cfil_entry_stat
*estat
;
3887 struct cfe_buf
*ebuf
;
3888 struct cfe_buf_stat
*sbuf
;
3890 entry
= &cfi
->cfi_entries
[i
];
3892 estat
= &stat
.ces_entries
[i
];
3894 estat
->ces_len
= sizeof(struct cfil_entry_stat
);
3895 estat
->ces_filter_id
= entry
->cfe_filter
?
3896 entry
->cfe_filter
->cf_kcunit
: 0;
3897 estat
->ces_flags
= entry
->cfe_flags
;
3898 estat
->ces_necp_control_unit
=
3899 entry
->cfe_necp_control_unit
;
3901 estat
->ces_last_event
.tv_sec
=
3902 (int64_t)entry
->cfe_last_event
.tv_sec
;
3903 estat
->ces_last_event
.tv_usec
=
3904 (int64_t)entry
->cfe_last_event
.tv_usec
;
3906 estat
->ces_last_action
.tv_sec
=
3907 (int64_t)entry
->cfe_last_action
.tv_sec
;
3908 estat
->ces_last_action
.tv_usec
=
3909 (int64_t)entry
->cfe_last_action
.tv_usec
;
3911 ebuf
= &entry
->cfe_snd
;
3912 sbuf
= &estat
->ces_snd
;
3913 sbuf
->cbs_pending_first
=
3914 cfil_queue_offset_first(&ebuf
->cfe_pending_q
);
3915 sbuf
->cbs_pending_last
=
3916 cfil_queue_offset_last(&ebuf
->cfe_pending_q
);
3917 sbuf
->cbs_ctl_first
=
3918 cfil_queue_offset_first(&ebuf
->cfe_ctl_q
);
3919 sbuf
->cbs_ctl_last
=
3920 cfil_queue_offset_last(&ebuf
->cfe_ctl_q
);
3921 sbuf
->cbs_pass_offset
= ebuf
->cfe_pass_offset
;
3922 sbuf
->cbs_peek_offset
= ebuf
->cfe_peek_offset
;
3923 sbuf
->cbs_peeked
= ebuf
->cfe_peeked
;
3925 ebuf
= &entry
->cfe_rcv
;
3926 sbuf
= &estat
->ces_rcv
;
3927 sbuf
->cbs_pending_first
=
3928 cfil_queue_offset_first(&ebuf
->cfe_pending_q
);
3929 sbuf
->cbs_pending_last
=
3930 cfil_queue_offset_last(&ebuf
->cfe_pending_q
);
3931 sbuf
->cbs_ctl_first
=
3932 cfil_queue_offset_first(&ebuf
->cfe_ctl_q
);
3933 sbuf
->cbs_ctl_last
=
3934 cfil_queue_offset_last(&ebuf
->cfe_ctl_q
);
3935 sbuf
->cbs_pass_offset
= ebuf
->cfe_pass_offset
;
3936 sbuf
->cbs_peek_offset
= ebuf
->cfe_peek_offset
;
3937 sbuf
->cbs_peeked
= ebuf
->cfe_peeked
;
3939 error
= SYSCTL_OUT(req
, &stat
,
3940 sizeof (struct cfil_sock_stat
));
3945 cfil_rw_unlock_shared(&cfil_lck_rw
);