/*
 * Copyright (c) 2013-2014 Apple Inc. All rights reserved.
 *
 * @APPLE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this
 * file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_LICENSE_HEADER_END@
 */
/*
 * The socket content filter subsystem provides a way for user space agents to
 * make filtering decisions based on the content of the data being sent and
 * received by TCP/IP sockets.
 *
 * A content filter user space agent gets a copy of the data and the data is
 * also kept in a kernel buffer until the user space agent makes a pass or drop
 * decision. This unidirectional flow of content avoids unnecessary data copies.
 *
 * A user space filter agent opens a kernel control socket with the name
 * CONTENT_FILTER_CONTROL_NAME to attach to the socket content filter subsystem.
 * When connected, a "struct content_filter" is created and set as the
 * "unitinfo" of the corresponding kernel control socket instance.
 *
 * The socket content filter subsystem exchanges messages with the user space
 * filter agent until an ultimate pass or drop decision is made by the
 * user space filter agent.
 *
 * It should be noted that messages about many TCP/IP sockets can be multiplexed
 * over a single kernel control socket.
 *
 * Notes:
 * - The current implementation is limited to TCP sockets.
 * - The current implementation supports up to two simultaneous content filters
 *   for the sake of simplicity of the implementation.
 *
 *
 * NECP FILTER CONTROL UNIT
 *
 * A user space filter agent uses the Network Extension Control Policy (NECP)
 * database to specify which TCP/IP sockets need to be filtered. The NECP
 * criteria may be based on a variety of properties like user ID or proc UUID.
 *
 * The NECP "filter control unit" is used by the socket content filter subsystem
 * to deliver the relevant TCP/IP content information to the appropriate
 * user space filter agent via its kernel control socket instance.
 * This works as follows:
 *
 * 1) The user space filter agent specifies an NECP filter control unit when
 *    it adds its filtering rules to the NECP database.
 *
 * 2) The user space filter agent also sets its NECP filter control unit on the
 *    content filter kernel control socket via the socket option
 *    CFIL_OPT_NECP_CONTROL_UNIT (see the sketch at the end of this section).
 *
 * 3) The NECP database is consulted to find out if a given TCP/IP socket
 *    needs to be subjected to content filtering and returns the corresponding
 *    NECP filter control unit -- the NECP filter control unit is actually
 *    stored in the TCP/IP socket structure so the NECP lookup is really simple.
 *
 * 4) The NECP filter control unit is then used to find the corresponding
 *    kernel control socket instance.
 *
 * Note: NECP currently supports a single filter control unit per TCP/IP socket
 * but this restriction may soon be lifted.
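 *
 * For illustration, a minimal user space sketch of step 2) above -- attaching
 * an agent to the subsystem. It assumes the definitions from the private
 * <net/content_filter.h> header are visible to user space, that the process
 * has the privilege required by CTL_FLAG_PRIVILEGED, and that
 * MY_NECP_FILTER_CONTROL_UNIT is a hypothetical unit already registered in
 * the NECP database; error handling is omitted:
 *
 *	int fd = socket(PF_SYSTEM, SOCK_DGRAM, SYSPROTO_CONTROL);
 *
 *	struct ctl_info info;
 *	bzero(&info, sizeof(info));
 *	strlcpy(info.ctl_name, CONTENT_FILTER_CONTROL_NAME,
 *	    sizeof(info.ctl_name));
 *	ioctl(fd, CTLIOCGINFO, &info);		// resolve the dynamic control id
 *
 *	struct sockaddr_ctl addr;
 *	bzero(&addr, sizeof(addr));
 *	addr.sc_len = sizeof(addr);
 *	addr.sc_family = AF_SYSTEM;
 *	addr.ss_sysaddr = AF_SYS_CONTROL;
 *	addr.sc_id = info.ctl_id;
 *	addr.sc_unit = 0;			// let the framework pick a free kcunit
 *	connect(fd, (struct sockaddr *)&addr, sizeof(addr));
 *
 *	uint32_t unit = MY_NECP_FILTER_CONTROL_UNIT;
 *	setsockopt(fd, SYSPROTO_CONTROL, CFIL_OPT_NECP_CONTROL_UNIT,
 *	    &unit, sizeof(unit));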
 *
 *
 * THE MESSAGING PROTOCOL
 *
 * The socket content filter subsystem and a user space filter agent
 * communicate over the kernel control socket via an asynchronous
 * messaging protocol (this is not a request-response protocol).
 * The socket content filter subsystem sends event messages to the user
 * space filter agent about the TCP/IP sockets it is interested in filtering.
 * The user space filter agent sends action messages to either allow
 * data to pass or to disallow the data flow (and drop the connection).
 *
 * All messages over a content filter kernel control socket share the same
 * common header of type "struct cfil_msg_hdr". The message type tells if
 * it's an event message "CFM_TYPE_EVENT" or an action message "CFM_TYPE_ACTION".
 * The message header field "cfm_sock_id" identifies a given TCP/IP socket.
 * Note the message header length field may be padded for alignment and can
 * be larger than the actual content of the message.
 * The field "cfm_op" describes the kind of event or action.
 *
 * Here are the kinds of content filter events:
 * - CFM_OP_SOCKET_ATTACHED: a new TCP/IP socket is being filtered
 * - CFM_OP_SOCKET_CLOSED: a TCP/IP socket is closed
 * - CFM_OP_DATA_OUT: a span of data is being sent on a TCP/IP socket
 * - CFM_OP_DATA_IN: a span of data is being received on a TCP/IP socket
 *
 * The CFM_OP_DATA_OUT and CFM_OP_DATA_IN event messages contain a span of
 * data that is being sent or received. The position of this span of data
 * in the data flow is described by a set of start and end offsets. These
 * are absolute 64-bit offsets. The first byte sent (or received) starts
 * at offset 0 and ends at offset 1. The length of the content data
 * is given by the difference between the end offset and the start offset.
 *
 * After a CFM_OP_SOCKET_ATTACHED is delivered, CFM_OP_DATA_OUT and
 * CFM_OP_DATA_IN events are not delivered until a CFM_OP_DATA_UPDATE
 * action message is sent by the user space filter agent.
 *
 * Note: absolute 64-bit offsets should be large enough for the foreseeable
 * future. A 64-bit counter will wrap only after 468 years at 10 Gbit/sec:
 *    2^64 / ((10E9 / 8) * 60 * 60 * 24 * 365.25) = 467.63
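 *
 * As a sketch of the agent side of this protocol (same assumptions as the
 * earlier user space example; the receive buffer size is arbitrary):
 *
 *	uint8_t buf[8192];
 *	ssize_t n;
 *
 *	while ((n = recv(fd, buf, sizeof(buf), 0)) >=
 *	    (ssize_t)sizeof(struct cfil_msg_hdr)) {
 *		struct cfil_msg_hdr *hdr = (struct cfil_msg_hdr *)buf;
 *
 *		if (hdr->cfm_type != CFM_TYPE_EVENT)
 *			continue;
 *		switch (hdr->cfm_op) {
 *		case CFM_OP_SOCKET_ATTACHED:
 *			// new flow: send an initial CFM_OP_DATA_UPDATE
 *			break;
 *		case CFM_OP_DATA_OUT:
 *		case CFM_OP_DATA_IN:
 *			// inspect the span, then advance pass/peek offsets
 *			break;
 *		case CFM_OP_SOCKET_CLOSED:
 *			// drop any state kept for hdr->cfm_sock_id
 *			break;
 *		}
 *	}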
 *
 * There are two kinds of content filter actions:
 * - CFM_OP_DATA_UPDATE: to update the pass or peek offsets for each direction.
 * - CFM_OP_DROP: to shut down the socket and disallow further data flow.
 *
 * The CFM_OP_DATA_UPDATE action messages let the user space filter
 * agent allow data to flow up to the specified pass offset -- there
 * is a pass offset for outgoing data and a pass offset for incoming data.
 * When a new TCP/IP socket is attached to the content filter, each pass offset
 * is initially set to 0 so no data is allowed to pass by default.
 * When the pass offset is set to CFM_MAX_OFFSET via a CFM_OP_DATA_UPDATE,
 * the data flow becomes unrestricted.
 *
 * Note that pass offsets can only be incremented. A CFM_OP_DATA_UPDATE message
 * with a pass offset smaller than the pass offset of a previous
 * CFM_OP_DATA_UPDATE message is silently ignored.
 *
 * A user space filter agent also uses CFM_OP_DATA_UPDATE action messages
 * to tell the kernel how much data it wants to see by using the peek offsets.
 * Just like pass offsets, there is a peek offset for each direction.
 * When a new TCP/IP socket is attached to the content filter, each peek offset
 * is initially set to 0 so no CFM_OP_DATA_OUT and CFM_OP_DATA_IN event
 * messages are dispatched by default until a CFM_OP_DATA_UPDATE action message
 * with a peek offset greater than 0 is sent by the user space filter agent.
 * When the peek offset is set to CFM_MAX_OFFSET via a CFM_OP_DATA_UPDATE,
 * the flow of data events becomes unrestricted.
 *
 * Note that a peek offset cannot be smaller than the corresponding pass offset.
 * Also a peek offset cannot be smaller than the corresponding end offset
 * of the last CFM_OP_DATA_OUT/CFM_OP_DATA_IN message dispatched. Trying
 * to set a smaller peek value is silently ignored.
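 *
 * Continuing the user space sketch, a CFM_OP_DATA_UPDATE action that moves
 * the offsets for the flow identified by "hdr" from the event loop above
 * might be built as follows (this assumes the cfil_msg_action header member
 * is named cfa_msghdr; the offset variables are illustrative values taken
 * from previously received data events):
 *
 *	struct cfil_msg_action act;
 *	bzero(&act, sizeof(act));
 *	act.cfa_msghdr.cfm_len = sizeof(act);
 *	act.cfa_msghdr.cfm_version = CFM_VERSION_CURRENT;
 *	act.cfa_msghdr.cfm_type = CFM_TYPE_ACTION;
 *	act.cfa_msghdr.cfm_op = CFM_OP_DATA_UPDATE;
 *	act.cfa_msghdr.cfm_sock_id = hdr->cfm_sock_id;
 *
 *	// Allow everything seen so far and ask to peek further ahead.
 *	// Pass offsets only move forward; smaller values are ignored.
 *	act.cfa_out_pass_offset = out_end_offset;
 *	act.cfa_out_peek_offset = out_end_offset + 16384;
 *	act.cfa_in_pass_offset = in_end_offset;
 *	act.cfa_in_peek_offset = in_end_offset + 16384;
 *
 *	send(fd, &act, sizeof(act), 0);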
 *
 *
 * PER SOCKET "struct cfil_info"
 *
 * As soon as a TCP/IP socket gets attached to a content filter, a
 * "struct cfil_info" is created to hold the content filtering state for this
 * socket.
 *
 * The content filtering state is made of the following information
 * for each direction:
 * - The current pass offset;
 * - The first and last offsets of the data pending, waiting for a filtering
 *   decision;
 * - The inject queue for data that passed the filters and that needs
 *   to be re-injected into the socket;
 * - A content filter specific state in a set of "struct cfil_entry".
 *
 *
 * CONTENT FILTER STATE "struct cfil_entry"
 *
 * The "struct cfil_entry" maintains the information most relevant to the
 * message handling over a kernel control socket with a user space filter agent.
 *
 * The "struct cfil_entry" holds the NECP filter control unit that corresponds
 * to its kernel control socket unit and also has a pointer
 * to the corresponding "struct content_filter".
 *
 * For each direction, "struct cfil_entry" maintains the following information:
 * - The offset of the last data peeked at by the filter
 * - A queue of data that's waiting to be delivered to the user space filter
 *   agent on the kernel control socket
 * - A queue of data for which event messages have been sent on the kernel
 *   control socket and that is pending a filtering decision.
 *
 *
 * CONTENT FILTER QUEUES
 *
 * Data that is being filtered is steered away from the TCP/IP socket buffer
 * and instead sits in one of three content filter queues until the data
 * can be re-injected into the TCP/IP socket buffer.
 *
 * A content filter queue is represented by "struct cfil_queue" that contains
 * a list of mbufs and the start and end offsets of the data span of the
 * mbuf list.
 *
 * The data moves through the three content filter queues according to the
 * following sequence:
 * a) The "cfe_ctl_q" of "struct cfil_entry"
 * b) The "cfe_pending_q" of "struct cfil_entry"
 * c) The "cfi_inject_q" of "struct cfil_info"
 *
 * Note: the sequence (a),(b) may be repeated several times if more than one
 * content filter is attached to the TCP/IP socket.
 *
 * The "cfe_ctl_q" queue holds data that cannot be delivered to the
 * kernel control socket for two reasons:
 * - The peek offset is less than the end offset of the mbuf data
 * - The kernel control socket is flow controlled
 *
 * The "cfe_pending_q" queue holds data for which CFM_OP_DATA_OUT or
 * CFM_OP_DATA_IN have been successfully dispatched to the kernel control
 * socket and that is waiting for a pass action message from the user space
 * filter agent. An mbuf length must be fully allowed to pass to be removed
 * from the cfe_pending_q.
 *
 * The "cfi_inject_q" queue holds data that has been fully allowed to pass
 * by the user space filter agent and that needs to be re-injected into the
 * TCP/IP socket buffer.
 *
 *
 * IMPACT ON FLOW CONTROL
 *
 * An essential aspect of the content filter subsystem is to minimize the
 * impact on flow control of the TCP/IP sockets being filtered.
 *
 * The processing overhead of content filtering may affect flow control by
 * adding noticeable delays and cannot be eliminated entirely -- care must be
 * taken by the user space filter agent to minimize its processing time.
 *
 * The amount of data being filtered is kept in buffers while waiting for
 * a decision by the user space filter agent. This amount of pending data
 * needs to be subtracted from the amount of data available in the
 * corresponding TCP/IP socket buffer. This is done by modifying
 * sbspace() and tcp_sbspace() to account for the amount of data pending
 * in the content filter.
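 *
 * Schematically, the space calculation becomes something like the following
 * (illustration only -- not the actual sbspace()/tcp_sbspace() code; it uses
 * the pending offsets kept in "struct cfil_info"):
 *
 *	pending = cfi_pending_last - cfi_pending_first;
 *	space = sb_hiwat - sb_cc;		// regular socket buffer space
 *	space = (space > pending) ? space - pending : 0;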
 *
 *
 * LOCKING STRATEGY
 *
 * The global state of the content filter subsystem is protected by a single
 * read-write lock "cfil_lck_rw". The data flow can be done with the
 * cfil read-write lock held as shared so it can be re-entered from multiple
 * threads.
 *
 * The per TCP/IP socket content filter state -- "struct cfil_info" -- is
 * protected by the socket lock.
 *
 * A TCP/IP socket lock cannot be taken while the cfil read-write lock
 * is held. That's why we have some sequences where we drop the cfil read-write
 * lock before taking the TCP/IP socket lock.
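 *
 * A typical sequence therefore looks like this (sketch only, mirroring what
 * cfil_ctl_rcvd() does below):
 *
 *	cfil_rw_lock_shared(&cfil_lck_rw);
 *	so = entry->cfe_cfil_info->cfi_so;
 *	cfil_rw_unlock_shared(&cfil_lck_rw);	// drop before taking the socket lock
 *	socket_lock(so, 1);
 *	// ... work on so->so_cfil under the socket lock ...
 *	socket_unlock(so, 1);
 *	cfil_rw_lock_shared(&cfil_lck_rw);	// re-acquire if more work remains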
 *
 * It is also important to lock the TCP/IP socket buffer while the content
 * filter is modifying the amount of pending data. Otherwise the calculations
 * in sbspace() and tcp_sbspace() could be wrong.
 *
 * The "cfil_lck_rw" protects "struct content_filter" and also the fields
 * "cfe_link" and "cfe_filter" of "struct cfil_entry".
 *
 * Actually "cfe_link" and "cfe_filter" are protected both by
 * "cfil_lck_rw" and the socket lock: they may be modified only when
 * "cfil_lck_rw" is held exclusive and the socket is locked.
 *
 * To read the other fields of "struct content_filter" we have to take
 * "cfil_lck_rw" in shared mode.
 *
 *
 * LIMITATIONS
 *
 * - For TCP sockets only
 *
 * - Does not support TCP unordered messages
 *
 *
 * TO DO LIST
 *
 * - If supporting datagrams, enqueue control and address mbufs as well
 */

#include <sys/types.h>
#include <sys/kern_control.h>
#include <sys/queue.h>
#include <sys/domain.h>
#include <sys/protosw.h>
#include <sys/syslog.h>

#include <kern/locks.h>
#include <kern/zalloc.h>
#include <kern/debug.h>

#include <net/content_filter.h>

#include <netinet/in_pcb.h>
#include <netinet/tcp.h>
#include <netinet/tcp_var.h>

#include <libkern/libkern.h>

#define MAX_CONTENT_FILTER 2

/*
 * The structure content_filter represents a user space content filter.
 * It's created and associated with a kernel control socket instance.
 */
struct content_filter {
	kern_ctl_ref		cf_kcref;
	u_int32_t		cf_kcunit;
	uint32_t		cf_flags;
	uint32_t		cf_necp_control_unit;
	uint32_t		cf_sock_count;
	TAILQ_HEAD(, cfil_entry) cf_sock_entries;
};

#define CFF_ACTIVE		0x01
#define CFF_DETACHING		0x02
#define CFF_FLOW_CONTROLLED	0x04

struct content_filter **content_filters = NULL;
uint32_t cfil_active_count = 0;		/* Number of active content filters */
uint32_t cfil_sock_attached_count = 0;	/* Number of socket attachments */
uint32_t cfil_close_wait_timeout = 1000; /* in milliseconds */

static kern_ctl_ref cfil_kctlref = NULL;

static lck_grp_attr_t *cfil_lck_grp_attr = NULL;
static lck_attr_t *cfil_lck_attr = NULL;
static lck_grp_t *cfil_lck_grp = NULL;
decl_lck_rw_data(static, cfil_lck_rw);

#define CFIL_RW_LCK_MAX 8

int cfil_rw_nxt_lck = 0;
void* cfil_rw_lock_history[CFIL_RW_LCK_MAX];

int cfil_rw_nxt_unlck = 0;
void* cfil_rw_unlock_history[CFIL_RW_LCK_MAX];

#define CONTENT_FILTER_ZONE_NAME	"content_filter"
#define CONTENT_FILTER_ZONE_MAX		10
static struct zone *content_filter_zone = NULL;	/* zone for content_filter */

#define CFIL_INFO_ZONE_NAME	"cfil_info"
#define CFIL_INFO_ZONE_MAX	1024
static struct zone *cfil_info_zone = NULL;	/* zone for cfil_info */

MBUFQ_HEAD(cfil_mqhead);

struct cfil_queue {
	uint64_t		q_start;	/* offset of first byte in queue */
	uint64_t		q_end;		/* offset of last byte in queue */
	struct cfil_mqhead	q_mq;
};

/*
 * struct cfil_entry
 *
 * There is one entry per content filter per socket.
 */
struct cfil_entry {
	TAILQ_ENTRY(cfil_entry)	cfe_link;
	struct content_filter	*cfe_filter;

	struct cfil_info	*cfe_cfil_info;
	uint32_t		cfe_flags;
	uint32_t		cfe_necp_control_unit;
	struct timeval		cfe_last_event;		/* To user space */
	struct timeval		cfe_last_action;	/* From user space */

	struct cfe_buf {
		/*
		 * cfe_pending_q holds data that has been delivered to
		 * the filter and for which we are waiting for an action
		 */
		struct cfil_queue	cfe_pending_q;
		/*
		 * This queue is for data that has not been delivered to
		 * the content filter (new data, pass peek or flow control)
		 */
		struct cfil_queue	cfe_ctl_q;

		uint64_t		cfe_pass_offset;
		uint64_t		cfe_peek_offset;
		uint64_t		cfe_peeked;
	} cfe_snd, cfe_rcv;
};

#define CFEF_CFIL_ATTACHED		0x0001	/* was attached to filter */
#define CFEF_SENT_SOCK_ATTACHED		0x0002	/* sock attach event was sent */
#define CFEF_DATA_START			0x0004	/* can send data event */
#define CFEF_FLOW_CONTROLLED		0x0008	/* wait for flow control lift */
#define CFEF_SENT_DISCONNECT_IN		0x0010	/* event was sent */
#define CFEF_SENT_DISCONNECT_OUT	0x0020	/* event was sent */
#define CFEF_SENT_SOCK_CLOSED		0x0040	/* closed event was sent */
#define CFEF_CFIL_DETACHED		0x0080	/* filter was detached */

/*
 * struct cfil_info
 *
 * There is a struct cfil_info per socket.
 */
struct cfil_info {
	TAILQ_ENTRY(cfil_info)	cfi_link;
	struct socket		*cfi_so;
	uint64_t		cfi_flags;
	uint64_t		cfi_sock_id;

	struct cfi_buf {
		/*
		 * cfi_pending_first and cfi_pending_last describe the total
		 * amount of data outstanding for all the filters on
		 * this socket and data in the flow queue.
		 * cfi_pending_mbcnt counts in sballoc() "chars of mbufs used"
		 */
		uint64_t		cfi_pending_first;
		uint64_t		cfi_pending_last;
		int			cfi_pending_mbcnt;
		/*
		 * cfi_pass_offset is the minimum of all the filters
		 */
		uint64_t		cfi_pass_offset;
		/*
		 * cfi_inject_q holds data that needs to be re-injected
		 * into the socket after filtering and that can
		 * be queued because of flow control
		 */
		struct cfil_queue	cfi_inject_q;
	} cfi_snd, cfi_rcv;

	struct cfil_entry	cfi_entries[MAX_CONTENT_FILTER];
};

#define CFIF_DROP		0x0001	/* drop action applied */
#define CFIF_CLOSE_WAIT		0x0002	/* waiting for filter to close */
#define CFIF_SOCK_CLOSED	0x0004	/* socket is closed */
#define CFIF_RETRY_INJECT_IN	0x0010	/* inject in failed */
#define CFIF_RETRY_INJECT_OUT	0x0020	/* inject out failed */
#define CFIF_SHUT_WR		0x0040	/* shutdown write */
#define CFIF_SHUT_RD		0x0080	/* shutdown read */

#define CFI_MASK_GENCNT		0xFFFFFFFF00000000	/* upper 32 bits */
#define CFI_SHIFT_GENCNT	32
#define CFI_MASK_FLOWHASH	0x00000000FFFFFFFF	/* lower 32 bits */
#define CFI_SHIFT_FLOWHASH	0
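
/*
 * For example, given a cfil_sock_id_t "id" built as
 * (so->so_gencnt << CFI_SHIFT_GENCNT) | inp->inp_flowhash,
 * the two halves can be recovered with:
 *
 *	gencnt   = (id & CFI_MASK_GENCNT) >> CFI_SHIFT_GENCNT;
 *	flowhash = (id & CFI_MASK_FLOWHASH) >> CFI_SHIFT_FLOWHASH;
 */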

TAILQ_HEAD(cfil_sock_head, cfil_info) cfil_sock_head;

#define CFIL_QUEUE_VERIFY(x) if (cfil_debug) cfil_queue_verify(x)
#define CFIL_INFO_VERIFY(x) if (cfil_debug) cfil_info_verify(x)

struct cfil_stats cfil_stats;

/*
 * For troubleshooting
 */
int cfil_log_level = LOG_ERR;

/*
 * Sysctls for logs and statistics
 */
static int sysctl_cfil_filter_list(struct sysctl_oid *, void *, int,
	struct sysctl_req *);
static int sysctl_cfil_sock_list(struct sysctl_oid *, void *, int,
	struct sysctl_req *);

SYSCTL_NODE(_net, OID_AUTO, cfil, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "cfil");

SYSCTL_INT(_net_cfil, OID_AUTO, log, CTLFLAG_RW|CTLFLAG_LOCKED,
	&cfil_log_level, 0, "");

SYSCTL_INT(_net_cfil, OID_AUTO, debug, CTLFLAG_RW|CTLFLAG_LOCKED,
	&cfil_debug, 0, "");

SYSCTL_UINT(_net_cfil, OID_AUTO, sock_attached_count, CTLFLAG_RD|CTLFLAG_LOCKED,
	&cfil_sock_attached_count, 0, "");

SYSCTL_UINT(_net_cfil, OID_AUTO, active_count, CTLFLAG_RD|CTLFLAG_LOCKED,
	&cfil_active_count, 0, "");

SYSCTL_UINT(_net_cfil, OID_AUTO, close_wait_timeout, CTLFLAG_RW|CTLFLAG_LOCKED,
	&cfil_close_wait_timeout, 0, "");

static int cfil_sbtrim = 1;
SYSCTL_UINT(_net_cfil, OID_AUTO, sbtrim, CTLFLAG_RW|CTLFLAG_LOCKED,
	&cfil_sbtrim, 0, "");

SYSCTL_PROC(_net_cfil, OID_AUTO, filter_list, CTLFLAG_RD|CTLFLAG_LOCKED,
	0, 0, sysctl_cfil_filter_list, "S,cfil_filter_stat", "");

SYSCTL_PROC(_net_cfil, OID_AUTO, sock_list, CTLFLAG_RD|CTLFLAG_LOCKED,
	0, 0, sysctl_cfil_sock_list, "S,cfil_sock_stat", "");

SYSCTL_STRUCT(_net_cfil, OID_AUTO, stats, CTLFLAG_RD|CTLFLAG_LOCKED,
	&cfil_stats, cfil_stats, "");
525 * Forward declaration to appease the compiler
527 static int cfil_action_data_pass(struct socket
*, uint32_t, int,
529 static int cfil_action_drop(struct socket
*, uint32_t);
530 static int cfil_dispatch_closed_event(struct socket
*, int);
531 static int cfil_data_common(struct socket
*, int, struct sockaddr
*,
532 struct mbuf
*, struct mbuf
*, uint32_t);
533 static int cfil_data_filter(struct socket
*, uint32_t, int,
534 struct mbuf
*, uint64_t);
535 static void fill_ip_sockaddr_4_6(union sockaddr_in_4_6
*,
536 struct in_addr
, u_int16_t
);
537 static void fill_ip6_sockaddr_4_6(union sockaddr_in_4_6
*,
538 struct in6_addr
*, u_int16_t
);
539 static int cfil_dispatch_attach_event(struct socket
*, uint32_t);
540 static void cfil_info_free(struct socket
*, struct cfil_info
*);
541 static struct cfil_info
* cfil_info_alloc(struct socket
*);
542 static int cfil_info_attach_unit(struct socket
*, uint32_t);
543 static struct socket
* cfil_socket_from_sock_id(cfil_sock_id_t
);
544 static int cfil_service_pending_queue(struct socket
*, uint32_t, int);
545 static int cfil_data_service_ctl_q(struct socket
*, uint32_t, int);
546 static void cfil_info_verify(struct cfil_info
*);
547 static int cfil_update_data_offsets(struct socket
*, uint32_t, int,
549 static int cfil_acquire_sockbuf(struct socket
*, int);
550 static void cfil_release_sockbuf(struct socket
*, int);
551 static int cfil_filters_attached(struct socket
*);
553 static void cfil_rw_lock_exclusive(lck_rw_t
*);
554 static void cfil_rw_unlock_exclusive(lck_rw_t
*);
555 static void cfil_rw_lock_shared(lck_rw_t
*);
556 static void cfil_rw_unlock_shared(lck_rw_t
*);
557 static boolean_t
cfil_rw_lock_shared_to_exclusive(lck_rw_t
*);
558 static void cfil_rw_lock_exclusive_to_shared(lck_rw_t
*);
560 static unsigned int cfil_data_length(struct mbuf
*, int *);
563 * Content filter global read write lock
567 cfil_rw_lock_exclusive(lck_rw_t
*lck
)
571 lr_saved
= __builtin_return_address(0);
573 lck_rw_lock_exclusive(lck
);
575 cfil_rw_lock_history
[cfil_rw_nxt_lck
] = lr_saved
;
576 cfil_rw_nxt_lck
= (cfil_rw_nxt_lck
+ 1) % CFIL_RW_LCK_MAX
;
580 cfil_rw_unlock_exclusive(lck_rw_t
*lck
)
584 lr_saved
= __builtin_return_address(0);
586 lck_rw_unlock_exclusive(lck
);
588 cfil_rw_unlock_history
[cfil_rw_nxt_unlck
] = lr_saved
;
589 cfil_rw_nxt_unlck
= (cfil_rw_nxt_unlck
+ 1) % CFIL_RW_LCK_MAX
;
593 cfil_rw_lock_shared(lck_rw_t
*lck
)
597 lr_saved
= __builtin_return_address(0);
599 lck_rw_lock_shared(lck
);
601 cfil_rw_lock_history
[cfil_rw_nxt_lck
] = lr_saved
;
602 cfil_rw_nxt_lck
= (cfil_rw_nxt_lck
+ 1) % CFIL_RW_LCK_MAX
;
606 cfil_rw_unlock_shared(lck_rw_t
*lck
)
610 lr_saved
= __builtin_return_address(0);
612 lck_rw_unlock_shared(lck
);
614 cfil_rw_unlock_history
[cfil_rw_nxt_unlck
] = lr_saved
;
615 cfil_rw_nxt_unlck
= (cfil_rw_nxt_unlck
+ 1) % CFIL_RW_LCK_MAX
;
619 cfil_rw_lock_shared_to_exclusive(lck_rw_t
*lck
)
624 lr_saved
= __builtin_return_address(0);
626 upgraded
= lck_rw_lock_shared_to_exclusive(lck
);
628 cfil_rw_unlock_history
[cfil_rw_nxt_unlck
] = lr_saved
;
629 cfil_rw_nxt_unlck
= (cfil_rw_nxt_unlck
+ 1) % CFIL_RW_LCK_MAX
;
635 cfil_rw_lock_exclusive_to_shared(lck_rw_t
*lck
)
639 lr_saved
= __builtin_return_address(0);
641 lck_rw_lock_exclusive_to_shared(lck
);
643 cfil_rw_lock_history
[cfil_rw_nxt_lck
] = lr_saved
;
644 cfil_rw_nxt_lck
= (cfil_rw_nxt_lck
+ 1) % CFIL_RW_LCK_MAX
;
648 cfil_rw_lock_assert_held(lck_rw_t
*lck
, int exclusive
)
651 exclusive
? LCK_RW_ASSERT_EXCLUSIVE
: LCK_RW_ASSERT_HELD
);
655 socket_lock_assert_owned(struct socket
*so
)
657 lck_mtx_t
*mutex_held
;
659 if (so
->so_proto
->pr_getlock
!= NULL
)
660 mutex_held
= (*so
->so_proto
->pr_getlock
)(so
, 0);
662 mutex_held
= so
->so_proto
->pr_domain
->dom_mtx
;
664 lck_mtx_assert(mutex_held
, LCK_MTX_ASSERT_OWNED
);
668 * Return the number of bytes in the mbuf chain using the same
669 * method as m_length() or sballoc()
672 cfil_data_length(struct mbuf
*m
, int *retmbcnt
)
678 if (retmbcnt
== NULL
)
679 return (m_length(m
));
683 for (m0
= m
; m0
!= NULL
; m0
= m0
->m_next
) {
686 if (m0
->m_flags
& M_EXT
)
687 mbcnt
+= m0
->m_ext
.ext_size
;
694 * Common mbuf queue utilities
698 cfil_queue_init(struct cfil_queue
*cfq
)
702 MBUFQ_INIT(&cfq
->q_mq
);
705 static inline uint64_t
706 cfil_queue_drain(struct cfil_queue
*cfq
)
708 uint64_t drained
= cfq
->q_start
- cfq
->q_end
;
711 MBUFQ_DRAIN(&cfq
->q_mq
);
716 /* Return 1 when empty, 0 otherwise */
718 cfil_queue_empty(struct cfil_queue
*cfq
)
720 return (MBUFQ_EMPTY(&cfq
->q_mq
));
723 static inline uint64_t
724 cfil_queue_offset_first(struct cfil_queue
*cfq
)
726 return (cfq
->q_start
);
729 static inline uint64_t
730 cfil_queue_offset_last(struct cfil_queue
*cfq
)
735 static inline uint64_t
736 cfil_queue_len(struct cfil_queue
*cfq
)
738 return (cfq
->q_end
- cfq
->q_start
);
742 * Routines to verify some fundamental assumptions
746 cfil_queue_verify(struct cfil_queue
*cfq
)
750 uint64_t queuesize
= 0;
/* Verify offsets are ordered */
753 VERIFY(cfq
->q_start
<= cfq
->q_end
);
756 * When queue is empty, the offsets are equal otherwise the offsets
759 VERIFY((MBUFQ_EMPTY(&cfq
->q_mq
) && cfq
->q_start
== cfq
->q_end
) ||
760 (!MBUFQ_EMPTY(&cfq
->q_mq
) &&
761 cfq
->q_start
!= cfq
->q_end
));
763 MBUFQ_FOREACH(m
, &cfq
->q_mq
) {
764 size_t chainsize
= 0;
765 unsigned int mlen
= m_length(m
);
767 if (m
== (void *)M_TAG_FREE_PATTERN
||
768 m
->m_next
== (void *)M_TAG_FREE_PATTERN
||
769 m
->m_nextpkt
== (void *)M_TAG_FREE_PATTERN
)
770 panic("%s - mq %p is free at %p", __func__
,
772 for (n
= m
; n
!= NULL
; n
= n
->m_next
) {
773 if (n
->m_type
!= MT_DATA
&&
774 n
->m_type
!= MT_HEADER
&&
775 n
->m_type
!= MT_OOBDATA
)
776 panic("%s - %p unsupported type %u", __func__
,
778 chainsize
+= n
->m_len
;
780 if (mlen
!= chainsize
)
781 panic("%s - %p m_length() %u != chainsize %lu",
782 __func__
, m
, mlen
, chainsize
);
783 queuesize
+= chainsize
;
785 if (queuesize
!= cfq
->q_end
- cfq
->q_start
)
786 panic("%s - %p queuesize %llu != offsetdiffs %llu", __func__
,
787 m
, queuesize
, cfq
->q_end
- cfq
->q_start
);
791 cfil_queue_enqueue(struct cfil_queue
*cfq
, mbuf_t m
, size_t len
)
793 CFIL_QUEUE_VERIFY(cfq
);
795 MBUFQ_ENQUEUE(&cfq
->q_mq
, m
);
798 CFIL_QUEUE_VERIFY(cfq
);
802 cfil_queue_remove(struct cfil_queue
*cfq
, mbuf_t m
, size_t len
)
804 CFIL_QUEUE_VERIFY(cfq
);
806 VERIFY(m_length(m
) == len
);
808 MBUFQ_REMOVE(&cfq
->q_mq
, m
);
809 MBUFQ_NEXT(m
) = NULL
;
812 CFIL_QUEUE_VERIFY(cfq
);
816 cfil_queue_first(struct cfil_queue
*cfq
)
818 return (MBUFQ_FIRST(&cfq
->q_mq
));
822 cfil_queue_next(struct cfil_queue
*cfq
, mbuf_t m
)
825 return (MBUFQ_NEXT(m
));
829 cfil_entry_buf_verify(struct cfe_buf
*cfe_buf
)
831 CFIL_QUEUE_VERIFY(&cfe_buf
->cfe_ctl_q
);
832 CFIL_QUEUE_VERIFY(&cfe_buf
->cfe_pending_q
);
834 /* Verify the queues are ordered so that pending is before ctl */
835 VERIFY(cfe_buf
->cfe_ctl_q
.q_start
>= cfe_buf
->cfe_pending_q
.q_end
);
837 /* The peek offset cannot be less than the pass offset */
838 VERIFY(cfe_buf
->cfe_peek_offset
>= cfe_buf
->cfe_pass_offset
);
840 /* Make sure we've updated the offset we peeked at */
841 VERIFY(cfe_buf
->cfe_ctl_q
.q_start
<= cfe_buf
->cfe_peeked
);
845 cfil_entry_verify(struct cfil_entry
*entry
)
847 cfil_entry_buf_verify(&entry
->cfe_snd
);
848 cfil_entry_buf_verify(&entry
->cfe_rcv
);
852 cfil_info_buf_verify(struct cfi_buf
*cfi_buf
)
854 CFIL_QUEUE_VERIFY(&cfi_buf
->cfi_inject_q
);
856 VERIFY(cfi_buf
->cfi_pending_first
<= cfi_buf
->cfi_pending_last
);
857 VERIFY(cfi_buf
->cfi_pending_mbcnt
>= 0);
861 cfil_info_verify(struct cfil_info
*cfil_info
)
865 if (cfil_info
== NULL
)
868 cfil_info_buf_verify(&cfil_info
->cfi_snd
);
869 cfil_info_buf_verify(&cfil_info
->cfi_rcv
);
871 for (i
= 0; i
< MAX_CONTENT_FILTER
; i
++)
872 cfil_entry_verify(&cfil_info
->cfi_entries
[i
]);
876 verify_content_filter(struct content_filter
*cfc
)
878 struct cfil_entry
*entry
;
881 VERIFY(cfc
->cf_sock_count
>= 0);
883 TAILQ_FOREACH(entry
, &cfc
->cf_sock_entries
, cfe_link
) {
885 VERIFY(cfc
== entry
->cfe_filter
);
887 VERIFY(count
== cfc
->cf_sock_count
);
891 * Kernel control socket callbacks
894 cfil_ctl_connect(kern_ctl_ref kctlref
, struct sockaddr_ctl
*sac
,
898 struct content_filter
*cfc
= NULL
;
900 CFIL_LOG(LOG_NOTICE
, "");
902 cfc
= zalloc(content_filter_zone
);
904 CFIL_LOG(LOG_ERR
, "zalloc failed");
908 bzero(cfc
, sizeof(struct content_filter
));
910 cfil_rw_lock_exclusive(&cfil_lck_rw
);
911 if (content_filters
== NULL
) {
912 struct content_filter
**tmp
;
914 cfil_rw_unlock_exclusive(&cfil_lck_rw
);
917 struct content_filter
**,
918 MAX_CONTENT_FILTER
* sizeof(struct content_filter
*),
922 cfil_rw_lock_exclusive(&cfil_lck_rw
);
924 if (tmp
== NULL
&& content_filters
== NULL
) {
926 cfil_rw_unlock_exclusive(&cfil_lck_rw
);
929 /* Another thread may have won the race */
930 if (content_filters
!= NULL
)
933 content_filters
= tmp
;
936 if (sac
->sc_unit
== 0 || sac
->sc_unit
> MAX_CONTENT_FILTER
) {
937 CFIL_LOG(LOG_ERR
, "bad sc_unit %u", sac
->sc_unit
);
939 } else if (content_filters
[sac
->sc_unit
- 1] != NULL
) {
940 CFIL_LOG(LOG_ERR
, "sc_unit %u in use", sac
->sc_unit
);
944 * kernel control socket kcunit numbers start at 1
946 content_filters
[sac
->sc_unit
- 1] = cfc
;
948 cfc
->cf_kcref
= kctlref
;
949 cfc
->cf_kcunit
= sac
->sc_unit
;
950 TAILQ_INIT(&cfc
->cf_sock_entries
);
955 cfil_rw_unlock_exclusive(&cfil_lck_rw
);
957 if (error
!= 0 && cfc
!= NULL
)
958 zfree(content_filter_zone
, cfc
);
961 OSIncrementAtomic(&cfil_stats
.cfs_ctl_connect_ok
);
963 OSIncrementAtomic(&cfil_stats
.cfs_ctl_connect_fail
);
965 CFIL_LOG(LOG_INFO
, "return %d cfil_active_count %u kcunit %u",
966 error
, cfil_active_count
, sac
->sc_unit
);
972 cfil_ctl_disconnect(kern_ctl_ref kctlref
, u_int32_t kcunit
, void *unitinfo
)
974 #pragma unused(kctlref)
976 struct content_filter
*cfc
;
977 struct cfil_entry
*entry
;
979 CFIL_LOG(LOG_NOTICE
, "");
981 if (content_filters
== NULL
) {
982 CFIL_LOG(LOG_ERR
, "no content filter");
986 if (kcunit
> MAX_CONTENT_FILTER
) {
987 CFIL_LOG(LOG_ERR
, "kcunit %u > MAX_CONTENT_FILTER (%d)",
988 kcunit
, MAX_CONTENT_FILTER
);
993 cfc
= (struct content_filter
*)unitinfo
;
997 cfil_rw_lock_exclusive(&cfil_lck_rw
);
998 if (content_filters
[kcunit
- 1] != cfc
|| cfc
->cf_kcunit
!= kcunit
) {
999 CFIL_LOG(LOG_ERR
, "bad unit info %u)",
1001 cfil_rw_unlock_exclusive(&cfil_lck_rw
);
1004 cfc
->cf_flags
|= CFF_DETACHING
;
1006 * Remove all sockets from the filter
1008 while ((entry
= TAILQ_FIRST(&cfc
->cf_sock_entries
)) != NULL
) {
1009 cfil_rw_lock_assert_held(&cfil_lck_rw
, 1);
1011 verify_content_filter(cfc
);
1013 * Accept all outstanding data by pushing to next filter
1016 * TBD: Actually we should make sure all data has been pushed
1019 if (entry
->cfe_cfil_info
&& entry
->cfe_cfil_info
->cfi_so
) {
1020 struct cfil_info
*cfil_info
= entry
->cfe_cfil_info
;
1021 struct socket
*so
= cfil_info
->cfi_so
;
1023 /* Need to let data flow immediately */
1024 entry
->cfe_flags
|= CFEF_SENT_SOCK_ATTACHED
|
1028 * Respect locking hierarchy
1030 cfil_rw_unlock_exclusive(&cfil_lck_rw
);
1035 * When cfe_filter is NULL the filter is detached
1036 * and the entry has been removed from cf_sock_entries
1038 if (so
->so_cfil
== NULL
|| entry
->cfe_filter
== NULL
) {
1039 cfil_rw_lock_exclusive(&cfil_lck_rw
);
1042 (void) cfil_action_data_pass(so
, kcunit
, 1,
1046 (void) cfil_action_data_pass(so
, kcunit
, 0,
1050 cfil_rw_lock_exclusive(&cfil_lck_rw
);
1053 * Check again as the socket may have been unlocked
* when calling cfil_acquire_sockbuf()
1056 if (so
->so_cfil
== NULL
|| entry
->cfe_filter
== NULL
)
1059 /* The filter is now detached */
1060 entry
->cfe_flags
|= CFEF_CFIL_DETACHED
;
1061 CFIL_LOG(LOG_NOTICE
, "so %llx detached %u",
1062 (uint64_t)VM_KERNEL_ADDRPERM(so
), kcunit
);
1064 if ((so
->so_cfil
->cfi_flags
& CFIF_CLOSE_WAIT
) &&
1065 cfil_filters_attached(so
) == 0) {
1066 CFIL_LOG(LOG_NOTICE
, "so %llx waking",
1067 (uint64_t)VM_KERNEL_ADDRPERM(so
));
1068 wakeup((caddr_t
)&so
->so_cfil
);
1072 * Remove the filter entry from the content filter
1073 * but leave the rest of the state intact as the queues
1074 * may not be empty yet
1076 entry
->cfe_filter
= NULL
;
1077 entry
->cfe_necp_control_unit
= 0;
1079 TAILQ_REMOVE(&cfc
->cf_sock_entries
, entry
, cfe_link
);
1080 cfc
->cf_sock_count
--;
1082 socket_unlock(so
, 1);
1085 verify_content_filter(cfc
);
1087 VERIFY(cfc
->cf_sock_count
== 0);
1090 * Make filter inactive
1092 content_filters
[kcunit
- 1] = NULL
;
1093 cfil_active_count
--;
1094 cfil_rw_unlock_exclusive(&cfil_lck_rw
);
1096 zfree(content_filter_zone
, cfc
);
1099 OSIncrementAtomic(&cfil_stats
.cfs_ctl_disconnect_ok
);
1101 OSIncrementAtomic(&cfil_stats
.cfs_ctl_disconnect_fail
);
1103 CFIL_LOG(LOG_INFO
, "return %d cfil_active_count %u kcunit %u",
1104 error
, cfil_active_count
, kcunit
);
1110 * cfil_acquire_sockbuf()
1112 * Prevent any other thread from acquiring the sockbuf
1113 * We use sb_cfil_thread as a semaphore to prevent other threads from
1114 * messing with the sockbuf -- see sblock()
1115 * Note: We do not set SB_LOCK here because the thread may check or modify
1116 * SB_LOCK several times until it calls cfil_release_sockbuf() -- currently
1117 * sblock(), sbunlock() or sodefunct()
1120 cfil_acquire_sockbuf(struct socket
*so
, int outgoing
)
1122 thread_t tp
= current_thread();
1123 struct sockbuf
*sb
= outgoing
? &so
->so_snd
: &so
->so_rcv
;
1124 lck_mtx_t
*mutex_held
;
1128 * Wait until no thread is holding the sockbuf and other content
1129 * filter threads have released the sockbuf
1131 while ((sb
->sb_flags
& SB_LOCK
) ||
1132 (sb
->sb_cfil_thread
!= NULL
&& sb
->sb_cfil_thread
!= tp
)) {
1133 if (so
->so_proto
->pr_getlock
!= NULL
)
1134 mutex_held
= (*so
->so_proto
->pr_getlock
)(so
, 0);
1136 mutex_held
= so
->so_proto
->pr_domain
->dom_mtx
;
1138 lck_mtx_assert(mutex_held
, LCK_MTX_ASSERT_OWNED
);
1141 VERIFY(sb
->sb_wantlock
!= 0);
1143 msleep(&sb
->sb_flags
, mutex_held
, PSOCK
, "cfil_acquire_sockbuf",
1146 VERIFY(sb
->sb_wantlock
!= 0);
1150 * Use reference count for repetitive calls on same thread
1152 if (sb
->sb_cfil_refs
== 0) {
1153 VERIFY(sb
->sb_cfil_thread
== NULL
);
1154 VERIFY((sb
->sb_flags
& SB_LOCK
) == 0);
1156 sb
->sb_cfil_thread
= tp
;
1157 sb
->sb_flags
|= SB_LOCK
;
1161 /* We acquire the socket buffer when we need to cleanup */
1162 if (so
->so_cfil
== NULL
) {
1163 CFIL_LOG(LOG_ERR
, "so %llx cfil detached",
1164 (uint64_t)VM_KERNEL_ADDRPERM(so
));
1166 } else if (so
->so_cfil
->cfi_flags
& CFIF_DROP
) {
1167 CFIL_LOG(LOG_ERR
, "so %llx drop set",
1168 (uint64_t)VM_KERNEL_ADDRPERM(so
));
1176 cfil_release_sockbuf(struct socket
*so
, int outgoing
)
1178 struct sockbuf
*sb
= outgoing
? &so
->so_snd
: &so
->so_rcv
;
1179 thread_t tp
= current_thread();
1181 socket_lock_assert_owned(so
);
1183 if (sb
->sb_cfil_thread
!= NULL
&& sb
->sb_cfil_thread
!= tp
)
1184 panic("%s sb_cfil_thread %p not current %p", __func__
,
1185 sb
->sb_cfil_thread
, tp
);
1187 * Don't panic if we are defunct because SB_LOCK has
1188 * been cleared by sodefunct()
1190 if (!(so
->so_flags
& SOF_DEFUNCT
) && !(sb
->sb_flags
& SB_LOCK
))
1191 panic("%s SB_LOCK not set on %p", __func__
,
1194 * We can unlock when the thread unwinds to the last reference
1197 if (sb
->sb_cfil_refs
== 0) {
1198 sb
->sb_cfil_thread
= NULL
;
1199 sb
->sb_flags
&= ~SB_LOCK
;
1201 if (sb
->sb_wantlock
> 0)
1202 wakeup(&sb
->sb_flags
);
1207 cfil_sock_id_from_socket(struct socket
*so
)
1209 if ((so
->so_flags
& SOF_CONTENT_FILTER
) && so
->so_cfil
)
1210 return (so
->so_cfil
->cfi_sock_id
);
1212 return (CFIL_SOCK_ID_NONE
);
1215 static struct socket
*
1216 cfil_socket_from_sock_id(cfil_sock_id_t cfil_sock_id
)
1218 struct socket
*so
= NULL
;
1219 u_int64_t gencnt
= cfil_sock_id
>> 32;
1220 u_int32_t flowhash
= (u_int32_t
)(cfil_sock_id
& 0x0ffffffff);
1221 struct inpcb
*inp
= NULL
;
1222 struct inpcbinfo
*pcbinfo
= &tcbinfo
;
1224 lck_rw_lock_shared(pcbinfo
->ipi_lock
);
1225 LIST_FOREACH(inp
, pcbinfo
->ipi_listhead
, inp_list
) {
1226 if (inp
->inp_state
!= INPCB_STATE_DEAD
&&
1227 inp
->inp_socket
!= NULL
&&
1228 inp
->inp_flowhash
== flowhash
&&
1229 (inp
->inp_socket
->so_gencnt
& 0x0ffffffff) == gencnt
&&
1230 inp
->inp_socket
->so_cfil
!= NULL
) {
1231 so
= inp
->inp_socket
;
1235 lck_rw_done(pcbinfo
->ipi_lock
);
1238 OSIncrementAtomic(&cfil_stats
.cfs_sock_id_not_found
);
1240 "no socket for sock_id %llx gencnt %llx flowhash %x",
1241 cfil_sock_id
, gencnt
, flowhash
);
1248 cfil_ctl_send(kern_ctl_ref kctlref
, u_int32_t kcunit
, void *unitinfo
, mbuf_t m
,
1251 #pragma unused(kctlref, flags)
1253 struct cfil_msg_hdr
*msghdr
;
1254 struct content_filter
*cfc
= (struct content_filter
*)unitinfo
;
1256 struct cfil_msg_action
*action_msg
;
1257 struct cfil_entry
*entry
;
1259 CFIL_LOG(LOG_INFO
, "");
1261 if (content_filters
== NULL
) {
1262 CFIL_LOG(LOG_ERR
, "no content filter");
1266 if (kcunit
> MAX_CONTENT_FILTER
) {
1267 CFIL_LOG(LOG_ERR
, "kcunit %u > MAX_CONTENT_FILTER (%d)",
1268 kcunit
, MAX_CONTENT_FILTER
);
1273 if (m_length(m
) < sizeof(struct cfil_msg_hdr
)) {
1274 CFIL_LOG(LOG_ERR
, "too short %u", m_length(m
));
1278 msghdr
= (struct cfil_msg_hdr
*)mbuf_data(m
);
1279 if (msghdr
->cfm_version
!= CFM_VERSION_CURRENT
) {
1280 CFIL_LOG(LOG_ERR
, "bad version %u", msghdr
->cfm_version
);
1284 if (msghdr
->cfm_type
!= CFM_TYPE_ACTION
) {
1285 CFIL_LOG(LOG_ERR
, "bad type %u", msghdr
->cfm_type
);
1289 /* Validate action operation */
1290 switch (msghdr
->cfm_op
) {
1291 case CFM_OP_DATA_UPDATE
:
1293 &cfil_stats
.cfs_ctl_action_data_update
);
1296 OSIncrementAtomic(&cfil_stats
.cfs_ctl_action_drop
);
1299 OSIncrementAtomic(&cfil_stats
.cfs_ctl_action_bad_op
);
1300 CFIL_LOG(LOG_ERR
, "bad op %u", msghdr
->cfm_op
);
1304 if (msghdr
->cfm_len
!= sizeof(struct cfil_msg_action
)) {
1305 OSIncrementAtomic(&cfil_stats
.cfs_ctl_action_bad_len
);
1307 CFIL_LOG(LOG_ERR
, "bad len: %u for op %u",
1312 cfil_rw_lock_shared(&cfil_lck_rw
);
1313 if (cfc
!= (void *)content_filters
[kcunit
- 1]) {
1314 CFIL_LOG(LOG_ERR
, "unitinfo does not match for kcunit %u",
1317 cfil_rw_unlock_shared(&cfil_lck_rw
);
1321 so
= cfil_socket_from_sock_id(msghdr
->cfm_sock_id
);
1323 CFIL_LOG(LOG_NOTICE
, "bad sock_id %llx",
1324 msghdr
->cfm_sock_id
);
1326 cfil_rw_unlock_shared(&cfil_lck_rw
);
1329 cfil_rw_unlock_shared(&cfil_lck_rw
);
1333 if (so
->so_cfil
== NULL
) {
1334 CFIL_LOG(LOG_NOTICE
, "so %llx not attached",
1335 (uint64_t)VM_KERNEL_ADDRPERM(so
));
1338 } else if (so
->so_cfil
->cfi_flags
& CFIF_DROP
) {
1339 CFIL_LOG(LOG_NOTICE
, "so %llx drop set",
1340 (uint64_t)VM_KERNEL_ADDRPERM(so
));
1344 entry
= &so
->so_cfil
->cfi_entries
[kcunit
- 1];
1345 if (entry
->cfe_filter
== NULL
) {
1346 CFIL_LOG(LOG_NOTICE
, "so %llx no filter",
1347 (uint64_t)VM_KERNEL_ADDRPERM(so
));
1352 if (entry
->cfe_flags
& CFEF_SENT_SOCK_ATTACHED
)
1353 entry
->cfe_flags
|= CFEF_DATA_START
;
1356 "so %llx attached not sent for %u",
1357 (uint64_t)VM_KERNEL_ADDRPERM(so
), kcunit
);
1362 microuptime(&entry
->cfe_last_action
);
1364 action_msg
= (struct cfil_msg_action
*)msghdr
;
1366 switch (msghdr
->cfm_op
) {
1367 case CFM_OP_DATA_UPDATE
:
1368 if (action_msg
->cfa_out_peek_offset
!= 0 ||
1369 action_msg
->cfa_out_pass_offset
!= 0)
1370 error
= cfil_action_data_pass(so
, kcunit
, 1,
1371 action_msg
->cfa_out_pass_offset
,
1372 action_msg
->cfa_out_peek_offset
);
1373 if (error
== EJUSTRETURN
)
1377 if (action_msg
->cfa_in_peek_offset
!= 0 ||
1378 action_msg
->cfa_in_pass_offset
!= 0)
1379 error
= cfil_action_data_pass(so
, kcunit
, 0,
1380 action_msg
->cfa_in_pass_offset
,
1381 action_msg
->cfa_in_peek_offset
);
1382 if (error
== EJUSTRETURN
)
1387 error
= cfil_action_drop(so
, kcunit
);
1395 socket_unlock(so
, 1);
1400 OSIncrementAtomic(&cfil_stats
.cfs_ctl_send_ok
);
1402 OSIncrementAtomic(&cfil_stats
.cfs_ctl_send_bad
);
1408 cfil_ctl_getopt(kern_ctl_ref kctlref
, u_int32_t kcunit
, void *unitinfo
,
1409 int opt
, void *data
, size_t *len
)
1411 #pragma unused(kctlref, opt)
1413 struct content_filter
*cfc
= (struct content_filter
*)unitinfo
;
1415 CFIL_LOG(LOG_NOTICE
, "");
1417 cfil_rw_lock_shared(&cfil_lck_rw
);
1419 if (content_filters
== NULL
) {
1420 CFIL_LOG(LOG_ERR
, "no content filter");
1424 if (kcunit
> MAX_CONTENT_FILTER
) {
1425 CFIL_LOG(LOG_ERR
, "kcunit %u > MAX_CONTENT_FILTER (%d)",
1426 kcunit
, MAX_CONTENT_FILTER
);
1430 if (cfc
!= (void *)content_filters
[kcunit
- 1]) {
1431 CFIL_LOG(LOG_ERR
, "unitinfo does not match for kcunit %u",
1437 case CFIL_OPT_NECP_CONTROL_UNIT
:
1438 if (*len
< sizeof(uint32_t)) {
1439 CFIL_LOG(LOG_ERR
, "len too small %lu", *len
);
1444 *(uint32_t *)data
= cfc
->cf_necp_control_unit
;
1447 error
= ENOPROTOOPT
;
1451 cfil_rw_unlock_shared(&cfil_lck_rw
);
1457 cfil_ctl_setopt(kern_ctl_ref kctlref
, u_int32_t kcunit
, void *unitinfo
,
1458 int opt
, void *data
, size_t len
)
1460 #pragma unused(kctlref, opt)
1462 struct content_filter
*cfc
= (struct content_filter
*)unitinfo
;
1464 CFIL_LOG(LOG_NOTICE
, "");
1466 cfil_rw_lock_exclusive(&cfil_lck_rw
);
1468 if (content_filters
== NULL
) {
1469 CFIL_LOG(LOG_ERR
, "no content filter");
1473 if (kcunit
> MAX_CONTENT_FILTER
) {
1474 CFIL_LOG(LOG_ERR
, "kcunit %u > MAX_CONTENT_FILTER (%d)",
1475 kcunit
, MAX_CONTENT_FILTER
);
1479 if (cfc
!= (void *)content_filters
[kcunit
- 1]) {
1480 CFIL_LOG(LOG_ERR
, "unitinfo does not match for kcunit %u",
1486 case CFIL_OPT_NECP_CONTROL_UNIT
:
1487 if (len
< sizeof(uint32_t)) {
1488 CFIL_LOG(LOG_ERR
, "CFIL_OPT_NECP_CONTROL_UNIT "
1489 "len too small %lu", len
);
1493 if (cfc
->cf_necp_control_unit
!= 0) {
1494 CFIL_LOG(LOG_ERR
, "CFIL_OPT_NECP_CONTROL_UNIT "
1496 cfc
->cf_necp_control_unit
);
1500 cfc
->cf_necp_control_unit
= *(uint32_t *)data
;
1503 error
= ENOPROTOOPT
;
1507 cfil_rw_unlock_exclusive(&cfil_lck_rw
);
1514 cfil_ctl_rcvd(kern_ctl_ref kctlref
, u_int32_t kcunit
, void *unitinfo
, int flags
)
1516 #pragma unused(kctlref, flags)
1517 struct content_filter
*cfc
= (struct content_filter
*)unitinfo
;
1518 struct socket
*so
= NULL
;
1520 struct cfil_entry
*entry
;
1522 CFIL_LOG(LOG_INFO
, "");
1524 if (content_filters
== NULL
) {
1525 CFIL_LOG(LOG_ERR
, "no content filter");
1526 OSIncrementAtomic(&cfil_stats
.cfs_ctl_rcvd_bad
);
1529 if (kcunit
> MAX_CONTENT_FILTER
) {
1530 CFIL_LOG(LOG_ERR
, "kcunit %u > MAX_CONTENT_FILTER (%d)",
1531 kcunit
, MAX_CONTENT_FILTER
);
1532 OSIncrementAtomic(&cfil_stats
.cfs_ctl_rcvd_bad
);
1535 cfil_rw_lock_shared(&cfil_lck_rw
);
1536 if (cfc
!= (void *)content_filters
[kcunit
- 1]) {
1537 CFIL_LOG(LOG_ERR
, "unitinfo does not match for kcunit %u",
1539 OSIncrementAtomic(&cfil_stats
.cfs_ctl_rcvd_bad
);
1542 /* Let's assume the flow control is lifted */
1543 if (cfc
->cf_flags
& CFF_FLOW_CONTROLLED
) {
1544 if (!cfil_rw_lock_shared_to_exclusive(&cfil_lck_rw
))
1545 cfil_rw_lock_exclusive(&cfil_lck_rw
);
1547 cfc
->cf_flags
&= ~CFF_FLOW_CONTROLLED
;
1549 cfil_rw_lock_exclusive_to_shared(&cfil_lck_rw
);
1550 lck_rw_assert(&cfil_lck_rw
, LCK_RW_ASSERT_SHARED
);
1553 * Flow control will be raised again as soon as an entry cannot enqueue
1554 * to the kernel control socket
1556 while ((cfc
->cf_flags
& CFF_FLOW_CONTROLLED
) == 0) {
1557 verify_content_filter(cfc
);
1559 cfil_rw_lock_assert_held(&cfil_lck_rw
, 0);
1561 /* Find an entry that is flow controlled */
1562 TAILQ_FOREACH(entry
, &cfc
->cf_sock_entries
, cfe_link
) {
1563 if (entry
->cfe_cfil_info
== NULL
||
1564 entry
->cfe_cfil_info
->cfi_so
== NULL
)
1566 if ((entry
->cfe_flags
& CFEF_FLOW_CONTROLLED
) == 0)
1572 OSIncrementAtomic(&cfil_stats
.cfs_ctl_rcvd_flow_lift
);
1574 so
= entry
->cfe_cfil_info
->cfi_so
;
1576 cfil_rw_unlock_shared(&cfil_lck_rw
);
1580 error
= cfil_acquire_sockbuf(so
, 1);
1582 error
= cfil_data_service_ctl_q(so
, kcunit
, 1);
1583 cfil_release_sockbuf(so
, 1);
1587 error
= cfil_acquire_sockbuf(so
, 0);
1589 error
= cfil_data_service_ctl_q(so
, kcunit
, 0);
1590 cfil_release_sockbuf(so
, 0);
1593 socket_lock_assert_owned(so
);
1594 socket_unlock(so
, 1);
1596 cfil_rw_lock_shared(&cfil_lck_rw
);
1599 cfil_rw_unlock_shared(&cfil_lck_rw
);
1605 struct kern_ctl_reg kern_ctl
;
1607 vm_size_t content_filter_size
= 0; /* size of content_filter */
1608 vm_size_t cfil_info_size
= 0; /* size of cfil_info */
1610 CFIL_LOG(LOG_NOTICE
, "");
1613 * Compile time verifications
1615 _CASSERT(CFIL_MAX_FILTER_COUNT
== MAX_CONTENT_FILTER
);
1616 _CASSERT(sizeof(struct cfil_filter_stat
) % sizeof(uint32_t) == 0);
1617 _CASSERT(sizeof(struct cfil_entry_stat
) % sizeof(uint32_t) == 0);
1618 _CASSERT(sizeof(struct cfil_sock_stat
) % sizeof(uint32_t) == 0);
1621 * Runtime time verifications
1623 VERIFY(IS_P2ALIGNED(&cfil_stats
.cfs_ctl_q_in_enqueued
,
1625 VERIFY(IS_P2ALIGNED(&cfil_stats
.cfs_ctl_q_out_enqueued
,
1627 VERIFY(IS_P2ALIGNED(&cfil_stats
.cfs_ctl_q_in_peeked
,
1629 VERIFY(IS_P2ALIGNED(&cfil_stats
.cfs_ctl_q_out_peeked
,
1632 VERIFY(IS_P2ALIGNED(&cfil_stats
.cfs_pending_q_in_enqueued
,
1634 VERIFY(IS_P2ALIGNED(&cfil_stats
.cfs_pending_q_out_enqueued
,
1637 VERIFY(IS_P2ALIGNED(&cfil_stats
.cfs_inject_q_in_enqueued
,
1639 VERIFY(IS_P2ALIGNED(&cfil_stats
.cfs_inject_q_out_enqueued
,
1641 VERIFY(IS_P2ALIGNED(&cfil_stats
.cfs_inject_q_in_passed
,
1643 VERIFY(IS_P2ALIGNED(&cfil_stats
.cfs_inject_q_out_passed
,
1647 * Zone for content filters kernel control sockets
1649 content_filter_size
= sizeof(struct content_filter
);
1650 content_filter_zone
= zinit(content_filter_size
,
1651 CONTENT_FILTER_ZONE_MAX
* content_filter_size
,
1653 CONTENT_FILTER_ZONE_NAME
);
1654 if (content_filter_zone
== NULL
) {
1655 panic("%s: zinit(%s) failed", __func__
,
1656 CONTENT_FILTER_ZONE_NAME
);
1659 zone_change(content_filter_zone
, Z_CALLERACCT
, FALSE
);
1660 zone_change(content_filter_zone
, Z_EXPAND
, TRUE
);
1663 * Zone for per socket content filters
1665 cfil_info_size
= sizeof(struct cfil_info
);
1666 cfil_info_zone
= zinit(cfil_info_size
,
1667 CFIL_INFO_ZONE_MAX
* cfil_info_size
,
1669 CFIL_INFO_ZONE_NAME
);
1670 if (cfil_info_zone
== NULL
) {
1671 panic("%s: zinit(%s) failed", __func__
, CFIL_INFO_ZONE_NAME
);
1674 zone_change(cfil_info_zone
, Z_CALLERACCT
, FALSE
);
1675 zone_change(cfil_info_zone
, Z_EXPAND
, TRUE
);
1680 cfil_lck_grp_attr
= lck_grp_attr_alloc_init();
1681 if (cfil_lck_grp_attr
== NULL
) {
1682 panic("%s: lck_grp_attr_alloc_init failed", __func__
);
1685 cfil_lck_grp
= lck_grp_alloc_init("content filter",
1687 if (cfil_lck_grp
== NULL
) {
1688 panic("%s: lck_grp_alloc_init failed", __func__
);
1691 cfil_lck_attr
= lck_attr_alloc_init();
1692 if (cfil_lck_attr
== NULL
) {
1693 panic("%s: lck_attr_alloc_init failed", __func__
);
1696 lck_rw_init(&cfil_lck_rw
, cfil_lck_grp
, cfil_lck_attr
);
1698 TAILQ_INIT(&cfil_sock_head
);
1701 * Register kernel control
1703 bzero(&kern_ctl
, sizeof(kern_ctl
));
1704 strlcpy(kern_ctl
.ctl_name
, CONTENT_FILTER_CONTROL_NAME
,
1705 sizeof(kern_ctl
.ctl_name
));
1706 kern_ctl
.ctl_flags
= CTL_FLAG_PRIVILEGED
| CTL_FLAG_REG_EXTENDED
;
1707 kern_ctl
.ctl_sendsize
= 512 * 1024; /* enough? */
1708 kern_ctl
.ctl_recvsize
= 512 * 1024; /* enough? */
1709 kern_ctl
.ctl_connect
= cfil_ctl_connect
;
1710 kern_ctl
.ctl_disconnect
= cfil_ctl_disconnect
;
1711 kern_ctl
.ctl_send
= cfil_ctl_send
;
1712 kern_ctl
.ctl_getopt
= cfil_ctl_getopt
;
1713 kern_ctl
.ctl_setopt
= cfil_ctl_setopt
;
1714 kern_ctl
.ctl_rcvd
= cfil_ctl_rcvd
;
1715 error
= ctl_register(&kern_ctl
, &cfil_kctlref
);
1717 CFIL_LOG(LOG_ERR
, "ctl_register failed: %d", error
);
1723 cfil_info_alloc(struct socket
*so
)
1726 struct cfil_info
*cfil_info
= NULL
;
1727 struct inpcb
*inp
= sotoinpcb(so
);
1729 CFIL_LOG(LOG_INFO
, "");
1731 socket_lock_assert_owned(so
);
1733 cfil_info
= zalloc(cfil_info_zone
);
1734 if (cfil_info
== NULL
)
1736 bzero(cfil_info
, sizeof(struct cfil_info
));
1738 cfil_queue_init(&cfil_info
->cfi_snd
.cfi_inject_q
);
1739 cfil_queue_init(&cfil_info
->cfi_rcv
.cfi_inject_q
);
1741 for (kcunit
= 1; kcunit
<= MAX_CONTENT_FILTER
; kcunit
++) {
1742 struct cfil_entry
*entry
;
1744 entry
= &cfil_info
->cfi_entries
[kcunit
- 1];
1745 entry
->cfe_cfil_info
= cfil_info
;
1747 /* Initialize the filter entry */
1748 entry
->cfe_filter
= NULL
;
1749 entry
->cfe_flags
= 0;
1750 entry
->cfe_necp_control_unit
= 0;
1751 entry
->cfe_snd
.cfe_pass_offset
= 0;
1752 entry
->cfe_snd
.cfe_peek_offset
= 0;
1753 entry
->cfe_snd
.cfe_peeked
= 0;
1754 entry
->cfe_rcv
.cfe_pass_offset
= 0;
1755 entry
->cfe_rcv
.cfe_peek_offset
= 0;
1756 entry
->cfe_rcv
.cfe_peeked
= 0;
1758 cfil_queue_init(&entry
->cfe_snd
.cfe_pending_q
);
1759 cfil_queue_init(&entry
->cfe_rcv
.cfe_pending_q
);
1760 cfil_queue_init(&entry
->cfe_snd
.cfe_ctl_q
);
1761 cfil_queue_init(&entry
->cfe_rcv
.cfe_ctl_q
);
1764 cfil_rw_lock_exclusive(&cfil_lck_rw
);
1766 so
->so_cfil
= cfil_info
;
1767 cfil_info
->cfi_so
= so
;
1769 * Create a cfi_sock_id that's not the socket pointer!
1771 if (inp
->inp_flowhash
== 0)
1772 inp
->inp_flowhash
= inp_calc_flowhash(inp
);
1773 cfil_info
->cfi_sock_id
=
1774 ((so
->so_gencnt
<< 32) | inp
->inp_flowhash
);
1776 TAILQ_INSERT_TAIL(&cfil_sock_head
, cfil_info
, cfi_link
);
1778 cfil_sock_attached_count
++;
1780 cfil_rw_unlock_exclusive(&cfil_lck_rw
);
1783 if (cfil_info
!= NULL
)
1784 OSIncrementAtomic(&cfil_stats
.cfs_cfi_alloc_ok
);
1786 OSIncrementAtomic(&cfil_stats
.cfs_cfi_alloc_fail
);
1792 cfil_info_attach_unit(struct socket
*so
, uint32_t filter_control_unit
)
1795 struct cfil_info
*cfil_info
= so
->so_cfil
;
1798 CFIL_LOG(LOG_INFO
, "");
1800 socket_lock_assert_owned(so
);
1802 cfil_rw_lock_exclusive(&cfil_lck_rw
);
1805 content_filters
!= NULL
&& kcunit
<= MAX_CONTENT_FILTER
;
1807 struct content_filter
*cfc
= content_filters
[kcunit
- 1];
1808 struct cfil_entry
*entry
;
1812 if (cfc
->cf_necp_control_unit
!= filter_control_unit
)
1815 entry
= &cfil_info
->cfi_entries
[kcunit
- 1];
1817 entry
->cfe_filter
= cfc
;
1818 entry
->cfe_necp_control_unit
= filter_control_unit
;
1819 TAILQ_INSERT_TAIL(&cfc
->cf_sock_entries
, entry
, cfe_link
);
1820 cfc
->cf_sock_count
++;
1821 verify_content_filter(cfc
);
1823 entry
->cfe_flags
|= CFEF_CFIL_ATTACHED
;
1827 cfil_rw_unlock_exclusive(&cfil_lck_rw
);
1833 cfil_info_free(struct socket
*so
, struct cfil_info
*cfil_info
)
1836 uint64_t in_drain
= 0;
1837 uint64_t out_drained
= 0;
1841 if (so
->so_flags
& SOF_CONTENT_FILTER
) {
1842 so
->so_flags
&= ~SOF_CONTENT_FILTER
;
1845 if (cfil_info
== NULL
)
1848 CFIL_LOG(LOG_INFO
, "");
1850 cfil_rw_lock_exclusive(&cfil_lck_rw
);
1853 content_filters
!= NULL
&& kcunit
<= MAX_CONTENT_FILTER
;
1855 struct cfil_entry
*entry
;
1856 struct content_filter
*cfc
;
1858 entry
= &cfil_info
->cfi_entries
[kcunit
- 1];
1860 /* Don't be silly and try to detach twice */
1861 if (entry
->cfe_filter
== NULL
)
1864 cfc
= content_filters
[kcunit
- 1];
1866 VERIFY(cfc
== entry
->cfe_filter
);
1868 entry
->cfe_filter
= NULL
;
1869 entry
->cfe_necp_control_unit
= 0;
1870 TAILQ_REMOVE(&cfc
->cf_sock_entries
, entry
, cfe_link
);
1871 cfc
->cf_sock_count
--;
1873 verify_content_filter(cfc
);
1875 cfil_sock_attached_count
--;
1876 TAILQ_REMOVE(&cfil_sock_head
, cfil_info
, cfi_link
);
1878 out_drained
+= cfil_queue_drain(&cfil_info
->cfi_snd
.cfi_inject_q
);
1879 in_drain
+= cfil_queue_drain(&cfil_info
->cfi_rcv
.cfi_inject_q
);
1881 for (kcunit
= 1; kcunit
<= MAX_CONTENT_FILTER
; kcunit
++) {
1882 struct cfil_entry
*entry
;
1884 entry
= &cfil_info
->cfi_entries
[kcunit
- 1];
1885 out_drained
+= cfil_queue_drain(&entry
->cfe_snd
.cfe_pending_q
);
1886 in_drain
+= cfil_queue_drain(&entry
->cfe_rcv
.cfe_pending_q
);
1887 out_drained
+= cfil_queue_drain(&entry
->cfe_snd
.cfe_ctl_q
);
1888 in_drain
+= cfil_queue_drain(&entry
->cfe_rcv
.cfe_ctl_q
);
1890 cfil_rw_unlock_exclusive(&cfil_lck_rw
);
1893 OSIncrementAtomic(&cfil_stats
.cfs_flush_out_free
);
1895 OSIncrementAtomic(&cfil_stats
.cfs_flush_in_free
);
1897 zfree(cfil_info_zone
, cfil_info
);
1901 * Entry point from Sockets layer
1902 * The socket is locked.
1905 cfil_sock_attach(struct socket
*so
)
1908 uint32_t filter_control_unit
;
1910 socket_lock_assert_owned(so
);
1912 /* Limit ourselves to TCP */
1913 if ((so
->so_proto
->pr_domain
->dom_family
!= PF_INET
&&
1914 so
->so_proto
->pr_domain
->dom_family
!= PF_INET6
) ||
1915 so
->so_proto
->pr_type
!= SOCK_STREAM
||
1916 so
->so_proto
->pr_protocol
!= IPPROTO_TCP
)
1919 filter_control_unit
= necp_socket_get_content_filter_control_unit(so
);
1920 if (filter_control_unit
== 0)
1923 if ((filter_control_unit
& NECP_MASK_USERSPACE_ONLY
) != 0) {
1924 OSIncrementAtomic(&cfil_stats
.cfs_sock_userspace_only
);
1927 if (cfil_active_count
== 0) {
1928 OSIncrementAtomic(&cfil_stats
.cfs_sock_attach_in_vain
);
1931 if (so
->so_cfil
!= NULL
) {
1932 OSIncrementAtomic(&cfil_stats
.cfs_sock_attach_already
);
1933 CFIL_LOG(LOG_ERR
, "already attached");
1935 cfil_info_alloc(so
);
1936 if (so
->so_cfil
== NULL
) {
1938 OSIncrementAtomic(&cfil_stats
.cfs_sock_attach_no_mem
);
1942 if (cfil_info_attach_unit(so
, filter_control_unit
) == 0) {
1943 CFIL_LOG(LOG_ERR
, "cfil_info_attach_unit(%u) failed",
1944 filter_control_unit
);
1945 OSIncrementAtomic(&cfil_stats
.cfs_sock_attach_failed
);
1948 CFIL_LOG(LOG_INFO
, "so %llx filter_control_unit %u sockid %llx",
1949 (uint64_t)VM_KERNEL_ADDRPERM(so
),
1950 filter_control_unit
, so
->so_cfil
->cfi_sock_id
);
1952 so
->so_flags
|= SOF_CONTENT_FILTER
;
1953 OSIncrementAtomic(&cfil_stats
.cfs_sock_attached
);
1955 /* Hold a reference on the socket */
1958 error
= cfil_dispatch_attach_event(so
, filter_control_unit
);
1959 /* We can recover from flow control or out of memory errors */
1960 if (error
== ENOBUFS
|| error
== ENOMEM
)
1962 else if (error
!= 0)
1965 CFIL_INFO_VERIFY(so
->so_cfil
);
1971 * Entry point from Sockets layer
1972 * The socket is locked.
1975 cfil_sock_detach(struct socket
*so
)
1978 cfil_info_free(so
, so
->so_cfil
);
1979 OSIncrementAtomic(&cfil_stats
.cfs_sock_detached
);
1985 cfil_dispatch_attach_event(struct socket
*so
, uint32_t filter_control_unit
)
1988 struct cfil_entry
*entry
= NULL
;
1989 struct cfil_msg_sock_attached msg_attached
;
1991 struct content_filter
*cfc
;
1993 socket_lock_assert_owned(so
);
1995 cfil_rw_lock_shared(&cfil_lck_rw
);
1997 if (so
->so_proto
== NULL
|| so
->so_proto
->pr_domain
== NULL
) {
2002 * Find the matching filter unit
2004 for (kcunit
= 1; kcunit
<= MAX_CONTENT_FILTER
; kcunit
++) {
2005 cfc
= content_filters
[kcunit
- 1];
2009 if (cfc
->cf_necp_control_unit
!= filter_control_unit
)
2011 entry
= &so
->so_cfil
->cfi_entries
[kcunit
- 1];
2012 if (entry
->cfe_filter
== NULL
)
2015 VERIFY(cfc
== entry
->cfe_filter
);
2020 if (entry
== NULL
|| entry
->cfe_filter
== NULL
)
2023 if ((entry
->cfe_flags
& CFEF_SENT_SOCK_ATTACHED
))
2026 CFIL_LOG(LOG_INFO
, "so %llx filter_control_unit %u kcunit %u",
2027 (uint64_t)VM_KERNEL_ADDRPERM(so
), filter_control_unit
, kcunit
);
2029 /* Would be wasteful to try when flow controlled */
2030 if (cfc
->cf_flags
& CFF_FLOW_CONTROLLED
) {
2035 bzero(&msg_attached
, sizeof(struct cfil_msg_sock_attached
));
2036 msg_attached
.cfs_msghdr
.cfm_len
= sizeof(struct cfil_msg_sock_attached
);
2037 msg_attached
.cfs_msghdr
.cfm_version
= CFM_VERSION_CURRENT
;
2038 msg_attached
.cfs_msghdr
.cfm_type
= CFM_TYPE_EVENT
;
2039 msg_attached
.cfs_msghdr
.cfm_op
= CFM_OP_SOCKET_ATTACHED
;
2040 msg_attached
.cfs_msghdr
.cfm_sock_id
= entry
->cfe_cfil_info
->cfi_sock_id
;
2042 msg_attached
.cfs_sock_family
= so
->so_proto
->pr_domain
->dom_family
;
2043 msg_attached
.cfs_sock_type
= so
->so_proto
->pr_type
;
2044 msg_attached
.cfs_sock_protocol
= so
->so_proto
->pr_protocol
;
2045 msg_attached
.cfs_pid
= so
->last_pid
;
2046 memcpy(msg_attached
.cfs_uuid
, so
->last_uuid
, sizeof(uuid_t
));
2047 if (so
->so_flags
& SOF_DELEGATED
) {
2048 msg_attached
.cfs_e_pid
= so
->e_pid
;
2049 memcpy(msg_attached
.cfs_e_uuid
, so
->e_uuid
, sizeof(uuid_t
));
2051 msg_attached
.cfs_e_pid
= so
->last_pid
;
2052 memcpy(msg_attached
.cfs_e_uuid
, so
->last_uuid
, sizeof(uuid_t
));
2054 error
= ctl_enqueuedata(entry
->cfe_filter
->cf_kcref
,
2055 entry
->cfe_filter
->cf_kcunit
,
2057 sizeof(struct cfil_msg_sock_attached
),
2060 CFIL_LOG(LOG_ERR
, "ctl_enqueuedata() failed: %d", error
);
2063 microuptime(&entry
->cfe_last_event
);
2064 entry
->cfe_flags
|= CFEF_SENT_SOCK_ATTACHED
;
2065 OSIncrementAtomic(&cfil_stats
.cfs_attach_event_ok
);
2068 /* We can recover from flow control */
2069 if (error
== ENOBUFS
) {
2070 entry
->cfe_flags
|= CFEF_FLOW_CONTROLLED
;
2071 OSIncrementAtomic(&cfil_stats
.cfs_attach_event_flow_control
);
2073 if (!cfil_rw_lock_shared_to_exclusive(&cfil_lck_rw
))
2074 cfil_rw_lock_exclusive(&cfil_lck_rw
);
2076 cfc
->cf_flags
|= CFF_FLOW_CONTROLLED
;
2078 cfil_rw_unlock_exclusive(&cfil_lck_rw
);
2081 OSIncrementAtomic(&cfil_stats
.cfs_attach_event_fail
);
2083 cfil_rw_unlock_shared(&cfil_lck_rw
);

int
cfil_dispatch_disconnect_event(struct socket *so, uint32_t kcunit, int outgoing)
{
	errno_t error = 0;
	struct mbuf *msg = NULL;
	struct cfil_entry *entry;
	struct cfe_buf *entrybuf;
	struct cfil_msg_hdr msg_disconnected;
	struct content_filter *cfc;

	socket_lock_assert_owned(so);

	cfil_rw_lock_shared(&cfil_lck_rw);

	entry = &so->so_cfil->cfi_entries[kcunit - 1];
	if (outgoing)
		entrybuf = &entry->cfe_snd;
	else
		entrybuf = &entry->cfe_rcv;

	cfc = entry->cfe_filter;
	if (cfc == NULL)
		goto done;

	CFIL_LOG(LOG_INFO, "so %llx kcunit %u outgoing %d",
		(uint64_t)VM_KERNEL_ADDRPERM(so), kcunit, outgoing);

	/*
	 * Send the disconnection event once
	 */
	if ((outgoing && (entry->cfe_flags & CFEF_SENT_DISCONNECT_OUT)) ||
	    (!outgoing && (entry->cfe_flags & CFEF_SENT_DISCONNECT_IN))) {
		CFIL_LOG(LOG_INFO, "so %llx disconnect already sent",
			(uint64_t)VM_KERNEL_ADDRPERM(so));
		goto done;
	}

	/*
	 * We're not disconnected as long as some data is waiting
	 * to be delivered to the filter
	 */
	if (outgoing && cfil_queue_empty(&entrybuf->cfe_ctl_q) == 0) {
		CFIL_LOG(LOG_INFO, "so %llx control queue not empty",
			(uint64_t)VM_KERNEL_ADDRPERM(so));
		error = EBUSY;
		goto done;
	}

	/* Would be wasteful to try when flow controlled */
	if (cfc->cf_flags & CFF_FLOW_CONTROLLED) {
		error = ENOBUFS;
		goto done;
	}

	bzero(&msg_disconnected, sizeof(struct cfil_msg_hdr));
	msg_disconnected.cfm_len = sizeof(struct cfil_msg_hdr);
	msg_disconnected.cfm_version = CFM_VERSION_CURRENT;
	msg_disconnected.cfm_type = CFM_TYPE_EVENT;
	msg_disconnected.cfm_op = outgoing ? CFM_OP_DISCONNECT_OUT :
		CFM_OP_DISCONNECT_IN;
	msg_disconnected.cfm_sock_id = entry->cfe_cfil_info->cfi_sock_id;
	error = ctl_enqueuedata(entry->cfe_filter->cf_kcref,
		entry->cfe_filter->cf_kcunit,
		&msg_disconnected,
		sizeof(struct cfil_msg_hdr),
		CTL_DATA_EOR);
	if (error != 0) {
		CFIL_LOG(LOG_ERR, "ctl_enqueuedata() failed: %d", error);
		goto done;
	}
	microuptime(&entry->cfe_last_event);

	/* Remember we have sent the disconnection message */
	if (outgoing) {
		entry->cfe_flags |= CFEF_SENT_DISCONNECT_OUT;
		OSIncrementAtomic(&cfil_stats.cfs_disconnect_out_event_ok);
	} else {
		entry->cfe_flags |= CFEF_SENT_DISCONNECT_IN;
		OSIncrementAtomic(&cfil_stats.cfs_disconnect_in_event_ok);
	}
done:
	/* We can recover from flow control */
	if (error == ENOBUFS) {
		entry->cfe_flags |= CFEF_FLOW_CONTROLLED;
		OSIncrementAtomic(
			&cfil_stats.cfs_disconnect_event_flow_control);

		if (!cfil_rw_lock_shared_to_exclusive(&cfil_lck_rw))
			cfil_rw_lock_exclusive(&cfil_lck_rw);

		cfc->cf_flags |= CFF_FLOW_CONTROLLED;

		cfil_rw_unlock_exclusive(&cfil_lck_rw);
	} else {
		if (error != 0)
			OSIncrementAtomic(
				&cfil_stats.cfs_disconnect_event_fail);

		cfil_rw_unlock_shared(&cfil_lck_rw);
	}
	return (error);
}

int
cfil_dispatch_closed_event(struct socket *so, int kcunit)
{
	errno_t error = 0;
	struct cfil_entry *entry;
	struct cfil_msg_hdr msg_closed;
	struct content_filter *cfc;

	socket_lock_assert_owned(so);

	cfil_rw_lock_shared(&cfil_lck_rw);

	entry = &so->so_cfil->cfi_entries[kcunit - 1];
	cfc = entry->cfe_filter;
	if (cfc == NULL)
		goto done;

	CFIL_LOG(LOG_INFO, "so %llx kcunit %d",
		(uint64_t)VM_KERNEL_ADDRPERM(so), kcunit);

	/* Would be wasteful to try when flow controlled */
	if (cfc->cf_flags & CFF_FLOW_CONTROLLED) {
		error = ENOBUFS;
		goto done;
	}
	/*
	 * Send a single closed message per filter
	 */
	if ((entry->cfe_flags & CFEF_SENT_SOCK_CLOSED) != 0)
		goto done;
	if ((entry->cfe_flags & CFEF_SENT_SOCK_ATTACHED) == 0)
		goto done;

	bzero(&msg_closed, sizeof(struct cfil_msg_hdr));
	msg_closed.cfm_len = sizeof(struct cfil_msg_hdr);
	msg_closed.cfm_version = CFM_VERSION_CURRENT;
	msg_closed.cfm_type = CFM_TYPE_EVENT;
	msg_closed.cfm_op = CFM_OP_SOCKET_CLOSED;
	msg_closed.cfm_sock_id = entry->cfe_cfil_info->cfi_sock_id;
	error = ctl_enqueuedata(entry->cfe_filter->cf_kcref,
		entry->cfe_filter->cf_kcunit,
		&msg_closed,
		sizeof(struct cfil_msg_hdr),
		CTL_DATA_EOR);
	if (error != 0) {
		CFIL_LOG(LOG_ERR, "ctl_enqueuedata() failed: %d",
			error);
		goto done;
	}
	microuptime(&entry->cfe_last_event);
	entry->cfe_flags |= CFEF_SENT_SOCK_CLOSED;
	OSIncrementAtomic(&cfil_stats.cfs_closed_event_ok);
done:
	/* We can recover from flow control */
	if (error == ENOBUFS) {
		entry->cfe_flags |= CFEF_FLOW_CONTROLLED;
		OSIncrementAtomic(&cfil_stats.cfs_closed_event_flow_control);

		if (!cfil_rw_lock_shared_to_exclusive(&cfil_lck_rw))
			cfil_rw_lock_exclusive(&cfil_lck_rw);

		cfc->cf_flags |= CFF_FLOW_CONTROLLED;

		cfil_rw_unlock_exclusive(&cfil_lck_rw);
	} else {
		if (error != 0)
			OSIncrementAtomic(&cfil_stats.cfs_closed_event_fail);

		cfil_rw_unlock_shared(&cfil_lck_rw);
	}
	return (error);
}

static void
fill_ip6_sockaddr_4_6(union sockaddr_in_4_6 *sin46,
	struct in6_addr *ip6, u_int16_t port)
{
	struct sockaddr_in6 *sin6 = &sin46->sin6;

	sin6->sin6_family = AF_INET6;
	sin6->sin6_len = sizeof(*sin6);
	sin6->sin6_port = port;
	sin6->sin6_addr = *ip6;
	if (IN6_IS_SCOPE_EMBED(&sin6->sin6_addr)) {
		sin6->sin6_scope_id = ntohs(sin6->sin6_addr.s6_addr16[1]);
		sin6->sin6_addr.s6_addr16[1] = 0;
	}
}

static void
fill_ip_sockaddr_4_6(union sockaddr_in_4_6 *sin46,
	struct in_addr ip, u_int16_t port)
{
	struct sockaddr_in *sin = &sin46->sin;

	sin->sin_family = AF_INET;
	sin->sin_len = sizeof(*sin);
	sin->sin_port = port;
	sin->sin_addr.s_addr = ip.s_addr;
}

static int
cfil_dispatch_data_event(struct socket *so, uint32_t kcunit, int outgoing,
	struct mbuf *data, unsigned int copyoffset, unsigned int copylen)
{
	errno_t error = 0;
	struct mbuf *copy = NULL;
	struct mbuf *msg = NULL;
	unsigned int one = 1;
	struct cfil_msg_data_event *data_req;
	size_t hdrsize;
	struct inpcb *inp = (struct inpcb *)so->so_pcb;
	struct cfil_entry *entry;
	struct cfe_buf *entrybuf;
	struct content_filter *cfc;

	cfil_rw_lock_shared(&cfil_lck_rw);

	entry = &so->so_cfil->cfi_entries[kcunit - 1];
	if (outgoing)
		entrybuf = &entry->cfe_snd;
	else
		entrybuf = &entry->cfe_rcv;

	cfc = entry->cfe_filter;
	if (cfc == NULL)
		goto done;

	CFIL_LOG(LOG_INFO, "so %llx kcunit %u outgoing %d",
		(uint64_t)VM_KERNEL_ADDRPERM(so), kcunit, outgoing);

	socket_lock_assert_owned(so);

	/* Would be wasteful to try when flow controlled */
	if (cfc->cf_flags & CFF_FLOW_CONTROLLED) {
		error = ENOBUFS;
		goto done;
	}

	/* Make a copy of the data to pass to kernel control socket */
	copy = m_copym_mode(data, copyoffset, copylen, M_DONTWAIT,
		M_COPYM_NOOP_HDR);
	if (copy == NULL) {
		CFIL_LOG(LOG_ERR, "m_copym_mode() failed");
		error = ENOMEM;
		goto done;
	}

	/* We need an mbuf packet for the message header */
	hdrsize = sizeof(struct cfil_msg_data_event);
	error = mbuf_allocpacket(MBUF_DONTWAIT, hdrsize, &one, &msg);
	if (error != 0) {
		CFIL_LOG(LOG_ERR, "mbuf_allocpacket() failed");
		m_freem(copy);
		/*
		 * ENOBUFS is to indicate flow control
		 */
		error = ENOMEM;
		goto done;
	}
	mbuf_setlen(msg, hdrsize);
	mbuf_pkthdr_setlen(msg, hdrsize + copylen);
	msg->m_next = copy;
	data_req = (struct cfil_msg_data_event *)mbuf_data(msg);
	bzero(data_req, hdrsize);
	data_req->cfd_msghdr.cfm_len = hdrsize + copylen;
	data_req->cfd_msghdr.cfm_version = 1;
	data_req->cfd_msghdr.cfm_type = CFM_TYPE_EVENT;
	data_req->cfd_msghdr.cfm_op =
		outgoing ? CFM_OP_DATA_OUT : CFM_OP_DATA_IN;
	data_req->cfd_msghdr.cfm_sock_id =
		entry->cfe_cfil_info->cfi_sock_id;
	data_req->cfd_start_offset = entrybuf->cfe_peeked;
	data_req->cfd_end_offset = entrybuf->cfe_peeked + copylen;

	/*
	 * For non connected sockets we need to copy the addresses from the
	 * passed parameters
	 */
	if (inp->inp_vflag & INP_IPV6) {
		if (outgoing) {
			fill_ip6_sockaddr_4_6(&data_req->cfc_src,
				&inp->in6p_laddr, inp->inp_lport);
			fill_ip6_sockaddr_4_6(&data_req->cfc_dst,
				&inp->in6p_faddr, inp->inp_fport);
		} else {
			fill_ip6_sockaddr_4_6(&data_req->cfc_src,
				&inp->in6p_faddr, inp->inp_fport);
			fill_ip6_sockaddr_4_6(&data_req->cfc_dst,
				&inp->in6p_laddr, inp->inp_lport);
		}
	} else if (inp->inp_vflag & INP_IPV4) {
		if (outgoing) {
			fill_ip_sockaddr_4_6(&data_req->cfc_src,
				inp->inp_laddr, inp->inp_lport);
			fill_ip_sockaddr_4_6(&data_req->cfc_dst,
				inp->inp_faddr, inp->inp_fport);
		} else {
			fill_ip_sockaddr_4_6(&data_req->cfc_src,
				inp->inp_faddr, inp->inp_fport);
			fill_ip_sockaddr_4_6(&data_req->cfc_dst,
				inp->inp_laddr, inp->inp_lport);
		}
	}

	/* Pass the message to the content filter */
	error = ctl_enqueuembuf(entry->cfe_filter->cf_kcref,
		entry->cfe_filter->cf_kcunit,
		msg, CTL_DATA_EOR);
	if (error != 0) {
		CFIL_LOG(LOG_ERR, "ctl_enqueuembuf() failed: %d", error);
		mbuf_freem(msg);
		goto done;
	}
	entry->cfe_flags &= ~CFEF_FLOW_CONTROLLED;
	OSIncrementAtomic(&cfil_stats.cfs_data_event_ok);
done:
	if (error == ENOBUFS) {
		entry->cfe_flags |= CFEF_FLOW_CONTROLLED;
		OSIncrementAtomic(
			&cfil_stats.cfs_data_event_flow_control);

		if (!cfil_rw_lock_shared_to_exclusive(&cfil_lck_rw))
			cfil_rw_lock_exclusive(&cfil_lck_rw);

		cfc->cf_flags |= CFF_FLOW_CONTROLLED;

		cfil_rw_unlock_exclusive(&cfil_lck_rw);
	} else {
		if (error != 0)
			OSIncrementAtomic(&cfil_stats.cfs_data_event_fail);

		cfil_rw_unlock_shared(&cfil_lck_rw);
	}
	return (error);
}
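
/*
 * Illustrative layout (documentation only): shape of the message built by
 * cfil_dispatch_data_event().  The first mbuf carries the
 * struct cfil_msg_data_event header and the copied payload is chained
 * behind it, so the filter agent reads cfm_len bytes in total and the
 * peeked span is described by [cfd_start_offset, cfd_end_offset).
 *
 *   msg (header mbuf)                  copy (payload mbuf chain)
 *  +----------------------------+     +---------------------------+
 *  | struct cfil_msg_data_event |---->| copylen bytes of data,    |
 *  |  cfm_len = hdrsize+copylen |     | starting at cfe_peeked    |
 *  |  cfm_op  = DATA_OUT / IN   |     | within the stream         |
 *  +----------------------------+     +---------------------------+
 */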

/*
 * Process the queue of data waiting to be delivered to content filter
 */
static int
cfil_data_service_ctl_q(struct socket *so, uint32_t kcunit, int outgoing)
{
	errno_t error = 0;
	struct mbuf *data, *tmp = NULL;
	unsigned int datalen = 0, copylen = 0, copyoffset = 0;
	struct cfil_entry *entry;
	struct cfe_buf *entrybuf;
	uint64_t currentoffset = 0;

	if (so->so_cfil == NULL)
		return (0);

	CFIL_LOG(LOG_INFO, "so %llx kcunit %u outgoing %d",
		(uint64_t)VM_KERNEL_ADDRPERM(so), kcunit, outgoing);

	socket_lock_assert_owned(so);

	entry = &so->so_cfil->cfi_entries[kcunit - 1];
	if (outgoing)
		entrybuf = &entry->cfe_snd;
	else
		entrybuf = &entry->cfe_rcv;

	/* Send attached message if not yet done */
	if ((entry->cfe_flags & CFEF_SENT_SOCK_ATTACHED) == 0) {
		error = cfil_dispatch_attach_event(so, kcunit);
		if (error != 0) {
			/* We can recover from flow control */
			if (error == ENOBUFS || error == ENOMEM)
				error = 0;
			goto done;
		}
	} else if ((entry->cfe_flags & CFEF_DATA_START) == 0) {
		OSIncrementAtomic(&cfil_stats.cfs_ctl_q_not_started);
		goto done;
	}

	CFIL_LOG(LOG_DEBUG, "pass_offset %llu peeked %llu peek_offset %llu",
		entrybuf->cfe_pass_offset,
		entrybuf->cfe_peeked,
		entrybuf->cfe_peek_offset);

	/* Move all data that can pass */
	while ((data = cfil_queue_first(&entrybuf->cfe_ctl_q)) != NULL &&
		entrybuf->cfe_ctl_q.q_start < entrybuf->cfe_pass_offset) {
		datalen = cfil_data_length(data, NULL);
		tmp = data;

		if (entrybuf->cfe_ctl_q.q_start + datalen <=
			entrybuf->cfe_pass_offset) {
			/*
			 * The first mbuf can fully pass
			 */
			copylen = datalen;
		} else {
			/*
			 * The first mbuf can partially pass
			 */
			copylen = entrybuf->cfe_pass_offset -
				entrybuf->cfe_ctl_q.q_start;
		}
		VERIFY(copylen <= datalen);

		CFIL_LOG(LOG_DEBUG,
			"%llx first %llu peeked %llu pass %llu peek %llu"
			"datalen %u copylen %u",
			(uint64_t)VM_KERNEL_ADDRPERM(tmp),
			entrybuf->cfe_ctl_q.q_start,
			entrybuf->cfe_peeked,
			entrybuf->cfe_pass_offset,
			entrybuf->cfe_peek_offset,
			datalen, copylen);

		/*
		 * Data that passes has been peeked at explicitly or
		 * implicitly
		 */
		if (entrybuf->cfe_ctl_q.q_start + copylen >
			entrybuf->cfe_peeked)
			entrybuf->cfe_peeked =
				entrybuf->cfe_ctl_q.q_start + copylen;
		/*
		 * Stop on partial pass
		 */
		if (copylen < datalen)
			break;

		/* All good, move full data from ctl queue to pending queue */
		cfil_queue_remove(&entrybuf->cfe_ctl_q, data, datalen);

		cfil_queue_enqueue(&entrybuf->cfe_pending_q, data, datalen);
		if (outgoing)
			OSAddAtomic64(datalen,
				&cfil_stats.cfs_pending_q_out_enqueued);
		else
			OSAddAtomic64(datalen,
				&cfil_stats.cfs_pending_q_in_enqueued);
	}
	CFIL_INFO_VERIFY(so->so_cfil);
	CFIL_LOG(LOG_DEBUG,
		"%llx first %llu peeked %llu pass %llu peek %llu"
		"datalen %u copylen %u",
		(uint64_t)VM_KERNEL_ADDRPERM(tmp),
		entrybuf->cfe_ctl_q.q_start,
		entrybuf->cfe_peeked,
		entrybuf->cfe_pass_offset,
		entrybuf->cfe_peek_offset,
		datalen, copylen);
	tmp = NULL;

	/* Now deal with remaining data the filter wants to peek at */
	for (data = cfil_queue_first(&entrybuf->cfe_ctl_q),
		currentoffset = entrybuf->cfe_ctl_q.q_start;
		data != NULL && currentoffset < entrybuf->cfe_peek_offset;
		data = cfil_queue_next(&entrybuf->cfe_ctl_q, data),
		currentoffset += datalen) {
		datalen = cfil_data_length(data, NULL);
		tmp = data;

		/* We've already peeked at this mbuf */
		if (currentoffset + datalen <= entrybuf->cfe_peeked)
			continue;
		/*
		 * The data in the first mbuf may have been
		 * partially peeked at
		 */
		copyoffset = entrybuf->cfe_peeked - currentoffset;
		VERIFY(copyoffset < datalen);
		copylen = datalen - copyoffset;
		VERIFY(copylen <= datalen);
		/*
		 * Do not copy more than needed
		 */
		if (currentoffset + copyoffset + copylen >
			entrybuf->cfe_peek_offset) {
			copylen = entrybuf->cfe_peek_offset -
				(currentoffset + copyoffset);
		}

		CFIL_LOG(LOG_DEBUG,
			"%llx current %llu peeked %llu pass %llu peek %llu"
			"datalen %u copylen %u copyoffset %u",
			(uint64_t)VM_KERNEL_ADDRPERM(tmp),
			currentoffset,
			entrybuf->cfe_peeked,
			entrybuf->cfe_pass_offset,
			entrybuf->cfe_peek_offset,
			datalen, copylen, copyoffset);

		/*
		 * Stop if there is nothing more to peek at
		 */
		if (copylen == 0)
			break;
		/*
		 * Let the filter get a peek at this span of data
		 */
		error = cfil_dispatch_data_event(so, kcunit,
			outgoing, data, copyoffset, copylen);
		if (error != 0) {
			/* On error, leave data in ctl_q */
			break;
		}
		entrybuf->cfe_peeked += copylen;
		if (outgoing)
			OSAddAtomic64(copylen,
				&cfil_stats.cfs_ctl_q_out_peeked);
		else
			OSAddAtomic64(copylen,
				&cfil_stats.cfs_ctl_q_in_peeked);

		/* Stop when data could not be fully peeked at */
		if (copylen + copyoffset < datalen)
			break;
	}
	CFIL_INFO_VERIFY(so->so_cfil);
	CFIL_LOG(LOG_DEBUG,
		"%llx first %llu peeked %llu pass %llu peek %llu"
		"datalen %u copylen %u copyoffset %u",
		(uint64_t)VM_KERNEL_ADDRPERM(tmp),
		currentoffset,
		entrybuf->cfe_peeked,
		entrybuf->cfe_pass_offset,
		entrybuf->cfe_peek_offset,
		datalen, copylen, copyoffset);

	/*
	 * Process data that has passed the filter
	 */
	error = cfil_service_pending_queue(so, kcunit, outgoing);
	if (error != 0) {
		CFIL_LOG(LOG_ERR, "cfil_service_pending_queue() error %d",
			error);
		goto done;
	}

	/*
	 * Dispatch disconnect events that could not be sent
	 */
	if (so->so_cfil == NULL)
		goto done;
	else if (outgoing) {
		if ((so->so_cfil->cfi_flags & CFIF_SHUT_WR) &&
		    !(entry->cfe_flags & CFEF_SENT_DISCONNECT_OUT))
			cfil_dispatch_disconnect_event(so, kcunit, 1);
	} else {
		if ((so->so_cfil->cfi_flags & CFIF_SHUT_RD) &&
		    !(entry->cfe_flags & CFEF_SENT_DISCONNECT_IN))
			cfil_dispatch_disconnect_event(so, kcunit, 0);
	}

done:
	CFIL_LOG(LOG_DEBUG,
		"first %llu peeked %llu pass %llu peek %llu",
		entrybuf->cfe_ctl_q.q_start,
		entrybuf->cfe_peeked,
		entrybuf->cfe_pass_offset,
		entrybuf->cfe_peek_offset);

	CFIL_INFO_VERIFY(so->so_cfil);
	return (error);
}
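
/*
 * Illustrative sketch (documentation only, not compiled): the arithmetic
 * used by cfil_data_service_ctl_q() to decide how much of the mbuf at the
 * head of the control queue may pass.  The helper name is hypothetical;
 * the real code performs this inline.
 */
#if 0
static unsigned int
cfil_sketch_passable_len(uint64_t q_start, unsigned int datalen,
	uint64_t pass_offset)
{
	if (q_start >= pass_offset)
		return (0);		/* nothing may pass yet */
	if (q_start + datalen <= pass_offset)
		return (datalen);	/* the whole mbuf passes */
	/* only a partial pass: the caller stops the loop after this mbuf */
	return ((unsigned int)(pass_offset - q_start));
}
#endif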

/*
 * cfil_data_filter()
 *
 * Process data for a content filter installed on a socket
 */
static int
cfil_data_filter(struct socket *so, uint32_t kcunit, int outgoing,
	struct mbuf *data, uint64_t datalen)
{
	errno_t error = 0;
	struct cfil_entry *entry;
	struct cfe_buf *entrybuf;

	CFIL_LOG(LOG_INFO, "so %llx kcunit %u outgoing %d",
		(uint64_t)VM_KERNEL_ADDRPERM(so), kcunit, outgoing);

	socket_lock_assert_owned(so);

	entry = &so->so_cfil->cfi_entries[kcunit - 1];
	if (outgoing)
		entrybuf = &entry->cfe_snd;
	else
		entrybuf = &entry->cfe_rcv;

	/* Are we attached to the filter? */
	if (entry->cfe_filter == NULL) {
		error = 0;
		goto done;
	}

	/* Dispatch to filters */
	cfil_queue_enqueue(&entrybuf->cfe_ctl_q, data, datalen);
	if (outgoing)
		OSAddAtomic64(datalen,
			&cfil_stats.cfs_ctl_q_out_enqueued);
	else
		OSAddAtomic64(datalen,
			&cfil_stats.cfs_ctl_q_in_enqueued);

	error = cfil_data_service_ctl_q(so, kcunit, outgoing);
	if (error != 0) {
		CFIL_LOG(LOG_ERR, "cfil_data_service_ctl_q() error %d",
			error);
	}
	/*
	 * We have to return EJUSTRETURN in all cases to avoid double free
	 * by the socket layer
	 */
	error = EJUSTRETURN;
done:
	CFIL_INFO_VERIFY(so->so_cfil);

	CFIL_LOG(LOG_INFO, "return %d", error);
	return (error);
}

/*
 * cfil_service_inject_queue() re-injects data that passed the
 * content filters
 */
static int
cfil_service_inject_queue(struct socket *so, int outgoing)
{
	mbuf_t data;
	unsigned int datalen;
	int mbcnt;
	unsigned int copylen;
	errno_t error = 0;
	struct mbuf *copy = NULL;
	struct cfi_buf *cfi_buf;
	struct cfil_queue *inject_q;
	int need_rwakeup = 0;

	if (so->so_cfil == NULL)
		return (0);

	CFIL_LOG(LOG_INFO, "so %llx outgoing %d",
		(uint64_t)VM_KERNEL_ADDRPERM(so), outgoing);

	socket_lock_assert_owned(so);

	if (outgoing) {
		cfi_buf = &so->so_cfil->cfi_snd;
		so->so_cfil->cfi_flags &= ~CFIF_RETRY_INJECT_OUT;
	} else {
		cfi_buf = &so->so_cfil->cfi_rcv;
		so->so_cfil->cfi_flags &= ~CFIF_RETRY_INJECT_IN;
	}
	inject_q = &cfi_buf->cfi_inject_q;

	while ((data = cfil_queue_first(inject_q)) != NULL) {
		datalen = cfil_data_length(data, &mbcnt);

		CFIL_LOG(LOG_INFO, "data %llx datalen %u",
			(uint64_t)VM_KERNEL_ADDRPERM(data), datalen);

		/* Make a copy in case of injection error */
		copy = m_copym_mode(data, 0, M_COPYALL, M_DONTWAIT,
			M_COPYM_COPY_HDR);
		if (copy == NULL) {
			CFIL_LOG(LOG_ERR, "m_copym_mode() failed");
			error = ENOMEM;
			break;
		}

		if ((copylen = m_length(copy)) != datalen)
			panic("%s so %p copylen %d != datalen %d",
				__func__, so, copylen, datalen);

		if (outgoing) {
			socket_unlock(so, 0);

			/*
			 * Set both DONTWAIT and NBIO flags as we really
			 * do not want to block
			 */
			error = sosend(so, NULL, NULL,
				copy, NULL,
				MSG_SKIPCFIL | MSG_DONTWAIT | MSG_NBIO);

			socket_lock(so, 0);

			if (error != 0) {
				CFIL_LOG(LOG_ERR, "sosend() failed %d",
					error);
			}
		} else {
			copy->m_flags |= M_SKIPCFIL;

			/*
			 * This works only because we support plain TCP.
			 * For UDP, RAWIP, MPTCP and message TCP we'll
			 * need to call the appropriate sbappendxxx()
			 * or fix sock_inject_data_in()
			 */
			if (sbappendstream(&so->so_rcv, copy))
				need_rwakeup = 1;
		}

		/* Need to reassess if filter is still attached after unlock */
		if (so->so_cfil == NULL) {
			CFIL_LOG(LOG_ERR, "so %llx cfil detached",
				(uint64_t)VM_KERNEL_ADDRPERM(so));
			OSIncrementAtomic(&cfil_stats.cfs_inject_q_detached);
			error = 0;
			break;
		}
		if (error != 0)
			break;

		/* Injection successful */
		cfil_queue_remove(inject_q, data, datalen);
		mbuf_freem(data);

		cfi_buf->cfi_pending_first += datalen;
		cfi_buf->cfi_pending_mbcnt -= mbcnt;
		cfil_info_buf_verify(cfi_buf);

		if (outgoing)
			OSAddAtomic64(datalen,
				&cfil_stats.cfs_inject_q_out_passed);
		else
			OSAddAtomic64(datalen,
				&cfil_stats.cfs_inject_q_in_passed);
	}

	/* A single wakeup for several packets is more efficient */
	if (need_rwakeup)
		sorwakeup(so);

	if (error != 0 && so->so_cfil) {
		if (error == ENOBUFS)
			OSIncrementAtomic(&cfil_stats.cfs_inject_q_nobufs);
		if (error == ENOMEM)
			OSIncrementAtomic(&cfil_stats.cfs_inject_q_nomem);

		if (outgoing) {
			so->so_cfil->cfi_flags |= CFIF_RETRY_INJECT_OUT;
			OSIncrementAtomic(&cfil_stats.cfs_inject_q_out_fail);
		} else {
			so->so_cfil->cfi_flags |= CFIF_RETRY_INJECT_IN;
			OSIncrementAtomic(&cfil_stats.cfs_inject_q_in_fail);
		}
	}

	if (so->so_cfil && (so->so_cfil->cfi_flags & CFIF_SHUT_WR)) {
		cfil_sock_notify_shutdown(so, SHUT_WR);
		if (cfil_sock_data_pending(&so->so_snd) == 0)
			soshutdownlock_final(so, SHUT_WR);
	}
	if (so->so_cfil && (so->so_cfil->cfi_flags & CFIF_CLOSE_WAIT)) {
		if (cfil_filters_attached(so) == 0) {
			CFIL_LOG(LOG_INFO, "so %llx waking",
				(uint64_t)VM_KERNEL_ADDRPERM(so));
			wakeup((caddr_t)&so->so_cfil);
		}
	}

	CFIL_INFO_VERIFY(so->so_cfil);

	return (error);
}
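
/*
 * Illustrative sketch (documentation only, not compiled): the two
 * re-injection paths used by cfil_service_inject_queue().  Outgoing data
 * goes back through sosend() and incoming data through sbappendstream(),
 * both marked to skip content filtering so the bytes are not filtered a
 * second time.  The standalone helper form below is hypothetical.
 */
#if 0
static errno_t
cfil_sketch_reinject(struct socket *so, struct mbuf *copy, int outgoing)
{
	errno_t error = 0;

	if (outgoing) {
		/* MSG_SKIPCFIL keeps cfil_sock_data_out() from re-queueing */
		error = sosend(so, NULL, NULL, copy, NULL,
		    MSG_SKIPCFIL | MSG_DONTWAIT | MSG_NBIO);
	} else {
		/* M_SKIPCFIL plays the same role for the receive side */
		copy->m_flags |= M_SKIPCFIL;
		if (sbappendstream(&so->so_rcv, copy))
			sorwakeup(so);
	}
	return (error);
}
#endif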

static int
cfil_service_pending_queue(struct socket *so, uint32_t kcunit, int outgoing)
{
	uint64_t passlen, curlen;
	mbuf_t data;
	unsigned int datalen;
	errno_t error = 0;
	struct cfil_entry *entry;
	struct cfe_buf *entrybuf;
	struct cfil_queue *pending_q;

	CFIL_LOG(LOG_INFO, "so %llx kcunit %u outgoing %d",
		(uint64_t)VM_KERNEL_ADDRPERM(so), kcunit, outgoing);

	socket_lock_assert_owned(so);

	entry = &so->so_cfil->cfi_entries[kcunit - 1];
	if (outgoing)
		entrybuf = &entry->cfe_snd;
	else
		entrybuf = &entry->cfe_rcv;

	pending_q = &entrybuf->cfe_pending_q;

	passlen = entrybuf->cfe_pass_offset - pending_q->q_start;

	/*
	 * Locate the chunks of data that we can pass to the next filter
	 * A data chunk must be on mbuf boundaries
	 */
	curlen = 0;
	while ((data = cfil_queue_first(pending_q)) != NULL) {
		datalen = cfil_data_length(data, NULL);

		CFIL_LOG(LOG_INFO,
			"data %llx datalen %u passlen %llu curlen %llu",
			(uint64_t)VM_KERNEL_ADDRPERM(data), datalen,
			passlen, curlen);

		if (curlen + datalen > passlen)
			break;

		cfil_queue_remove(pending_q, data, datalen);

		curlen += datalen;

		for (kcunit += 1;
			kcunit <= MAX_CONTENT_FILTER;
			kcunit++) {
			error = cfil_data_filter(so, kcunit, outgoing,
				data, datalen);
			/* 0 means passed so we can continue */
			if (error != 0)
				break;
		}
		/* When data has passed all filters, re-inject */
		if (error == 0) {
			if (outgoing) {
				cfil_queue_enqueue(
					&so->so_cfil->cfi_snd.cfi_inject_q,
					data, datalen);
				OSAddAtomic64(datalen,
					&cfil_stats.cfs_inject_q_out_enqueued);
			} else {
				cfil_queue_enqueue(
					&so->so_cfil->cfi_rcv.cfi_inject_q,
					data, datalen);
				OSAddAtomic64(datalen,
					&cfil_stats.cfs_inject_q_in_enqueued);
			}
		}
	}

	CFIL_INFO_VERIFY(so->so_cfil);

	return (error);
}

static int
cfil_update_data_offsets(struct socket *so, uint32_t kcunit, int outgoing,
	uint64_t pass_offset, uint64_t peek_offset)
{
	errno_t error = 0;
	struct cfil_entry *entry = NULL;
	struct cfe_buf *entrybuf;

	CFIL_LOG(LOG_INFO, "pass %llu peek %llu", pass_offset, peek_offset);

	socket_lock_assert_owned(so);

	if (so->so_cfil == NULL) {
		CFIL_LOG(LOG_ERR, "so %llx cfil detached",
			(uint64_t)VM_KERNEL_ADDRPERM(so));
		error = 0;
		goto done;
	} else if (so->so_cfil->cfi_flags & CFIF_DROP) {
		CFIL_LOG(LOG_ERR, "so %llx drop set",
			(uint64_t)VM_KERNEL_ADDRPERM(so));
		error = EPIPE;
		goto done;
	}

	entry = &so->so_cfil->cfi_entries[kcunit - 1];
	if (outgoing)
		entrybuf = &entry->cfe_snd;
	else
		entrybuf = &entry->cfe_rcv;

	/* Record updated offsets for this content filter */
	if (pass_offset > entrybuf->cfe_pass_offset) {
		entrybuf->cfe_pass_offset = pass_offset;

		if (entrybuf->cfe_peek_offset < entrybuf->cfe_pass_offset)
			entrybuf->cfe_peek_offset = entrybuf->cfe_pass_offset;
	} else {
		CFIL_LOG(LOG_INFO, "pass_offset %llu <= cfe_pass_offset %llu",
			pass_offset, entrybuf->cfe_pass_offset);
	}
	/* Filter does not want or need to see data that's allowed to pass */
	if (peek_offset > entrybuf->cfe_pass_offset &&
	    peek_offset > entrybuf->cfe_peek_offset) {
		entrybuf->cfe_peek_offset = peek_offset;
	}

	/* Move data held in control queue to pending queue if needed */
	error = cfil_data_service_ctl_q(so, kcunit, outgoing);
	if (error != 0) {
		CFIL_LOG(LOG_ERR, "cfil_data_service_ctl_q() error %d",
			error);
		goto done;
	}
	error = EJUSTRETURN;

done:
	/*
	 * The filter is effectively detached when pass all from both sides
	 * or when the socket is closed and no more data is waiting
	 * to be delivered to the filter
	 */
	if (entry != NULL &&
	    ((entry->cfe_snd.cfe_pass_offset == CFM_MAX_OFFSET &&
	    entry->cfe_rcv.cfe_pass_offset == CFM_MAX_OFFSET) ||
	    ((so->so_cfil->cfi_flags & CFIF_CLOSE_WAIT) &&
	    cfil_queue_empty(&entry->cfe_snd.cfe_ctl_q) &&
	    cfil_queue_empty(&entry->cfe_rcv.cfe_ctl_q)))) {
		entry->cfe_flags |= CFEF_CFIL_DETACHED;
		CFIL_LOG(LOG_INFO, "so %llx detached %u",
			(uint64_t)VM_KERNEL_ADDRPERM(so), kcunit);
		if ((so->so_cfil->cfi_flags & CFIF_CLOSE_WAIT) &&
		    cfil_filters_attached(so) == 0) {
			CFIL_LOG(LOG_INFO, "so %llx waking",
				(uint64_t)VM_KERNEL_ADDRPERM(so));
			wakeup((caddr_t)&so->so_cfil);
		}
	}
	CFIL_INFO_VERIFY(so->so_cfil);
	CFIL_LOG(LOG_INFO, "return %d", error);
	return (error);
}
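
/*
 * Illustrative sketch (documentation only, not compiled): the condition
 * under which cfil_update_data_offsets() considers a filter entry
 * effectively detached.  The helper name is hypothetical.
 */
#if 0
static int
cfil_sketch_entry_is_done(struct cfil_info *cfil_info,
	struct cfil_entry *entry)
{
	/* Both directions were given a "pass everything" verdict */
	if (entry->cfe_snd.cfe_pass_offset == CFM_MAX_OFFSET &&
	    entry->cfe_rcv.cfe_pass_offset == CFM_MAX_OFFSET)
		return (1);

	/* Or the socket is closing and nothing is left to deliver */
	if ((cfil_info->cfi_flags & CFIF_CLOSE_WAIT) &&
	    cfil_queue_empty(&entry->cfe_snd.cfe_ctl_q) &&
	    cfil_queue_empty(&entry->cfe_rcv.cfe_ctl_q))
		return (1);

	return (0);
}
#endif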

/*
 * Update pass offset for socket when no data is pending
 */
static int
cfil_set_socket_pass_offset(struct socket *so, int outgoing)
{
	struct cfi_buf *cfi_buf;
	struct cfil_entry *entry;
	struct cfe_buf *entrybuf;
	uint32_t kcunit;
	uint64_t pass_offset = 0;

	if (so->so_cfil == NULL)
		return (0);

	CFIL_LOG(LOG_INFO, "so %llx outgoing %d",
		(uint64_t)VM_KERNEL_ADDRPERM(so), outgoing);

	socket_lock_assert_owned(so);

	if (outgoing)
		cfi_buf = &so->so_cfil->cfi_snd;
	else
		cfi_buf = &so->so_cfil->cfi_rcv;

	if (cfi_buf->cfi_pending_last - cfi_buf->cfi_pending_first == 0) {
		for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
			entry = &so->so_cfil->cfi_entries[kcunit - 1];

			/* Are we attached to a filter? */
			if (entry->cfe_filter == NULL)
				continue;

			if (outgoing)
				entrybuf = &entry->cfe_snd;
			else
				entrybuf = &entry->cfe_rcv;

			if (pass_offset == 0 ||
			    entrybuf->cfe_pass_offset < pass_offset)
				pass_offset = entrybuf->cfe_pass_offset;
		}
		cfi_buf->cfi_pass_offset = pass_offset;
	}

	return (0);
}

int
cfil_action_data_pass(struct socket *so, uint32_t kcunit, int outgoing,
	uint64_t pass_offset, uint64_t peek_offset)
{
	errno_t error = 0;

	CFIL_LOG(LOG_INFO, "");

	socket_lock_assert_owned(so);

	error = cfil_acquire_sockbuf(so, outgoing);
	if (error != 0) {
		CFIL_LOG(LOG_INFO, "so %llx %s dropped",
			(uint64_t)VM_KERNEL_ADDRPERM(so),
			outgoing ? "out" : "in");
		goto release;
	}

	error = cfil_update_data_offsets(so, kcunit, outgoing,
		pass_offset, peek_offset);

	cfil_service_inject_queue(so, outgoing);

	cfil_set_socket_pass_offset(so, outgoing);
release:
	CFIL_INFO_VERIFY(so->so_cfil);
	cfil_release_sockbuf(so, outgoing);

	return (error);
}

static void
cfil_flush_queues(struct socket *so)
{
	struct cfil_entry *entry;
	int kcunit;
	uint64_t drained;

	if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL)
		goto done;

	socket_lock_assert_owned(so);

	/*
	 * Flush the output queues and ignore errors as long as
	 * we are attached
	 */
	(void) cfil_acquire_sockbuf(so, 1);
	if (so->so_cfil != NULL) {
		drained = 0;
		for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
			entry = &so->so_cfil->cfi_entries[kcunit - 1];

			drained += cfil_queue_drain(&entry->cfe_snd.cfe_ctl_q);
			drained += cfil_queue_drain(
				&entry->cfe_snd.cfe_pending_q);
		}
		drained += cfil_queue_drain(&so->so_cfil->cfi_snd.cfi_inject_q);
		if (drained) {
			if (so->so_cfil->cfi_flags & CFIF_DROP)
				OSIncrementAtomic(
					&cfil_stats.cfs_flush_out_drop);
			else
				OSIncrementAtomic(
					&cfil_stats.cfs_flush_out_close);
		}
	}
	cfil_release_sockbuf(so, 1);

	/*
	 * Flush the input queues
	 */
	(void) cfil_acquire_sockbuf(so, 0);
	if (so->so_cfil != NULL) {
		drained = 0;
		for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
			entry = &so->so_cfil->cfi_entries[kcunit - 1];

			drained += cfil_queue_drain(
				&entry->cfe_rcv.cfe_ctl_q);
			drained += cfil_queue_drain(
				&entry->cfe_rcv.cfe_pending_q);
		}
		drained += cfil_queue_drain(&so->so_cfil->cfi_rcv.cfi_inject_q);
		if (drained) {
			if (so->so_cfil->cfi_flags & CFIF_DROP)
				OSIncrementAtomic(
					&cfil_stats.cfs_flush_in_drop);
			else
				OSIncrementAtomic(
					&cfil_stats.cfs_flush_in_close);
		}
	}
	cfil_release_sockbuf(so, 0);
done:
	CFIL_INFO_VERIFY(so->so_cfil);
}

int
cfil_action_drop(struct socket *so, uint32_t kcunit)
{
	errno_t error = 0;
	struct cfil_entry *entry;
	struct proc *p;

	if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL)
		goto done;

	socket_lock_assert_owned(so);

	entry = &so->so_cfil->cfi_entries[kcunit - 1];

	/* Are we attached to the filter? */
	if (entry->cfe_filter == NULL)
		goto done;

	so->so_cfil->cfi_flags |= CFIF_DROP;

	p = current_proc();

	/*
	 * Force the socket to be marked defunct
	 * (forcing fixed along with rdar://19391339)
	 */
	error = sosetdefunct(p, so,
		SHUTDOWN_SOCKET_LEVEL_DISCONNECT_ALL, FALSE);

	/* Flush the socket buffer and disconnect */
	if (error == 0)
		error = sodefunct(p, so, SHUTDOWN_SOCKET_LEVEL_DISCONNECT_ALL);

	/* The filter is done, mark as detached */
	entry->cfe_flags |= CFEF_CFIL_DETACHED;
	CFIL_LOG(LOG_INFO, "so %llx detached %u",
		(uint64_t)VM_KERNEL_ADDRPERM(so), kcunit);

	/* Pending data needs to go */
	cfil_flush_queues(so);

	if (so->so_cfil && (so->so_cfil->cfi_flags & CFIF_CLOSE_WAIT)) {
		if (cfil_filters_attached(so) == 0) {
			CFIL_LOG(LOG_INFO, "so %llx waking",
				(uint64_t)VM_KERNEL_ADDRPERM(so));
			wakeup((caddr_t)&so->so_cfil);
		}
	}
done:
	return (error);
}
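
/*
 * Note on the drop path above: a drop verdict marks the cfil_info with
 * CFIF_DROP, defuncts the socket (sosetdefunct()/sodefunct()) so it is
 * flushed and disconnected, marks the entry CFEF_CFIL_DETACHED, drains
 * the held queues, and finally wakes any thread parked in
 * cfil_sock_close_wait().
 */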

static int
cfil_update_entry_offsets(struct socket *so, int outgoing, unsigned int datalen)
{
	struct cfil_entry *entry;
	struct cfe_buf *entrybuf;
	uint32_t kcunit;

	CFIL_LOG(LOG_INFO, "so %llx outgoing %d datalen %u",
		(uint64_t)VM_KERNEL_ADDRPERM(so), outgoing, datalen);

	for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
		entry = &so->so_cfil->cfi_entries[kcunit - 1];

		/* Are we attached to the filter? */
		if (entry->cfe_filter == NULL)
			continue;

		if (outgoing)
			entrybuf = &entry->cfe_snd;
		else
			entrybuf = &entry->cfe_rcv;

		entrybuf->cfe_ctl_q.q_start += datalen;
		entrybuf->cfe_pass_offset = entrybuf->cfe_ctl_q.q_start;
		entrybuf->cfe_peeked = entrybuf->cfe_ctl_q.q_start;
		if (entrybuf->cfe_peek_offset < entrybuf->cfe_pass_offset)
			entrybuf->cfe_peek_offset = entrybuf->cfe_pass_offset;

		entrybuf->cfe_ctl_q.q_end += datalen;

		entrybuf->cfe_pending_q.q_start += datalen;
		entrybuf->cfe_pending_q.q_end += datalen;
	}
	CFIL_INFO_VERIFY(so->so_cfil);
	return (0);
}

static int
cfil_data_common(struct socket *so, int outgoing, struct sockaddr *to,
	struct mbuf *data, struct mbuf *control, uint32_t flags)
{
#pragma unused(to, control, flags)
	errno_t error = 0;
	unsigned int datalen;
	int mbcnt;
	uint32_t kcunit;
	struct cfi_buf *cfi_buf;

	if (so->so_cfil == NULL) {
		CFIL_LOG(LOG_ERR, "so %llx cfil detached",
			(uint64_t)VM_KERNEL_ADDRPERM(so));
		error = 0;
		goto done;
	} else if (so->so_cfil->cfi_flags & CFIF_DROP) {
		CFIL_LOG(LOG_ERR, "so %llx drop set",
			(uint64_t)VM_KERNEL_ADDRPERM(so));
		error = EPIPE;
		goto done;
	}

	datalen = cfil_data_length(data, &mbcnt);

	CFIL_LOG(LOG_INFO, "so %llx %s m %llx len %u flags 0x%x nextpkt %llx",
		(uint64_t)VM_KERNEL_ADDRPERM(so),
		outgoing ? "out" : "in",
		(uint64_t)VM_KERNEL_ADDRPERM(data), datalen, data->m_flags,
		(uint64_t)VM_KERNEL_ADDRPERM(data->m_nextpkt));

	if (outgoing)
		cfi_buf = &so->so_cfil->cfi_snd;
	else
		cfi_buf = &so->so_cfil->cfi_rcv;

	cfi_buf->cfi_pending_last += datalen;
	cfi_buf->cfi_pending_mbcnt += mbcnt;
	cfil_info_buf_verify(cfi_buf);

	CFIL_LOG(LOG_INFO, "so %llx cfi_pending_last %llu cfi_pass_offset %llu",
		(uint64_t)VM_KERNEL_ADDRPERM(so),
		cfi_buf->cfi_pending_last,
		cfi_buf->cfi_pass_offset);

	/* Fast path when below pass offset */
	if (cfi_buf->cfi_pending_last <= cfi_buf->cfi_pass_offset) {
		cfil_update_entry_offsets(so, outgoing, datalen);
	} else {
		for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
			error = cfil_data_filter(so, kcunit, outgoing, data,
				datalen);
			/* 0 means passed so continue with next filter */
			if (error != 0)
				break;
		}
	}

	/* Move cursor if no filter claimed the data */
	if (error == 0) {
		cfi_buf->cfi_pending_first += datalen;
		cfi_buf->cfi_pending_mbcnt -= mbcnt;
		cfil_info_buf_verify(cfi_buf);
	}
done:
	CFIL_INFO_VERIFY(so->so_cfil);

	return (error);
}
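
/*
 * Illustrative sketch (documentation only, not compiled): the fast-path
 * decision made by cfil_data_common().  When every byte of the newly
 * queued data is already covered by the socket-wide pass offset, the
 * per-entry offsets are simply advanced and nothing is held.  The helper
 * name is hypothetical.
 */
#if 0
static int
cfil_sketch_can_fast_pass(const struct cfi_buf *cfi_buf)
{
	/*
	 * cfi_pending_last already accounts for the newly queued bytes,
	 * so the whole chunk falls below the socket-wide pass offset.
	 */
	return (cfi_buf->cfi_pending_last <= cfi_buf->cfi_pass_offset);
}
#endif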

/*
 * Callback from socket layer sosendxxx()
 */
int
cfil_sock_data_out(struct socket *so, struct sockaddr *to,
	struct mbuf *data, struct mbuf *control, uint32_t flags)
{
	errno_t error = 0;

	if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL)
		return (0);

	socket_lock_assert_owned(so);

	if (so->so_cfil->cfi_flags & CFIF_DROP) {
		CFIL_LOG(LOG_ERR, "so %llx drop set",
			(uint64_t)VM_KERNEL_ADDRPERM(so));
		return (EPIPE);
	}
	if (control != NULL) {
		CFIL_LOG(LOG_ERR, "so %llx control",
			(uint64_t)VM_KERNEL_ADDRPERM(so));
		OSIncrementAtomic(&cfil_stats.cfs_data_out_control);
	}
	if ((flags & MSG_OOB)) {
		CFIL_LOG(LOG_ERR, "so %llx MSG_OOB",
			(uint64_t)VM_KERNEL_ADDRPERM(so));
		OSIncrementAtomic(&cfil_stats.cfs_data_out_oob);
	}
	if ((so->so_snd.sb_flags & SB_LOCK) == 0)
		panic("so %p SB_LOCK not set", so);

	if (so->so_snd.sb_cfil_thread != NULL)
		panic("%s sb_cfil_thread %p not NULL", __func__,
			so->so_snd.sb_cfil_thread);

	error = cfil_data_common(so, 1, to, data, control, flags);

	return (error);
}

/*
 * Callback from socket layer sbappendxxx()
 */
int
cfil_sock_data_in(struct socket *so, struct sockaddr *from,
	struct mbuf *data, struct mbuf *control, uint32_t flags)
{
	errno_t error = 0;

	if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL)
		return (0);

	socket_lock_assert_owned(so);

	if (so->so_cfil->cfi_flags & CFIF_DROP) {
		CFIL_LOG(LOG_ERR, "so %llx drop set",
			(uint64_t)VM_KERNEL_ADDRPERM(so));
		return (EPIPE);
	}
	if (control != NULL) {
		CFIL_LOG(LOG_ERR, "so %llx control",
			(uint64_t)VM_KERNEL_ADDRPERM(so));
		OSIncrementAtomic(&cfil_stats.cfs_data_in_control);
	}
	if (data->m_type == MT_OOBDATA) {
		CFIL_LOG(LOG_ERR, "so %llx MSG_OOB",
			(uint64_t)VM_KERNEL_ADDRPERM(so));
		OSIncrementAtomic(&cfil_stats.cfs_data_in_oob);
	}
	error = cfil_data_common(so, 0, from, data, control, flags);

	return (error);
}

/*
 * Callback from socket layer soshutdownxxx()
 *
 * We may delay the shutdown write if there is outgoing data in process.
 *
 * There is no point in delaying the shutdown read because the process
 * indicated that it does not want to read anymore data.
 */
int
cfil_sock_shutdown(struct socket *so, int *how)
{
	errno_t error = 0;

	if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL)
		goto done;

	socket_lock_assert_owned(so);

	CFIL_LOG(LOG_INFO, "so %llx how %d",
		(uint64_t)VM_KERNEL_ADDRPERM(so), *how);

	/*
	 * Check the state of the socket before the content filter
	 */
	if (*how != SHUT_WR && (so->so_state & SS_CANTRCVMORE) != 0) {
		/* read already shut down */
		error = ENOTCONN;
		goto done;
	}
	if (*how != SHUT_RD && (so->so_state & SS_CANTSENDMORE) != 0) {
		/* write already shut down */
		error = ENOTCONN;
		goto done;
	}

	if ((so->so_cfil->cfi_flags & CFIF_DROP) != 0) {
		CFIL_LOG(LOG_ERR, "so %llx drop set",
			(uint64_t)VM_KERNEL_ADDRPERM(so));
		goto done;
	}

	/*
	 * shutdown read: SHUT_RD or SHUT_RDWR
	 */
	if (*how != SHUT_WR) {
		if (so->so_cfil->cfi_flags & CFIF_SHUT_RD) {
			error = ENOTCONN;
			goto done;
		}
		so->so_cfil->cfi_flags |= CFIF_SHUT_RD;
		cfil_sock_notify_shutdown(so, SHUT_RD);
	}
	/*
	 * shutdown write: SHUT_WR or SHUT_RDWR
	 */
	if (*how != SHUT_RD) {
		if (so->so_cfil->cfi_flags & CFIF_SHUT_WR) {
			error = ENOTCONN;
			goto done;
		}
		so->so_cfil->cfi_flags |= CFIF_SHUT_WR;
		cfil_sock_notify_shutdown(so, SHUT_WR);
		/*
		 * When outgoing data is pending, we delay the shutdown at the
		 * protocol level until the content filters give the final
		 * verdict on the pending data.
		 */
		if (cfil_sock_data_pending(&so->so_snd) != 0) {
			/*
			 * When shutting down the read and write sides at once
			 * we can proceed to the final shutdown of the read
			 * side. Otherwise, we just return.
			 */
			if (*how == SHUT_WR) {
				error = EJUSTRETURN;
			} else if (*how == SHUT_RDWR) {
				*how = SHUT_RD;
			}
		}
	}
done:
	return (error);
}
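
/*
 * Illustrative sketch (documentation only, not compiled): the decision
 * applied by cfil_sock_shutdown() when outgoing data is still awaiting a
 * verdict.  Hypothetical helper mirroring the code above.
 */
#if 0
static errno_t
cfil_sketch_shutdown_verdict(int *how, int snd_pending)
{
	if (!snd_pending)
		return (0);		/* proceed with the shutdown now */
	if (*how == SHUT_WR)
		return (EJUSTRETURN);	/* delay the write-side shutdown */
	if (*how == SHUT_RDWR)
		*how = SHUT_RD;		/* shut down the read side only */
	return (0);
}
#endif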

/*
 * This is called when the socket is closed and there is no more
 * opportunity for filtering
 */
void
cfil_sock_is_closed(struct socket *so)
{
	errno_t error = 0;
	int kcunit;

	if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL)
		return;

	CFIL_LOG(LOG_INFO, "so %llx", (uint64_t)VM_KERNEL_ADDRPERM(so));

	socket_lock_assert_owned(so);

	for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
		/* Let the filters know of the closing */
		error = cfil_dispatch_closed_event(so, kcunit);
	}

	/* Last chance to push passed data out */
	error = cfil_acquire_sockbuf(so, 1);
	if (error == 0)
		cfil_service_inject_queue(so, 1);
	cfil_release_sockbuf(so, 1);

	so->so_cfil->cfi_flags |= CFIF_SOCK_CLOSED;

	/* Pending data needs to go */
	cfil_flush_queues(so);

	CFIL_INFO_VERIFY(so->so_cfil);
}

/*
 * This is called when the socket is disconnected so let the filters
 * know about the disconnection and that no more data will come
 *
 * The how parameter has the same values as soshutdown()
 */
void
cfil_sock_notify_shutdown(struct socket *so, int how)
{
	errno_t error = 0;
	int kcunit;

	if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL)
		return;

	CFIL_LOG(LOG_INFO, "so %llx how %d",
		(uint64_t)VM_KERNEL_ADDRPERM(so), how);

	socket_lock_assert_owned(so);

	for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
		/* Disconnect incoming side */
		if (how != SHUT_WR)
			error = cfil_dispatch_disconnect_event(so, kcunit, 0);
		/* Disconnect outgoing side */
		if (how != SHUT_RD)
			error = cfil_dispatch_disconnect_event(so, kcunit, 1);
	}
}

static int
cfil_filters_attached(struct socket *so)
{
	struct cfil_entry *entry;
	uint32_t kcunit;
	int attached = 0;

	if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL)
		return (0);

	socket_lock_assert_owned(so);

	for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
		entry = &so->so_cfil->cfi_entries[kcunit - 1];

		/* Are we attached to the filter? */
		if (entry->cfe_filter == NULL)
			continue;
		if ((entry->cfe_flags & CFEF_SENT_SOCK_ATTACHED) == 0)
			continue;
		if ((entry->cfe_flags & CFEF_CFIL_DETACHED) != 0)
			continue;
		attached = 1;
		break;
	}

	return (attached);
}

/*
 * This is called when the socket is closed and we are waiting for
 * the filters to give the final pass or drop
 */
void
cfil_sock_close_wait(struct socket *so)
{
	lck_mtx_t *mutex_held;
	struct timespec ts;
	int error;

	if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL)
		return;

	CFIL_LOG(LOG_INFO, "so %llx", (uint64_t)VM_KERNEL_ADDRPERM(so));

	if (so->so_proto->pr_getlock != NULL)
		mutex_held = (*so->so_proto->pr_getlock)(so, 0);
	else
		mutex_held = so->so_proto->pr_domain->dom_mtx;
	lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);

	while (cfil_filters_attached(so)) {
		/*
		 * Notify the filters we are going away so they can detach
		 */
		cfil_sock_notify_shutdown(so, SHUT_RDWR);

		/*
		 * Make sure we still need to wait after the filters are
		 * notified of the disconnection
		 */
		if (cfil_filters_attached(so) == 0)
			break;

		CFIL_LOG(LOG_INFO, "so %llx waiting",
			(uint64_t)VM_KERNEL_ADDRPERM(so));

		ts.tv_sec = cfil_close_wait_timeout / 1000;
		ts.tv_nsec = (cfil_close_wait_timeout % 1000) *
			NSEC_PER_USEC * 1000;

		OSIncrementAtomic(&cfil_stats.cfs_close_wait);
		so->so_cfil->cfi_flags |= CFIF_CLOSE_WAIT;
		error = msleep((caddr_t)&so->so_cfil, mutex_held,
			PSOCK | PCATCH, "cfil_sock_close_wait", &ts);
		so->so_cfil->cfi_flags &= ~CFIF_CLOSE_WAIT;

		CFIL_LOG(LOG_NOTICE, "so %llx timed out %d",
			(uint64_t)VM_KERNEL_ADDRPERM(so), (error != 0));

		/*
		 * Force close in case of timeout
		 */
		if (error != 0) {
			OSIncrementAtomic(&cfil_stats.cfs_close_wait_timeout);
			break;
		}
	}
}
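
/*
 * Illustrative sketch (documentation only, not compiled): how the
 * millisecond tunable cfil_close_wait_timeout is turned into the
 * struct timespec passed to msleep() by cfil_sock_close_wait() above.
 * Hypothetical helper, shown only to document the unit conversion.
 */
#if 0
static void
cfil_sketch_close_wait_ts(uint32_t timeout_ms, struct timespec *ts)
{
	ts->tv_sec = timeout_ms / 1000;			/* whole seconds */
	ts->tv_nsec = (timeout_ms % 1000) *
	    NSEC_PER_USEC * 1000;			/* remainder in ns */
}
#endif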

/*
 * Returns the size of the data held by the content filter for this
 * socket buffer
 */
int32_t
cfil_sock_data_pending(struct sockbuf *sb)
{
	struct socket *so = sb->sb_so;
	uint64_t pending = 0;

	if ((so->so_flags & SOF_CONTENT_FILTER) != 0 && so->so_cfil != NULL) {
		struct cfi_buf *cfi_buf;

		socket_lock_assert_owned(so);

		if ((sb->sb_flags & SB_RECV) == 0)
			cfi_buf = &so->so_cfil->cfi_snd;
		else
			cfi_buf = &so->so_cfil->cfi_rcv;

		pending = cfi_buf->cfi_pending_last -
			cfi_buf->cfi_pending_first;

		/*
		 * If we are limited by the "chars of mbufs used" roughly
		 * adjust so we won't overcommit
		 */
		if (pending > (uint64_t)cfi_buf->cfi_pending_mbcnt)
			pending = cfi_buf->cfi_pending_mbcnt;
	}

	VERIFY(pending < INT32_MAX);

	return (int32_t)(pending);
}
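
/*
 * Illustrative sketch (documentation only, not compiled): the value
 * reported by cfil_sock_data_pending() is the span of bytes still held
 * for the filters, clamped by the mbuf storage actually accounted for so
 * the socket buffer is never over-committed.  Hypothetical helper.
 */
#if 0
static uint64_t
cfil_sketch_pending_bytes(const struct cfi_buf *cfi_buf)
{
	uint64_t pending = cfi_buf->cfi_pending_last -
	    cfi_buf->cfi_pending_first;

	if (pending > (uint64_t)cfi_buf->cfi_pending_mbcnt)
		pending = cfi_buf->cfi_pending_mbcnt;

	return (pending);
}
#endif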

/*
 * Return the socket buffer space used by data being held by content filters
 * so processes won't clog the socket buffer
 */
int32_t
cfil_sock_data_space(struct sockbuf *sb)
{
	struct socket *so = sb->sb_so;
	uint64_t pending = 0;

	if ((so->so_flags & SOF_CONTENT_FILTER) != 0 && so->so_cfil != NULL &&
	    so->so_snd.sb_cfil_thread != current_thread()) {
		struct cfi_buf *cfi_buf;

		socket_lock_assert_owned(so);

		if ((sb->sb_flags & SB_RECV) == 0)
			cfi_buf = &so->so_cfil->cfi_snd;
		else
			cfi_buf = &so->so_cfil->cfi_rcv;

		pending = cfi_buf->cfi_pending_last -
			cfi_buf->cfi_pending_first;

		/*
		 * If we are limited by the "chars of mbufs used" roughly
		 * adjust so we won't overcommit
		 */
		if ((uint64_t)cfi_buf->cfi_pending_mbcnt > pending)
			pending = cfi_buf->cfi_pending_mbcnt;
	}

	VERIFY(pending < INT32_MAX);

	return (int32_t)(pending);
}

/*
 * A callback from the socket and protocol layer when data becomes
 * available in the socket buffer to give a chance for the content filter
 * to re-inject data that was held back
 */
void
cfil_sock_buf_update(struct sockbuf *sb)
{
	int outgoing;
	int error;
	struct socket *so = sb->sb_so;

	if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL)
		return;

	socket_lock_assert_owned(so);

	if ((sb->sb_flags & SB_RECV) == 0) {
		if ((so->so_cfil->cfi_flags & CFIF_RETRY_INJECT_OUT) == 0)
			return;
		outgoing = 1;
		OSIncrementAtomic(&cfil_stats.cfs_inject_q_out_retry);
	} else {
		if ((so->so_cfil->cfi_flags & CFIF_RETRY_INJECT_IN) == 0)
			return;
		outgoing = 0;
		OSIncrementAtomic(&cfil_stats.cfs_inject_q_in_retry);
	}

	CFIL_LOG(LOG_NOTICE, "so %llx outgoing %d",
		(uint64_t)VM_KERNEL_ADDRPERM(so), outgoing);

	error = cfil_acquire_sockbuf(so, outgoing);
	if (error == 0)
		cfil_service_inject_queue(so, outgoing);
	cfil_release_sockbuf(so, outgoing);
}

static int
sysctl_cfil_filter_list(struct sysctl_oid *oidp, void *arg1, int arg2,
	struct sysctl_req *req)
{
#pragma unused(oidp, arg1, arg2)
	int error = 0;
	size_t len = 0;
	u_int32_t i;

	/* Read only */
	if (req->newptr != USER_ADDR_NULL)
		return (EPERM);

	cfil_rw_lock_shared(&cfil_lck_rw);

	for (i = 0; content_filters != NULL && i < MAX_CONTENT_FILTER; i++) {
		struct cfil_filter_stat filter_stat;
		struct content_filter *cfc = content_filters[i];

		if (cfc == NULL)
			continue;

		/* If just asking for the size */
		if (req->oldptr == USER_ADDR_NULL) {
			len += sizeof(struct cfil_filter_stat);
			continue;
		}

		bzero(&filter_stat, sizeof(struct cfil_filter_stat));
		filter_stat.cfs_len = sizeof(struct cfil_filter_stat);
		filter_stat.cfs_filter_id = cfc->cf_kcunit;
		filter_stat.cfs_flags = cfc->cf_flags;
		filter_stat.cfs_sock_count = cfc->cf_sock_count;
		filter_stat.cfs_necp_control_unit = cfc->cf_necp_control_unit;

		error = SYSCTL_OUT(req, &filter_stat,
			sizeof (struct cfil_filter_stat));
		if (error != 0)
			break;
	}
	/* If just asking for the size */
	if (req->oldptr == USER_ADDR_NULL)
		req->oldidx = len;

	cfil_rw_unlock_shared(&cfil_lck_rw);

	return (error);
}

static int sysctl_cfil_sock_list(struct sysctl_oid *oidp, void *arg1, int arg2,
	struct sysctl_req *req)
{
#pragma unused(oidp, arg1, arg2)
	int error = 0;
	u_int32_t i;
	struct cfil_info *cfi;

	/* Read only */
	if (req->newptr != USER_ADDR_NULL)
		return (EPERM);

	cfil_rw_lock_shared(&cfil_lck_rw);

	/*
	 * If just asking for the size, estimate it
	 */
	if (req->oldptr == USER_ADDR_NULL) {
		req->oldidx = cfil_sock_attached_count *
			sizeof(struct cfil_sock_stat);
		/* Bump the length in case new sockets get attached */
		req->oldidx += req->oldidx >> 3;
		goto done;
	}

	TAILQ_FOREACH(cfi, &cfil_sock_head, cfi_link) {
		struct cfil_entry *entry;
		struct cfil_sock_stat stat;
		struct socket *so = cfi->cfi_so;

		bzero(&stat, sizeof(struct cfil_sock_stat));
		stat.cfs_len = sizeof(struct cfil_sock_stat);
		stat.cfs_sock_id = cfi->cfi_sock_id;
		stat.cfs_flags = cfi->cfi_flags;

		if (so != NULL) {
			stat.cfs_pid = so->last_pid;
			memcpy(stat.cfs_uuid, so->last_uuid,
				sizeof(uuid_t));
			if (so->so_flags & SOF_DELEGATED) {
				stat.cfs_e_pid = so->e_pid;
				memcpy(stat.cfs_e_uuid, so->e_uuid,
					sizeof(uuid_t));
			} else {
				stat.cfs_e_pid = so->last_pid;
				memcpy(stat.cfs_e_uuid, so->last_uuid,
					sizeof(uuid_t));
			}
		}

		stat.cfs_snd.cbs_pending_first =
			cfi->cfi_snd.cfi_pending_first;
		stat.cfs_snd.cbs_pending_last =
			cfi->cfi_snd.cfi_pending_last;
		stat.cfs_snd.cbs_inject_q_len =
			cfil_queue_len(&cfi->cfi_snd.cfi_inject_q);
		stat.cfs_snd.cbs_pass_offset =
			cfi->cfi_snd.cfi_pass_offset;

		stat.cfs_rcv.cbs_pending_first =
			cfi->cfi_rcv.cfi_pending_first;
		stat.cfs_rcv.cbs_pending_last =
			cfi->cfi_rcv.cfi_pending_last;
		stat.cfs_rcv.cbs_inject_q_len =
			cfil_queue_len(&cfi->cfi_rcv.cfi_inject_q);
		stat.cfs_rcv.cbs_pass_offset =
			cfi->cfi_rcv.cfi_pass_offset;

		for (i = 0; i < MAX_CONTENT_FILTER; i++) {
			struct cfil_entry_stat *estat;
			struct cfe_buf *ebuf;
			struct cfe_buf_stat *sbuf;

			entry = &cfi->cfi_entries[i];

			estat = &stat.ces_entries[i];

			estat->ces_len = sizeof(struct cfil_entry_stat);
			estat->ces_filter_id = entry->cfe_filter ?
				entry->cfe_filter->cf_kcunit : 0;
			estat->ces_flags = entry->cfe_flags;
			estat->ces_necp_control_unit =
				entry->cfe_necp_control_unit;

			estat->ces_last_event.tv_sec =
				(int64_t)entry->cfe_last_event.tv_sec;
			estat->ces_last_event.tv_usec =
				(int64_t)entry->cfe_last_event.tv_usec;

			estat->ces_last_action.tv_sec =
				(int64_t)entry->cfe_last_action.tv_sec;
			estat->ces_last_action.tv_usec =
				(int64_t)entry->cfe_last_action.tv_usec;

			ebuf = &entry->cfe_snd;
			sbuf = &estat->ces_snd;
			sbuf->cbs_pending_first =
				cfil_queue_offset_first(&ebuf->cfe_pending_q);
			sbuf->cbs_pending_last =
				cfil_queue_offset_last(&ebuf->cfe_pending_q);
			sbuf->cbs_ctl_first =
				cfil_queue_offset_first(&ebuf->cfe_ctl_q);
			sbuf->cbs_ctl_last =
				cfil_queue_offset_last(&ebuf->cfe_ctl_q);
			sbuf->cbs_pass_offset = ebuf->cfe_pass_offset;
			sbuf->cbs_peek_offset = ebuf->cfe_peek_offset;
			sbuf->cbs_peeked = ebuf->cfe_peeked;

			ebuf = &entry->cfe_rcv;
			sbuf = &estat->ces_rcv;
			sbuf->cbs_pending_first =
				cfil_queue_offset_first(&ebuf->cfe_pending_q);
			sbuf->cbs_pending_last =
				cfil_queue_offset_last(&ebuf->cfe_pending_q);
			sbuf->cbs_ctl_first =
				cfil_queue_offset_first(&ebuf->cfe_ctl_q);
			sbuf->cbs_ctl_last =
				cfil_queue_offset_last(&ebuf->cfe_ctl_q);
			sbuf->cbs_pass_offset = ebuf->cfe_pass_offset;
			sbuf->cbs_peek_offset = ebuf->cfe_peek_offset;
			sbuf->cbs_peeked = ebuf->cfe_peeked;
		}
		error = SYSCTL_OUT(req, &stat,
			sizeof (struct cfil_sock_stat));
		if (error != 0)
			break;
	}
done:
	cfil_rw_unlock_shared(&cfil_lck_rw