/*
 * Copyright (c) 2013-2017 Apple Inc. All rights reserved.
 *
 * @APPLE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this
 * file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_LICENSE_HEADER_END@
 */

/*
 * The socket content filter subsystem provides a way for user space agents to
 * make filtering decisions based on the content of the data being sent and
 * received by TCP/IP sockets.
 *
 * A content filter user space agent gets a copy of the data, and the data is
 * also kept in a kernel buffer until the user space agent makes a pass or drop
 * decision. This unidirectional flow of content avoids unnecessary data copies.
 *
 * A user space filter agent opens a kernel control socket with the name
 * CONTENT_FILTER_CONTROL_NAME to attach to the socket content filter subsystem.
 * When connected, a "struct content_filter" is created and set as the
 * "unitinfo" of the corresponding kernel control socket instance.
 *
 * The socket content filter subsystem exchanges messages with the user space
 * filter agent until an ultimate pass or drop decision is made by the
 * user space filter agent.
 *
 * Note that messages about many TCP/IP sockets can be multiplexed
 * over a single kernel control socket.
 *
 * - The current implementation is limited to TCP sockets.
 * - The current implementation supports up to two simultaneous content filters,
 *   for the sake of simplicity of the implementation.
 *
 *
 * NECP FILTER CONTROL UNIT
 *
 * A user space filter agent uses the Network Extension Control Policy (NECP)
 * database to specify which TCP/IP sockets need to be filtered. The NECP
 * criteria may be based on a variety of properties, such as user ID or proc UUID.
 *
 * The NECP "filter control unit" is used by the socket content filter subsystem
 * to deliver the relevant TCP/IP content information to the appropriate
 * user space filter agent via its kernel control socket instance.
 * This works as follows:
 *
 * 1) The user space filter agent specifies an NECP filter control unit when
 *    it adds its filtering rules to the NECP database.
 *
 * 2) The user space filter agent also sets its NECP filter control unit on the
 *    content filter kernel control socket via the socket option
 *    CFIL_OPT_NECP_CONTROL_UNIT (see the sketch below).
 *
 * 3) The NECP database is consulted to find out if a given TCP/IP socket
 *    needs to be subjected to content filtering and returns the corresponding
 *    NECP filter control unit -- the NECP filter control unit is actually
 *    stored in the TCP/IP socket structure, so the NECP lookup is very simple.
 *
 * 4) The NECP filter control unit is then used to find the corresponding
 *    kernel control socket instance.
 *
 * Note: NECP currently supports a single filter control unit per TCP/IP socket,
 * but this restriction may soon be lifted.
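 *
 * As an illustrative sketch only (not part of this subsystem, error handling
 * and includes omitted), a privileged user space filter agent would typically
 * connect to the control socket and set its NECP filter control unit roughly
 * like this; "necp_unit" stands for whatever value the agent registered with
 * NECP and is a placeholder here:
 *
 *	struct ctl_info info;
 *	struct sockaddr_ctl addr;
 *	int fd = socket(PF_SYSTEM, SOCK_DGRAM, SYSPROTO_CONTROL);
 *
 *	bzero(&info, sizeof(info));
 *	strlcpy(info.ctl_name, CONTENT_FILTER_CONTROL_NAME,
 *	    sizeof(info.ctl_name));
 *	ioctl(fd, CTLIOCGINFO, &info);		// resolve the control name
 *
 *	bzero(&addr, sizeof(addr));
 *	addr.sc_len = sizeof(addr);
 *	addr.sc_family = AF_SYSTEM;
 *	addr.ss_sysaddr = AF_SYS_CONTROL;
 *	addr.sc_id = info.ctl_id;
 *	addr.sc_unit = 1;			// kcunit 1..MAX_CONTENT_FILTER
 *	connect(fd, (struct sockaddr *)&addr, sizeof(addr));
 *
 *	uint32_t necp_unit = 1;			// example value registered with NECP
 *	setsockopt(fd, SYSPROTO_CONTROL, CFIL_OPT_NECP_CONTROL_UNIT,
 *	    &necp_unit, sizeof(necp_unit));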
 *
 *
 * THE MESSAGING PROTOCOL
 *
 * The socket content filter subsystem and a user space filter agent
 * communicate over the kernel control socket via an asynchronous
 * messaging protocol (this is not a request-response protocol).
 * The socket content filter subsystem sends event messages to the user
 * space filter agent about the TCP/IP sockets it is interested in filtering.
 * The user space filter agent sends action messages to either allow
 * data to pass or to disallow the data flow (and drop the connection).
 *
 * All messages over a content filter kernel control socket share the same
 * common header of type "struct cfil_msg_hdr". The message type tells whether
 * it is an event message "CFM_TYPE_EVENT" or an action message "CFM_TYPE_ACTION".
 * The message header field "cfm_sock_id" identifies a given TCP/IP socket.
 * Note that the message header length field may be padded for alignment and can
 * be larger than the actual content of the message.
 * The field "cfm_op" describes the kind of event or action.
 *
 * Here are the kinds of content filter events:
 * - CFM_OP_SOCKET_ATTACHED: a new TCP/IP socket is being filtered
 * - CFM_OP_SOCKET_CLOSED: a TCP/IP socket is closed
 * - CFM_OP_DATA_OUT: a span of data is being sent on a TCP/IP socket
 * - CFM_OP_DATA_IN: a span of data is being received on a TCP/IP socket
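 *
 * A minimal sketch of the agent side (illustrative only, reusing the control
 * socket "fd" from the example above and ignoring short reads and errors):
 * read one message, then dispatch on the common header:
 *
 *	uint8_t buf[8 * 1024];
 *	ssize_t n = recv(fd, buf, sizeof(buf), 0);
 *	if (n >= (ssize_t)sizeof(struct cfil_msg_hdr)) {
 *		struct cfil_msg_hdr *hdr = (struct cfil_msg_hdr *)buf;
 *
 *		switch (hdr->cfm_op) {
 *		case CFM_OP_SOCKET_ATTACHED:
 *			// remember hdr->cfm_sock_id, then send a
 *			// CFM_OP_DATA_UPDATE action to start receiving data
 *			break;
 *		case CFM_OP_DATA_OUT:
 *		case CFM_OP_DATA_IN:
 *			// inspect the span, update pass/peek offsets
 *			break;
 *		case CFM_OP_SOCKET_CLOSED:
 *			// forget the flow
 *			break;
 *		}
 *	}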
 *
 * The CFM_OP_DATA_OUT and CFM_OP_DATA_IN event messages contain a span of
 * data that is being sent or received. The position of this span of data
 * in the data flow is described by a pair of start and end offsets. These
 * are absolute 64-bit offsets. The first byte sent (or received) starts
 * at offset 0 and ends at offset 1. The length of the content data
 * is given by the difference between the end offset and the start offset.
 *
 * After a CFM_OP_SOCKET_ATTACHED is delivered, CFM_OP_DATA_OUT and
 * CFM_OP_DATA_IN events are not delivered until a CFM_OP_DATA_UPDATE
 * action message is sent by the user space filter agent.
 *
 * Note: absolute 64-bit offsets should be large enough for the foreseeable
 * future. A 64-bit counter will wrap after 468 years at 10 Gbit/sec:
 * 2^64 / ((10E9 / 8) * 60 * 60 * 24 * 365.25) = 467.63
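 *
 * Worked example (illustrative numbers only): a CFM_OP_DATA_OUT event with
 * start offset 1000 and end offset 1500 describes bytes 1000..1499 of the
 * outgoing stream, i.e. 1500 - 1000 = 500 bytes of content data.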
 *
 * There are two kinds of primary content filter actions:
 * - CFM_OP_DATA_UPDATE: to update the pass or peek offsets for each direction
 * - CFM_OP_DROP: to shut down the socket and disallow further data flow
 *
 * There is also an action to mark a given client flow as already filtered
 * at a higher level, CFM_OP_BLESS_CLIENT.
 *
 *
 * The CFM_OP_DATA_UPDATE action messages let the user space filter
 * agent allow data to flow up to the specified pass offset -- there
 * is a pass offset for outgoing data and a pass offset for incoming data.
 * When a new TCP/IP socket is attached to the content filter, each pass offset
 * is initially set to 0, so no data is allowed to pass by default.
 * When a pass offset is set to CFM_MAX_OFFSET via a CFM_OP_DATA_UPDATE,
 * the data flow in that direction becomes unrestricted.
 *
 * Note that pass offsets can only be incremented. A CFM_OP_DATA_UPDATE message
 * with a pass offset smaller than the pass offset of a previous
 * CFM_OP_DATA_UPDATE message is silently ignored.
 *
 * A user space filter agent also uses CFM_OP_DATA_UPDATE action messages
 * to tell the kernel how much data it wants to see, by using the peek offsets.
 * Just like pass offsets, there is a peek offset for each direction.
 * When a new TCP/IP socket is attached to the content filter, each peek offset
 * is initially set to 0, so no CFM_OP_DATA_OUT and CFM_OP_DATA_IN event
 * messages are dispatched by default until a CFM_OP_DATA_UPDATE action message
 * with a peek offset greater than 0 is sent by the user space filter agent.
 * When the peek offset is set to CFM_MAX_OFFSET via a CFM_OP_DATA_UPDATE,
 * the flow of data event messages becomes unrestricted.
 *
 * Note that a peek offset cannot be smaller than the corresponding pass offset.
 * Also, a peek offset cannot be smaller than the corresponding end offset
 * of the last CFM_OP_DATA_OUT/CFM_OP_DATA_IN message dispatched. An attempt
 * to set a peek offset that is too small is silently ignored.
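 *
 * As an illustrative sketch only (the offset field names below are the ones
 * used by this file; the header member name cfa_msghdr is assumed here, by
 * analogy with cfs_msghdr used elsewhere in this file), an agent that has
 * approved everything it has seen so far on a flow could pass those bytes
 * and ask to peek further ahead like this:
 *
 *	struct cfil_msg_action action;
 *
 *	bzero(&action, sizeof(action));
 *	action.cfa_msghdr.cfm_len = sizeof(action);
 *	action.cfa_msghdr.cfm_version = CFM_VERSION_CURRENT;
 *	action.cfa_msghdr.cfm_type = CFM_TYPE_ACTION;
 *	action.cfa_msghdr.cfm_op = CFM_OP_DATA_UPDATE;
 *	action.cfa_msghdr.cfm_sock_id = sock_id;	// from the attach event
 *
 *	// allow everything seen so far to pass, in both directions
 *	action.cfa_out_pass_offset = last_out_end_offset;
 *	action.cfa_in_pass_offset = last_in_end_offset;
 *	// and ask to peek at the next 16 KB in each direction
 *	action.cfa_out_peek_offset = last_out_end_offset + 16 * 1024;
 *	action.cfa_in_peek_offset = last_in_end_offset + 16 * 1024;
 *
 *	send(fd, &action, sizeof(action), 0);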
 *
 *
 * PER SOCKET "struct cfil_info"
 *
 * As soon as a TCP/IP socket gets attached to a content filter, a
 * "struct cfil_info" is created to hold the content filtering state for this
 * socket.
 *
 * The content filtering state is made of the following information
 * for each direction:
 * - The current pass offset;
 * - The first and last offsets of the data pending, waiting for a filtering
 *   decision;
 * - The inject queue for data that passed the filters and that needs
 *   to be re-injected;
 * - A content filter specific state in a set of "struct cfil_entry"
 *
 *
 * CONTENT FILTER STATE "struct cfil_entry"
 *
 * The "struct cfil_entry" maintains the information most relevant to the
 * message handling over a kernel control socket with a user space filter agent.
 *
 * The "struct cfil_entry" holds the NECP filter control unit that corresponds
 * to the kernel control socket unit and also has a pointer
 * to the corresponding "struct content_filter".
 *
 * For each direction, "struct cfil_entry" maintains the following information:
 * - The offset of the last data peeked at by the filter
 * - A queue of data that is waiting to be delivered to the user space filter
 *   agent on the kernel control socket
 * - A queue of data for which event messages have been sent on the kernel
 *   control socket and that are pending a filtering decision.
 *
 *
 * CONTENT FILTER QUEUES
 *
 * Data that is being filtered is steered away from the TCP/IP socket buffer
 * and instead sits in one of three content filter queues until the data
 * can be re-injected into the TCP/IP socket buffer.
 *
 * A content filter queue is represented by a "struct cfil_queue" that contains
 * a list of mbufs and the start and end offsets of the data span of
 * the mbuf list.
 *
 * The data moves through the three content filter queues in this order:
 * a) The "cfe_ctl_q" of "struct cfil_entry"
 * b) The "cfe_pending_q" of "struct cfil_entry"
 * c) The "cfi_inject_q" of "struct cfil_info"
 *
 * Note: the sequence (a),(b) may be repeated several times if there is more
 * than one content filter attached to the TCP/IP socket.
 *
 * The "cfe_ctl_q" queue holds data that cannot be delivered to the
 * kernel control socket, for one of two reasons:
 * - The peek offset is less than the end offset of the mbuf data
 * - The kernel control socket is flow controlled
 *
 * The "cfe_pending_q" queue holds data for which CFM_OP_DATA_OUT or
 * CFM_OP_DATA_IN events have been successfully dispatched to the kernel control
 * socket and that is waiting for a pass action message from the user space
 * filter agent. An mbuf length must be fully allowed to pass to be removed
 * from the cfe_pending_q.
 *
 * The "cfi_inject_q" queue holds data that has been fully allowed to pass
 * by the user space filter agent and that needs to be re-injected into the
 * TCP/IP socket buffer.
 *
 *
 * IMPACT ON FLOW CONTROL
 *
 * An essential aspect of the content filter subsystem is to minimize the
 * impact on flow control of the TCP/IP sockets being filtered.
 *
 * The processing overhead of content filtering may affect flow control by
 * adding noticeable delays and cannot be eliminated entirely -- the user space
 * filter agent must take care to keep its processing delays to a minimum.
 *
 * The data being filtered is kept in kernel buffers while waiting for
 * a decision by the user space filter agent. This amount of pending data
 * needs to be subtracted from the amount of data available in the
 * corresponding TCP/IP socket buffer. This is done by modifying
 * sbspace() and tcp_sbspace() to account for the amount of data pending
 * in the content filter.
 *
 *
 * LOCKING STRATEGY
 *
 * The global state of the content filter subsystem is protected by a single
 * read-write lock, "cfil_lck_rw". Data-flow processing can be done with the
 * cfil read-write lock held as shared, so it can be re-entered from multiple
 * threads.
 *
 * The per TCP/IP socket content filter state -- "struct cfil_info" -- is
 * protected by the socket lock.
 *
 * A TCP/IP socket lock cannot be taken while the cfil read-write lock
 * is held. That is why there are some sequences where we drop the cfil
 * read-write lock before taking the TCP/IP socket lock.
 *
 * It is also important to lock the TCP/IP socket buffer while the content
 * filter is modifying the amount of pending data. Otherwise the calculations
 * in sbspace() and tcp_sbspace() could be wrong.
 *
 * The "cfil_lck_rw" protects "struct content_filter" and also the fields
 * "cfe_link" and "cfe_filter" of "struct cfil_entry".
 *
 * Actually, "cfe_link" and "cfe_filter" are protected both by
 * "cfil_lck_rw" and the socket lock: they may be modified only when
 * "cfil_lck_rw" is held exclusive and the socket is locked.
 *
 * To read the other fields of "struct content_filter", "cfil_lck_rw" must be
 * held at least in shared mode.
 *
 *
 * LIMITATIONS
 *
 * - For TCP sockets only
 * - Does not support TCP unordered messages
 *
 *
 * TO DO LIST
 *
 * - If datagram support is added, enqueue control and address mbufs as well
 */
#include <sys/types.h>
#include <sys/kern_control.h>
#include <sys/queue.h>
#include <sys/domain.h>
#include <sys/protosw.h>
#include <sys/syslog.h>

#include <kern/locks.h>
#include <kern/zalloc.h>
#include <kern/debug.h>

#include <net/content_filter.h>

#include <netinet/in_pcb.h>
#include <netinet/tcp.h>
#include <netinet/tcp_var.h>

#include <libkern/libkern.h>


#define	MAX_CONTENT_FILTER 2
/*
 * The structure content_filter represents a user space content filter.
 * It is created and associated with a kernel control socket instance.
 */
struct content_filter {
	kern_ctl_ref		cf_kcref;
	u_int32_t		cf_kcunit;
	uint32_t		cf_flags;
	uint32_t		cf_necp_control_unit;
	uint32_t		cf_sock_count;
	TAILQ_HEAD(, cfil_entry) cf_sock_entries;
};

#define	CFF_ACTIVE		0x01
#define	CFF_DETACHING		0x02
#define	CFF_FLOW_CONTROLLED	0x04

struct content_filter **content_filters = NULL;
uint32_t cfil_active_count = 0;	/* Number of active content filters */
uint32_t cfil_sock_attached_count = 0;	/* Number of socket attachments */
uint32_t cfil_close_wait_timeout = 1000; /* in milliseconds */
static kern_ctl_ref cfil_kctlref = NULL;

static lck_grp_attr_t *cfil_lck_grp_attr = NULL;
static lck_attr_t *cfil_lck_attr = NULL;
static lck_grp_t *cfil_lck_grp = NULL;
decl_lck_rw_data(static, cfil_lck_rw);

#define	CFIL_RW_LCK_MAX	8

int cfil_rw_nxt_lck = 0;
void* cfil_rw_lock_history[CFIL_RW_LCK_MAX];

int cfil_rw_nxt_unlck = 0;
void* cfil_rw_unlock_history[CFIL_RW_LCK_MAX];
#define	CONTENT_FILTER_ZONE_NAME	"content_filter"
#define	CONTENT_FILTER_ZONE_MAX		10
static struct zone *content_filter_zone = NULL;	/* zone for content_filter */


#define	CFIL_INFO_ZONE_NAME	"cfil_info"
#define	CFIL_INFO_ZONE_MAX	1024
static struct zone *cfil_info_zone = NULL;	/* zone for cfil_info */

MBUFQ_HEAD(cfil_mqhead);

struct cfil_queue {
	uint64_t		q_start; /* offset of first byte in queue */
	uint64_t		q_end;	 /* offset of last byte in queue */
	struct cfil_mqhead	q_mq;
};
/*
 * There is one entry per content filter
 */
struct cfil_entry {
	TAILQ_ENTRY(cfil_entry)	cfe_link;
	struct content_filter	*cfe_filter;

	struct cfil_info	*cfe_cfil_info;
	uint32_t		cfe_flags;
	uint32_t		cfe_necp_control_unit;
	struct timeval		cfe_last_event;	 /* To user space */
	struct timeval		cfe_last_action; /* From user space */

	struct cfe_buf {
		/*
		 * cfe_pending_q holds data that has been delivered to
		 * the filter and for which we are waiting for an action
		 */
		struct cfil_queue	cfe_pending_q;
		/*
		 * This queue is for data that has not been delivered to
		 * the content filter (new data, pass peek or flow control)
		 */
		struct cfil_queue	cfe_ctl_q;

		uint64_t		cfe_pass_offset;
		uint64_t		cfe_peek_offset;
		uint64_t		cfe_peeked;
	} cfe_snd, cfe_rcv;
};

#define	CFEF_CFIL_ATTACHED		0x0001	/* was attached to filter */
#define	CFEF_SENT_SOCK_ATTACHED		0x0002	/* sock attach event was sent */
#define	CFEF_DATA_START			0x0004	/* can send data event */
#define	CFEF_FLOW_CONTROLLED		0x0008	/* wait for flow control lift */
#define	CFEF_SENT_DISCONNECT_IN		0x0010	/* event was sent */
#define	CFEF_SENT_DISCONNECT_OUT	0x0020	/* event was sent */
#define	CFEF_SENT_SOCK_CLOSED		0x0040	/* closed event was sent */
#define	CFEF_CFIL_DETACHED		0x0080	/* filter was detached */
#define	CFI_ADD_TIME_LOG(cfil, t1, t0, op)					\
	struct timeval _tdiff;							\
	if ((cfil)->cfi_op_list_ctr < CFI_MAX_TIME_LOG_ENTRY) {			\
		timersub(t1, t0, &_tdiff);					\
		(cfil)->cfi_op_time[(cfil)->cfi_op_list_ctr] = (uint32_t)(_tdiff.tv_sec * 1000 + _tdiff.tv_usec / 1000);\
		(cfil)->cfi_op_list[(cfil)->cfi_op_list_ctr] = (unsigned char)op; \
		(cfil)->cfi_op_list_ctr ++;					\
	}
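/*
 * Illustrative usage (as done in cfil_ctl_send() below): record how long
 * after the first event on the flow a given operation was processed.
 *
 *	microuptime(&entry->cfe_last_action);
 *	CFI_ADD_TIME_LOG(so->so_cfil, &entry->cfe_last_action,
 *	    &so->so_cfil->cfi_first_event, msghdr->cfm_op);
 */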
/*
 * There is a struct cfil_info per socket
 */
struct cfil_info {
	TAILQ_ENTRY(cfil_info)	cfi_link;
	struct socket		*cfi_so;
	uint32_t		cfi_flags;
	uint64_t		cfi_sock_id;
	struct timeval64	cfi_first_event;
	uint32_t		cfi_op_list_ctr;
	uint32_t		cfi_op_time[CFI_MAX_TIME_LOG_ENTRY];	/* time interval in milliseconds since first event */
	unsigned char		cfi_op_list[CFI_MAX_TIME_LOG_ENTRY];

	struct cfi_buf {
		/*
		 * cfi_pending_first and cfi_pending_last describe the total
		 * amount of data outstanding for all the filters on
		 * this socket and data in the flow queue
		 * cfi_pending_mbcnt counts in sballoc() "chars of mbufs used"
		 */
		uint64_t	cfi_pending_first;
		uint64_t	cfi_pending_last;
		int		cfi_pending_mbcnt;
		/*
		 * cfi_pass_offset is the minimum of all the filters
		 */
		uint64_t	cfi_pass_offset;
		/*
		 * cfi_inject_q holds data that needs to be re-injected
		 * into the socket after filtering and that can
		 * be queued because of flow control
		 */
		struct cfil_queue cfi_inject_q;
	} cfi_snd, cfi_rcv;

	struct cfil_entry	cfi_entries[MAX_CONTENT_FILTER];
} __attribute__((aligned(8)));
#define	CFIF_DROP		0x0001	/* drop action applied */
#define	CFIF_CLOSE_WAIT		0x0002	/* waiting for filter to close */
#define	CFIF_SOCK_CLOSED	0x0004	/* socket is closed */
#define	CFIF_RETRY_INJECT_IN	0x0010	/* inject in failed */
#define	CFIF_RETRY_INJECT_OUT	0x0020	/* inject out failed */
#define	CFIF_SHUT_WR		0x0040	/* shutdown write */
#define	CFIF_SHUT_RD		0x0080	/* shutdown read */

#define	CFI_MASK_GENCNT		0xFFFFFFFF00000000	/* upper 32 bits */
#define	CFI_SHIFT_GENCNT	32
#define	CFI_MASK_FLOWHASH	0x00000000FFFFFFFF	/* lower 32 bits */
#define	CFI_SHIFT_FLOWHASH	0
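/*
 * Illustrative sketch (not used by the code below): a cfil_sock_id_t combines
 * the socket generation count and the inpcb flow hash (see cfil_info_alloc()),
 * so it can be split back into its halves with the masks and shifts above:
 *
 *	gencnt   = (sock_id & CFI_MASK_GENCNT) >> CFI_SHIFT_GENCNT;
 *	flowhash = (sock_id & CFI_MASK_FLOWHASH) >> CFI_SHIFT_FLOWHASH;
 */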
TAILQ_HEAD(cfil_sock_head, cfil_info) cfil_sock_head;

#define	CFIL_QUEUE_VERIFY(x) if (cfil_debug) cfil_queue_verify(x)
#define	CFIL_INFO_VERIFY(x) if (cfil_debug) cfil_info_verify(x)

struct cfil_stats cfil_stats;

/*
 * For troubleshooting
 */
int cfil_log_level = LOG_ERR;
int cfil_debug = 1;
/*
 * Sysctls for logs and statistics
 */
static int sysctl_cfil_filter_list(struct sysctl_oid *, void *, int,
	struct sysctl_req *);
static int sysctl_cfil_sock_list(struct sysctl_oid *, void *, int,
	struct sysctl_req *);

SYSCTL_NODE(_net, OID_AUTO, cfil, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "cfil");

SYSCTL_INT(_net_cfil, OID_AUTO, log, CTLFLAG_RW|CTLFLAG_LOCKED,
	&cfil_log_level, 0, "");

SYSCTL_INT(_net_cfil, OID_AUTO, debug, CTLFLAG_RW|CTLFLAG_LOCKED,
	&cfil_debug, 0, "");

SYSCTL_UINT(_net_cfil, OID_AUTO, sock_attached_count, CTLFLAG_RD|CTLFLAG_LOCKED,
	&cfil_sock_attached_count, 0, "");

SYSCTL_UINT(_net_cfil, OID_AUTO, active_count, CTLFLAG_RD|CTLFLAG_LOCKED,
	&cfil_active_count, 0, "");

SYSCTL_UINT(_net_cfil, OID_AUTO, close_wait_timeout, CTLFLAG_RW|CTLFLAG_LOCKED,
	&cfil_close_wait_timeout, 0, "");

static int cfil_sbtrim = 1;
SYSCTL_UINT(_net_cfil, OID_AUTO, sbtrim, CTLFLAG_RW|CTLFLAG_LOCKED,
	&cfil_sbtrim, 0, "");

SYSCTL_PROC(_net_cfil, OID_AUTO, filter_list, CTLFLAG_RD|CTLFLAG_LOCKED,
	0, 0, sysctl_cfil_filter_list, "S,cfil_filter_stat", "");

SYSCTL_PROC(_net_cfil, OID_AUTO, sock_list, CTLFLAG_RD|CTLFLAG_LOCKED,
	0, 0, sysctl_cfil_sock_list, "S,cfil_sock_stat", "");

SYSCTL_STRUCT(_net_cfil, OID_AUTO, stats, CTLFLAG_RD|CTLFLAG_LOCKED,
	&cfil_stats, cfil_stats, "");
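/*
 * Illustrative only: from user space, the read-only counters registered above
 * can be fetched with sysctlbyname(3), e.g.:
 *
 *	uint32_t count = 0;
 *	size_t len = sizeof(count);
 *	sysctlbyname("net.cfil.sock_attached_count", &count, &len, NULL, 0);
 */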
/*
 * Forward declaration to appease the compiler
 */
static int cfil_action_data_pass(struct socket *, uint32_t, int,
	uint64_t, uint64_t);
static int cfil_action_drop(struct socket *, uint32_t);
static int cfil_action_bless_client(uint32_t, struct cfil_msg_hdr *);
static int cfil_dispatch_closed_event(struct socket *, int);
static int cfil_data_common(struct socket *, int, struct sockaddr *,
	struct mbuf *, struct mbuf *, uint32_t);
static int cfil_data_filter(struct socket *, uint32_t, int,
	struct mbuf *, uint64_t);
static void fill_ip_sockaddr_4_6(union sockaddr_in_4_6 *,
	struct in_addr, u_int16_t);
static void fill_ip6_sockaddr_4_6(union sockaddr_in_4_6 *,
	struct in6_addr *, u_int16_t);
static int cfil_dispatch_attach_event(struct socket *, uint32_t);
static void cfil_info_free(struct socket *, struct cfil_info *);
static struct cfil_info * cfil_info_alloc(struct socket *);
static int cfil_info_attach_unit(struct socket *, uint32_t);
static struct socket * cfil_socket_from_sock_id(cfil_sock_id_t);
static struct socket *cfil_socket_from_client_uuid(uuid_t, bool *);
static int cfil_service_pending_queue(struct socket *, uint32_t, int);
static int cfil_data_service_ctl_q(struct socket *, uint32_t, int);
static void cfil_info_verify(struct cfil_info *);
static int cfil_update_data_offsets(struct socket *, uint32_t, int,
	uint64_t, uint64_t);
static int cfil_acquire_sockbuf(struct socket *, int);
static void cfil_release_sockbuf(struct socket *, int);
static int cfil_filters_attached(struct socket *);

static void cfil_rw_lock_exclusive(lck_rw_t *);
static void cfil_rw_unlock_exclusive(lck_rw_t *);
static void cfil_rw_lock_shared(lck_rw_t *);
static void cfil_rw_unlock_shared(lck_rw_t *);
static boolean_t cfil_rw_lock_shared_to_exclusive(lck_rw_t *);
static void cfil_rw_lock_exclusive_to_shared(lck_rw_t *);

static unsigned int cfil_data_length(struct mbuf *, int *);
/*
 * Content filter global read write lock
 */

static void
cfil_rw_lock_exclusive(lck_rw_t *lck)
{
	void *lr_saved;

	lr_saved = __builtin_return_address(0);

	lck_rw_lock_exclusive(lck);

	cfil_rw_lock_history[cfil_rw_nxt_lck] = lr_saved;
	cfil_rw_nxt_lck = (cfil_rw_nxt_lck + 1) % CFIL_RW_LCK_MAX;
}

static void
cfil_rw_unlock_exclusive(lck_rw_t *lck)
{
	void *lr_saved;

	lr_saved = __builtin_return_address(0);

	lck_rw_unlock_exclusive(lck);

	cfil_rw_unlock_history[cfil_rw_nxt_unlck] = lr_saved;
	cfil_rw_nxt_unlck = (cfil_rw_nxt_unlck + 1) % CFIL_RW_LCK_MAX;
}

static void
cfil_rw_lock_shared(lck_rw_t *lck)
{
	void *lr_saved;

	lr_saved = __builtin_return_address(0);

	lck_rw_lock_shared(lck);

	cfil_rw_lock_history[cfil_rw_nxt_lck] = lr_saved;
	cfil_rw_nxt_lck = (cfil_rw_nxt_lck + 1) % CFIL_RW_LCK_MAX;
}

static void
cfil_rw_unlock_shared(lck_rw_t *lck)
{
	void *lr_saved;

	lr_saved = __builtin_return_address(0);

	lck_rw_unlock_shared(lck);

	cfil_rw_unlock_history[cfil_rw_nxt_unlck] = lr_saved;
	cfil_rw_nxt_unlck = (cfil_rw_nxt_unlck + 1) % CFIL_RW_LCK_MAX;
}

static boolean_t
cfil_rw_lock_shared_to_exclusive(lck_rw_t *lck)
{
	void *lr_saved;
	boolean_t upgraded;

	lr_saved = __builtin_return_address(0);

	upgraded = lck_rw_lock_shared_to_exclusive(lck);
	if (upgraded) {
		cfil_rw_unlock_history[cfil_rw_nxt_unlck] = lr_saved;
		cfil_rw_nxt_unlck = (cfil_rw_nxt_unlck + 1) % CFIL_RW_LCK_MAX;
	}
	return (upgraded);
}

static void
cfil_rw_lock_exclusive_to_shared(lck_rw_t *lck)
{
	void *lr_saved;

	lr_saved = __builtin_return_address(0);

	lck_rw_lock_exclusive_to_shared(lck);

	cfil_rw_lock_history[cfil_rw_nxt_lck] = lr_saved;
	cfil_rw_nxt_lck = (cfil_rw_nxt_lck + 1) % CFIL_RW_LCK_MAX;
}

static void
cfil_rw_lock_assert_held(lck_rw_t *lck, int exclusive)
{
#pragma unused(lck, exclusive)
	LCK_RW_ASSERT(lck,
	    exclusive ? LCK_RW_ASSERT_EXCLUSIVE : LCK_RW_ASSERT_HELD);
}
/*
 * Return the number of bytes in the mbuf chain using the same
 * method as m_length() or sballoc()
 */
static unsigned int
cfil_data_length(struct mbuf *m, int *retmbcnt)
{
	struct mbuf *m0;
	unsigned int pktlen;
	int mbcnt;

	if (retmbcnt == NULL)
		return (m_length(m));

	pktlen = 0;
	mbcnt = 0;
	for (m0 = m; m0 != NULL; m0 = m0->m_next) {
		pktlen += m0->m_len;
		mbcnt += MSIZE;
		if (m0->m_flags & M_EXT)
			mbcnt += m0->m_ext.ext_size;
	}
	*retmbcnt = mbcnt;
	return (pktlen);
}
/*
 * Common mbuf queue utilities
 */

static inline void
cfil_queue_init(struct cfil_queue *cfq)
{
	cfq->q_start = 0;
	cfq->q_end = 0;
	MBUFQ_INIT(&cfq->q_mq);
}

static inline uint64_t
cfil_queue_drain(struct cfil_queue *cfq)
{
	uint64_t drained = cfq->q_end - cfq->q_start;
	cfq->q_start = 0;
	cfq->q_end = 0;
	MBUFQ_DRAIN(&cfq->q_mq);

	return (drained);
}

/* Return 1 when empty, 0 otherwise */
static inline int
cfil_queue_empty(struct cfil_queue *cfq)
{
	return (MBUFQ_EMPTY(&cfq->q_mq));
}

static inline uint64_t
cfil_queue_offset_first(struct cfil_queue *cfq)
{
	return (cfq->q_start);
}

static inline uint64_t
cfil_queue_offset_last(struct cfil_queue *cfq)
{
	return (cfq->q_end);
}

static inline uint64_t
cfil_queue_len(struct cfil_queue *cfq)
{
	return (cfq->q_end - cfq->q_start);
}
/*
 * Routines to verify some fundamental assumptions
 */

static void
cfil_queue_verify(struct cfil_queue *cfq)
{
	mbuf_t m;
	mbuf_t n;
	uint64_t queuesize = 0;

	/* Verify offsets are ordered */
	VERIFY(cfq->q_start <= cfq->q_end);

	/*
	 * When the queue is empty, the offsets are equal; otherwise the offsets
	 * are different
	 */
	VERIFY((MBUFQ_EMPTY(&cfq->q_mq) && cfq->q_start == cfq->q_end) ||
	    (!MBUFQ_EMPTY(&cfq->q_mq) &&
	    cfq->q_start != cfq->q_end));

	MBUFQ_FOREACH(m, &cfq->q_mq) {
		size_t chainsize = 0;
		unsigned int mlen = m_length(m);

		if (m == (void *)M_TAG_FREE_PATTERN ||
		    m->m_next == (void *)M_TAG_FREE_PATTERN ||
		    m->m_nextpkt == (void *)M_TAG_FREE_PATTERN)
			panic("%s - mq %p is free at %p", __func__,
			    &cfq->q_mq, m);
		for (n = m; n != NULL; n = n->m_next) {
			if (n->m_type != MT_DATA &&
			    n->m_type != MT_HEADER &&
			    n->m_type != MT_OOBDATA)
				panic("%s - %p unsupported type %u", __func__,
				    n, n->m_type);
			chainsize += n->m_len;
		}
		if (mlen != chainsize)
			panic("%s - %p m_length() %u != chainsize %lu",
			    __func__, m, mlen, chainsize);
		queuesize += chainsize;
	}
	if (queuesize != cfq->q_end - cfq->q_start)
		panic("%s - %p queuesize %llu != offsetdiffs %llu", __func__,
		    m, queuesize, cfq->q_end - cfq->q_start);
}
static void
cfil_queue_enqueue(struct cfil_queue *cfq, mbuf_t m, size_t len)
{
	CFIL_QUEUE_VERIFY(cfq);

	MBUFQ_ENQUEUE(&cfq->q_mq, m);
	cfq->q_end += len;

	CFIL_QUEUE_VERIFY(cfq);
}

static void
cfil_queue_remove(struct cfil_queue *cfq, mbuf_t m, size_t len)
{
	CFIL_QUEUE_VERIFY(cfq);

	VERIFY(m_length(m) == len);

	MBUFQ_REMOVE(&cfq->q_mq, m);
	MBUFQ_NEXT(m) = NULL;
	cfq->q_start += len;

	CFIL_QUEUE_VERIFY(cfq);
}

static mbuf_t
cfil_queue_first(struct cfil_queue *cfq)
{
	return (MBUFQ_FIRST(&cfq->q_mq));
}

static mbuf_t
cfil_queue_next(struct cfil_queue *cfq, mbuf_t m)
{
#pragma unused(cfq)
	return (MBUFQ_NEXT(m));
}
static void
cfil_entry_buf_verify(struct cfe_buf *cfe_buf)
{
	CFIL_QUEUE_VERIFY(&cfe_buf->cfe_ctl_q);
	CFIL_QUEUE_VERIFY(&cfe_buf->cfe_pending_q);

	/* Verify the queues are ordered so that pending is before ctl */
	VERIFY(cfe_buf->cfe_ctl_q.q_start >= cfe_buf->cfe_pending_q.q_end);

	/* The peek offset cannot be less than the pass offset */
	VERIFY(cfe_buf->cfe_peek_offset >= cfe_buf->cfe_pass_offset);

	/* Make sure we've updated the offset we peeked at */
	VERIFY(cfe_buf->cfe_ctl_q.q_start <= cfe_buf->cfe_peeked);
}

static void
cfil_entry_verify(struct cfil_entry *entry)
{
	cfil_entry_buf_verify(&entry->cfe_snd);
	cfil_entry_buf_verify(&entry->cfe_rcv);
}

static void
cfil_info_buf_verify(struct cfi_buf *cfi_buf)
{
	CFIL_QUEUE_VERIFY(&cfi_buf->cfi_inject_q);

	VERIFY(cfi_buf->cfi_pending_first <= cfi_buf->cfi_pending_last);
	VERIFY(cfi_buf->cfi_pending_mbcnt >= 0);
}

static void
cfil_info_verify(struct cfil_info *cfil_info)
{
	int i;

	if (cfil_info == NULL)
		return;

	cfil_info_buf_verify(&cfil_info->cfi_snd);
	cfil_info_buf_verify(&cfil_info->cfi_rcv);

	for (i = 0; i < MAX_CONTENT_FILTER; i++)
		cfil_entry_verify(&cfil_info->cfi_entries[i]);
}
static void
verify_content_filter(struct content_filter *cfc)
{
	struct cfil_entry *entry;
	uint32_t count = 0;

	VERIFY(cfc->cf_sock_count >= 0);

	TAILQ_FOREACH(entry, &cfc->cf_sock_entries, cfe_link) {
		count++;
		VERIFY(cfc == entry->cfe_filter);
	}
	VERIFY(count == cfc->cf_sock_count);
}
/*
 * Kernel control socket callbacks
 */
static errno_t
cfil_ctl_connect(kern_ctl_ref kctlref, struct sockaddr_ctl *sac,
    void **unitinfo)
{
	errno_t error = 0;
	struct content_filter *cfc = NULL;

	CFIL_LOG(LOG_NOTICE, "");

	cfc = zalloc(content_filter_zone);
	if (cfc == NULL) {
		CFIL_LOG(LOG_ERR, "zalloc failed");
		error = ENOMEM;
		goto done;
	}
	bzero(cfc, sizeof(struct content_filter));

	cfil_rw_lock_exclusive(&cfil_lck_rw);
	if (content_filters == NULL) {
		struct content_filter **tmp;

		cfil_rw_unlock_exclusive(&cfil_lck_rw);

		MALLOC(tmp,
		    struct content_filter **,
		    MAX_CONTENT_FILTER * sizeof(struct content_filter *),
		    M_TEMP, M_WAITOK | M_ZERO);

		cfil_rw_lock_exclusive(&cfil_lck_rw);

		if (tmp == NULL && content_filters == NULL) {
			error = ENOMEM;
			cfil_rw_unlock_exclusive(&cfil_lck_rw);
			goto done;
		}
		/* Another thread may have won the race */
		if (content_filters != NULL)
			FREE(tmp, M_TEMP);
		else
			content_filters = tmp;
	}

	if (sac->sc_unit == 0 || sac->sc_unit > MAX_CONTENT_FILTER) {
		CFIL_LOG(LOG_ERR, "bad sc_unit %u", sac->sc_unit);
		error = EINVAL;
	} else if (content_filters[sac->sc_unit - 1] != NULL) {
		CFIL_LOG(LOG_ERR, "sc_unit %u in use", sac->sc_unit);
		error = EADDRINUSE;
	} else {
		/*
		 * kernel control socket kcunit numbers start at 1
		 */
		content_filters[sac->sc_unit - 1] = cfc;

		cfc->cf_kcref = kctlref;
		cfc->cf_kcunit = sac->sc_unit;
		TAILQ_INIT(&cfc->cf_sock_entries);

		*unitinfo = cfc;
		cfil_active_count++;
	}
	cfil_rw_unlock_exclusive(&cfil_lck_rw);
done:
	if (error != 0 && cfc != NULL)
		zfree(content_filter_zone, cfc);

	if (error == 0)
		OSIncrementAtomic(&cfil_stats.cfs_ctl_connect_ok);
	else
		OSIncrementAtomic(&cfil_stats.cfs_ctl_connect_fail);

	CFIL_LOG(LOG_INFO, "return %d cfil_active_count %u kcunit %u",
	    error, cfil_active_count, sac->sc_unit);

	return (error);
}
static errno_t
cfil_ctl_disconnect(kern_ctl_ref kctlref, u_int32_t kcunit, void *unitinfo)
{
#pragma unused(kctlref)
	errno_t error = 0;
	struct content_filter *cfc;
	struct cfil_entry *entry;

	CFIL_LOG(LOG_NOTICE, "");

	if (content_filters == NULL) {
		CFIL_LOG(LOG_ERR, "no content filter");
		error = EINVAL;
		goto done;
	}
	if (kcunit > MAX_CONTENT_FILTER) {
		CFIL_LOG(LOG_ERR, "kcunit %u > MAX_CONTENT_FILTER (%d)",
		    kcunit, MAX_CONTENT_FILTER);
		error = EINVAL;
		goto done;
	}

	cfc = (struct content_filter *)unitinfo;
	if (cfc == NULL)
		goto done;

	cfil_rw_lock_exclusive(&cfil_lck_rw);
	if (content_filters[kcunit - 1] != cfc || cfc->cf_kcunit != kcunit) {
		CFIL_LOG(LOG_ERR, "bad unit info %u",
		    kcunit);
		cfil_rw_unlock_exclusive(&cfil_lck_rw);
		goto done;
	}
	cfc->cf_flags |= CFF_DETACHING;
	/*
	 * Remove all sockets from the filter
	 */
	while ((entry = TAILQ_FIRST(&cfc->cf_sock_entries)) != NULL) {
		cfil_rw_lock_assert_held(&cfil_lck_rw, 1);

		verify_content_filter(cfc);
		/*
		 * Accept all outstanding data by pushing to next filter
		 * or back to socket
		 *
		 * TBD: Actually we should make sure all data has been pushed
		 * back to the socket
		 */
		if (entry->cfe_cfil_info && entry->cfe_cfil_info->cfi_so) {
			struct cfil_info *cfil_info = entry->cfe_cfil_info;
			struct socket *so = cfil_info->cfi_so;

			/* Need to let data flow immediately */
			entry->cfe_flags |= CFEF_SENT_SOCK_ATTACHED |
			    CFEF_DATA_START;

			/*
			 * Respect locking hierarchy
			 */
			cfil_rw_unlock_exclusive(&cfil_lck_rw);

			socket_lock(so, 1);

			/*
			 * When cfe_filter is NULL the filter is detached
			 * and the entry has been removed from cf_sock_entries
			 */
			if (so->so_cfil == NULL || entry->cfe_filter == NULL) {
				cfil_rw_lock_exclusive(&cfil_lck_rw);
				goto release;
			}
			(void) cfil_action_data_pass(so, kcunit, 1,
			    CFM_MAX_OFFSET,
			    CFM_MAX_OFFSET);

			(void) cfil_action_data_pass(so, kcunit, 0,
			    CFM_MAX_OFFSET,
			    CFM_MAX_OFFSET);

			cfil_rw_lock_exclusive(&cfil_lck_rw);

			/*
			 * Check again as the socket may have been unlocked
			 * when calling cfil_acquire_sockbuf()
			 */
			if (so->so_cfil == NULL || entry->cfe_filter == NULL)
				goto release;

			/* The filter is now detached */
			entry->cfe_flags |= CFEF_CFIL_DETACHED;
			CFIL_LOG(LOG_NOTICE, "so %llx detached %u",
			    (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit);

			if ((so->so_cfil->cfi_flags & CFIF_CLOSE_WAIT) &&
			    cfil_filters_attached(so) == 0) {
				CFIL_LOG(LOG_NOTICE, "so %llx waking",
				    (uint64_t)VM_KERNEL_ADDRPERM(so));
				wakeup((caddr_t)&so->so_cfil);
			}

			/*
			 * Remove the filter entry from the content filter
			 * but leave the rest of the state intact as the queues
			 * may not be empty yet
			 */
			entry->cfe_filter = NULL;
			entry->cfe_necp_control_unit = 0;

			TAILQ_REMOVE(&cfc->cf_sock_entries, entry, cfe_link);
			cfc->cf_sock_count--;
release:
			socket_unlock(so, 1);
		}
	}
	verify_content_filter(cfc);

	VERIFY(cfc->cf_sock_count == 0);

	/*
	 * Make filter inactive
	 */
	content_filters[kcunit - 1] = NULL;
	cfil_active_count--;
	cfil_rw_unlock_exclusive(&cfil_lck_rw);

	zfree(content_filter_zone, cfc);
done:
	if (error == 0)
		OSIncrementAtomic(&cfil_stats.cfs_ctl_disconnect_ok);
	else
		OSIncrementAtomic(&cfil_stats.cfs_ctl_disconnect_fail);

	CFIL_LOG(LOG_INFO, "return %d cfil_active_count %u kcunit %u",
	    error, cfil_active_count, kcunit);

	return (error);
}
/*
 * cfil_acquire_sockbuf()
 *
 * Prevent any other thread from acquiring the sockbuf
 * We use sb_cfil_thread as a semaphore to prevent other threads from
 * messing with the sockbuf -- see sblock()
 * Note: We do not set SB_LOCK here because the thread may check or modify
 * SB_LOCK several times until it calls cfil_release_sockbuf() -- currently
 * sblock(), sbunlock() or sodefunct()
 */
static int
cfil_acquire_sockbuf(struct socket *so, int outgoing)
{
	thread_t tp = current_thread();
	struct sockbuf *sb = outgoing ? &so->so_snd : &so->so_rcv;
	lck_mtx_t *mutex_held;
	int error = 0;

	/*
	 * Wait until no thread is holding the sockbuf and other content
	 * filter threads have released the sockbuf
	 */
	while ((sb->sb_flags & SB_LOCK) ||
	    (sb->sb_cfil_thread != NULL && sb->sb_cfil_thread != tp)) {
		if (so->so_proto->pr_getlock != NULL)
			mutex_held = (*so->so_proto->pr_getlock)(so, PR_F_WILLUNLOCK);
		else
			mutex_held = so->so_proto->pr_domain->dom_mtx;

		LCK_MTX_ASSERT(mutex_held, LCK_MTX_ASSERT_OWNED);

		sb->sb_wantlock++;
		VERIFY(sb->sb_wantlock != 0);

		msleep(&sb->sb_flags, mutex_held, PSOCK, "cfil_acquire_sockbuf",
		    NULL);

		VERIFY(sb->sb_wantlock != 0);
		sb->sb_wantlock--;
	}
	/*
	 * Use reference count for repetitive calls on same thread
	 */
	if (sb->sb_cfil_refs == 0) {
		VERIFY(sb->sb_cfil_thread == NULL);
		VERIFY((sb->sb_flags & SB_LOCK) == 0);

		sb->sb_cfil_thread = tp;
		sb->sb_flags |= SB_LOCK;
	}
	sb->sb_cfil_refs++;

	/* We acquire the socket buffer when we need to cleanup */
	if (so->so_cfil == NULL) {
		CFIL_LOG(LOG_ERR, "so %llx cfil detached",
		    (uint64_t)VM_KERNEL_ADDRPERM(so));
		error = 0;
	} else if (so->so_cfil->cfi_flags & CFIF_DROP) {
		CFIL_LOG(LOG_ERR, "so %llx drop set",
		    (uint64_t)VM_KERNEL_ADDRPERM(so));
		error = EPIPE;
	}

	return (error);
}
static void
cfil_release_sockbuf(struct socket *so, int outgoing)
{
	struct sockbuf *sb = outgoing ? &so->so_snd : &so->so_rcv;
	thread_t tp = current_thread();

	socket_lock_assert_owned(so);

	if (sb->sb_cfil_thread != NULL && sb->sb_cfil_thread != tp)
		panic("%s sb_cfil_thread %p not current %p", __func__,
		    sb->sb_cfil_thread, tp);
	/*
	 * Don't panic if we are defunct because SB_LOCK has
	 * been cleared by sodefunct()
	 */
	if (!(so->so_flags & SOF_DEFUNCT) && !(sb->sb_flags & SB_LOCK))
		panic("%s SB_LOCK not set on %p", __func__,
		    sb);
	/*
	 * We can unlock when the thread unwinds to the last reference
	 */
	sb->sb_cfil_refs--;
	if (sb->sb_cfil_refs == 0) {
		sb->sb_cfil_thread = NULL;
		sb->sb_flags &= ~SB_LOCK;

		if (sb->sb_wantlock > 0)
			wakeup(&sb->sb_flags);
	}
}
cfil_sock_id_t
cfil_sock_id_from_socket(struct socket *so)
{
	if ((so->so_flags & SOF_CONTENT_FILTER) && so->so_cfil)
		return (so->so_cfil->cfi_sock_id);
	else
		return (CFIL_SOCK_ID_NONE);
}
static struct socket *
cfil_socket_from_sock_id(cfil_sock_id_t cfil_sock_id)
{
	struct socket *so = NULL;
	u_int64_t gencnt = cfil_sock_id >> 32;
	u_int32_t flowhash = (u_int32_t)(cfil_sock_id & 0x0ffffffff);
	struct inpcb *inp = NULL;
	struct inpcbinfo *pcbinfo = &tcbinfo;

	lck_rw_lock_shared(pcbinfo->ipi_lock);
	LIST_FOREACH(inp, pcbinfo->ipi_listhead, inp_list) {
		if (inp->inp_state != INPCB_STATE_DEAD &&
		    inp->inp_socket != NULL &&
		    inp->inp_flowhash == flowhash &&
		    (inp->inp_socket->so_gencnt & 0x0ffffffff) == gencnt &&
		    inp->inp_socket->so_cfil != NULL) {
			so = inp->inp_socket;
			break;
		}
	}
	lck_rw_done(pcbinfo->ipi_lock);

	if (so == NULL) {
		OSIncrementAtomic(&cfil_stats.cfs_sock_id_not_found);
		CFIL_LOG(LOG_DEBUG,
		    "no socket for sock_id %llx gencnt %llx flowhash %x",
		    cfil_sock_id, gencnt, flowhash);
	}

	return (so);
}
static struct socket *
cfil_socket_from_client_uuid(uuid_t necp_client_uuid, bool *cfil_attached)
{
	struct socket *so = NULL;
	struct inpcb *inp = NULL;
	struct inpcbinfo *pcbinfo = &tcbinfo;

	lck_rw_lock_shared(pcbinfo->ipi_lock);
	LIST_FOREACH(inp, pcbinfo->ipi_listhead, inp_list) {
		if (inp->inp_state != INPCB_STATE_DEAD &&
		    inp->inp_socket != NULL &&
		    uuid_compare(inp->necp_client_uuid, necp_client_uuid) == 0) {
			*cfil_attached = (inp->inp_socket->so_cfil != NULL);
			so = inp->inp_socket;
			break;
		}
	}
	lck_rw_done(pcbinfo->ipi_lock);

	return (so);
}
static errno_t
cfil_ctl_send(kern_ctl_ref kctlref, u_int32_t kcunit, void *unitinfo, mbuf_t m,
    int flags)
{
#pragma unused(kctlref, flags)
	errno_t error = 0;
	struct cfil_msg_hdr *msghdr;
	struct content_filter *cfc = (struct content_filter *)unitinfo;
	struct socket *so;
	struct cfil_msg_action *action_msg;
	struct cfil_entry *entry;

	CFIL_LOG(LOG_INFO, "");

	if (content_filters == NULL) {
		CFIL_LOG(LOG_ERR, "no content filter");
		error = EINVAL;
		goto done;
	}
	if (kcunit > MAX_CONTENT_FILTER) {
		CFIL_LOG(LOG_ERR, "kcunit %u > MAX_CONTENT_FILTER (%d)",
		    kcunit, MAX_CONTENT_FILTER);
		error = EINVAL;
		goto done;
	}

	if (m_length(m) < sizeof(struct cfil_msg_hdr)) {
		CFIL_LOG(LOG_ERR, "too short %u", m_length(m));
		error = EINVAL;
		goto done;
	}
	msghdr = (struct cfil_msg_hdr *)mbuf_data(m);
	if (msghdr->cfm_version != CFM_VERSION_CURRENT) {
		CFIL_LOG(LOG_ERR, "bad version %u", msghdr->cfm_version);
		error = EINVAL;
		goto done;
	}
	if (msghdr->cfm_type != CFM_TYPE_ACTION) {
		CFIL_LOG(LOG_ERR, "bad type %u", msghdr->cfm_type);
		error = EINVAL;
		goto done;
	}
	/* Validate action operation */
	switch (msghdr->cfm_op) {
	case CFM_OP_DATA_UPDATE:
		OSIncrementAtomic(
		    &cfil_stats.cfs_ctl_action_data_update);
		break;
	case CFM_OP_DROP:
		OSIncrementAtomic(&cfil_stats.cfs_ctl_action_drop);
		break;
	case CFM_OP_BLESS_CLIENT:
		if (msghdr->cfm_len != sizeof(struct cfil_msg_bless_client)) {
			OSIncrementAtomic(&cfil_stats.cfs_ctl_action_bad_len);
			error = EINVAL;
			CFIL_LOG(LOG_ERR, "bad len: %u for op %u",
			    msghdr->cfm_len,
			    msghdr->cfm_op);
			goto done;
		}
		error = cfil_action_bless_client(kcunit, msghdr);
		goto done;
	default:
		OSIncrementAtomic(&cfil_stats.cfs_ctl_action_bad_op);
		CFIL_LOG(LOG_ERR, "bad op %u", msghdr->cfm_op);
		error = EINVAL;
		goto done;
	}
	if (msghdr->cfm_len != sizeof(struct cfil_msg_action)) {
		OSIncrementAtomic(&cfil_stats.cfs_ctl_action_bad_len);
		error = EINVAL;
		CFIL_LOG(LOG_ERR, "bad len: %u for op %u",
		    msghdr->cfm_len,
		    msghdr->cfm_op);
		goto done;
	}
	cfil_rw_lock_shared(&cfil_lck_rw);
	if (cfc != (void *)content_filters[kcunit - 1]) {
		CFIL_LOG(LOG_ERR, "unitinfo does not match for kcunit %u",
		    kcunit);
		error = EINVAL;
		cfil_rw_unlock_shared(&cfil_lck_rw);
		goto done;
	}

	so = cfil_socket_from_sock_id(msghdr->cfm_sock_id);
	if (so == NULL) {
		CFIL_LOG(LOG_NOTICE, "bad sock_id %llx",
		    msghdr->cfm_sock_id);
		error = EINVAL;
		cfil_rw_unlock_shared(&cfil_lck_rw);
		goto done;
	}
	cfil_rw_unlock_shared(&cfil_lck_rw);

	socket_lock(so, 1);

	if (so->so_cfil == NULL) {
		CFIL_LOG(LOG_NOTICE, "so %llx not attached",
		    (uint64_t)VM_KERNEL_ADDRPERM(so));
		error = EINVAL;
		goto unlock;
	} else if (so->so_cfil->cfi_flags & CFIF_DROP) {
		CFIL_LOG(LOG_NOTICE, "so %llx drop set",
		    (uint64_t)VM_KERNEL_ADDRPERM(so));
		error = EINVAL;
		goto unlock;
	}
	entry = &so->so_cfil->cfi_entries[kcunit - 1];
	if (entry->cfe_filter == NULL) {
		CFIL_LOG(LOG_NOTICE, "so %llx no filter",
		    (uint64_t)VM_KERNEL_ADDRPERM(so));
		error = EINVAL;
		goto unlock;
	}

	if (entry->cfe_flags & CFEF_SENT_SOCK_ATTACHED)
		entry->cfe_flags |= CFEF_DATA_START;
	else {
		CFIL_LOG(LOG_ERR,
		    "so %llx attached not sent for %u",
		    (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit);
		error = EINVAL;
		goto unlock;
	}

	microuptime(&entry->cfe_last_action);
	CFI_ADD_TIME_LOG(so->so_cfil, &entry->cfe_last_action, &so->so_cfil->cfi_first_event, msghdr->cfm_op);

	action_msg = (struct cfil_msg_action *)msghdr;

	switch (msghdr->cfm_op) {
	case CFM_OP_DATA_UPDATE:
		if (action_msg->cfa_out_peek_offset != 0 ||
		    action_msg->cfa_out_pass_offset != 0)
			error = cfil_action_data_pass(so, kcunit, 1,
			    action_msg->cfa_out_pass_offset,
			    action_msg->cfa_out_peek_offset);
		if (error == EJUSTRETURN)
			error = 0;
		if (error != 0)
			break;
		if (action_msg->cfa_in_peek_offset != 0 ||
		    action_msg->cfa_in_pass_offset != 0)
			error = cfil_action_data_pass(so, kcunit, 0,
			    action_msg->cfa_in_pass_offset,
			    action_msg->cfa_in_peek_offset);
		if (error == EJUSTRETURN)
			error = 0;
		break;

	case CFM_OP_DROP:
		error = cfil_action_drop(so, kcunit);
		break;

	default:
		error = EINVAL;
		break;
	}
unlock:
	socket_unlock(so, 1);
done:
	mbuf_freem(m);

	if (error == 0)
		OSIncrementAtomic(&cfil_stats.cfs_ctl_send_ok);
	else
		OSIncrementAtomic(&cfil_stats.cfs_ctl_send_bad);

	return (error);
}
static errno_t
cfil_ctl_getopt(kern_ctl_ref kctlref, u_int32_t kcunit, void *unitinfo,
    int opt, void *data, size_t *len)
{
#pragma unused(kctlref, opt)
	errno_t error = 0;
	struct content_filter *cfc = (struct content_filter *)unitinfo;

	CFIL_LOG(LOG_NOTICE, "");

	cfil_rw_lock_shared(&cfil_lck_rw);

	if (content_filters == NULL) {
		CFIL_LOG(LOG_ERR, "no content filter");
		error = EINVAL;
		goto done;
	}
	if (kcunit > MAX_CONTENT_FILTER) {
		CFIL_LOG(LOG_ERR, "kcunit %u > MAX_CONTENT_FILTER (%d)",
		    kcunit, MAX_CONTENT_FILTER);
		error = EINVAL;
		goto done;
	}
	if (cfc != (void *)content_filters[kcunit - 1]) {
		CFIL_LOG(LOG_ERR, "unitinfo does not match for kcunit %u",
		    kcunit);
		error = EINVAL;
		goto done;
	}
	switch (opt) {
	case CFIL_OPT_NECP_CONTROL_UNIT:
		if (*len < sizeof(uint32_t)) {
			CFIL_LOG(LOG_ERR, "len too small %lu", *len);
			error = EINVAL;
			goto done;
		}
		if (data != NULL)
			*(uint32_t *)data = cfc->cf_necp_control_unit;
		break;
	case CFIL_OPT_GET_SOCKET_INFO:
		if (*len != sizeof(struct cfil_opt_sock_info)) {
			CFIL_LOG(LOG_ERR, "len does not match %lu", *len);
			error = EINVAL;
			goto done;
		}
		if (data == NULL) {
			CFIL_LOG(LOG_ERR, "data not passed");
			error = EINVAL;
			goto done;
		}

		struct cfil_opt_sock_info *sock_info =
		    (struct cfil_opt_sock_info *) data;
		struct socket *sock =
		    cfil_socket_from_sock_id(sock_info->cfs_sock_id);
		if (sock == NULL) {
			CFIL_LOG(LOG_NOTICE, "bad sock_id %llx",
			    sock_info->cfs_sock_id);
			error = ENOENT;
			goto done;
		}

		// Unlock here so that we never hold both cfil_lck_rw and the
		// socket_lock at the same time. Otherwise, this can deadlock
		// because soclose() takes the socket_lock and then exclusive
		// cfil_lck_rw and we require the opposite order.

		// WARNING: Be sure to never use anything protected
		//     by cfil_lck_rw beyond this point.
		// WARNING: Be sure to avoid fallthrough and
		//     goto return_already_unlocked from this branch.
		cfil_rw_unlock_shared(&cfil_lck_rw);

		socket_lock(sock, 1);

		if (sock->so_cfil == NULL) {
			CFIL_LOG(LOG_NOTICE, "so %llx not attached, cannot fetch info",
			    (uint64_t)VM_KERNEL_ADDRPERM(sock));
			error = EINVAL;
			socket_unlock(sock, 1);
			goto return_already_unlocked;
		}

		// Fill out family, type, and protocol
		sock_info->cfs_sock_family = sock->so_proto->pr_domain->dom_family;
		sock_info->cfs_sock_type = sock->so_proto->pr_type;
		sock_info->cfs_sock_protocol = sock->so_proto->pr_protocol;

		// Source and destination addresses
		struct inpcb *inp = sotoinpcb(sock);
		if (inp->inp_vflag & INP_IPV6) {
			fill_ip6_sockaddr_4_6(&sock_info->cfs_local,
			    &inp->in6p_laddr, inp->inp_lport);
			fill_ip6_sockaddr_4_6(&sock_info->cfs_remote,
			    &inp->in6p_faddr, inp->inp_fport);
		} else if (inp->inp_vflag & INP_IPV4) {
			fill_ip_sockaddr_4_6(&sock_info->cfs_local,
			    inp->inp_laddr, inp->inp_lport);
			fill_ip_sockaddr_4_6(&sock_info->cfs_remote,
			    inp->inp_faddr, inp->inp_fport);
		}

		sock_info->cfs_pid = sock->last_pid;
		memcpy(sock_info->cfs_uuid, sock->last_uuid, sizeof(uuid_t));

		if (sock->so_flags & SOF_DELEGATED) {
			sock_info->cfs_e_pid = sock->e_pid;
			memcpy(sock_info->cfs_e_uuid, sock->e_uuid, sizeof(uuid_t));
		} else {
			sock_info->cfs_e_pid = sock->last_pid;
			memcpy(sock_info->cfs_e_uuid, sock->last_uuid, sizeof(uuid_t));
		}

		socket_unlock(sock, 1);

		goto return_already_unlocked;
	default:
		error = ENOPROTOOPT;
		break;
	}
done:
	cfil_rw_unlock_shared(&cfil_lck_rw);

	return (error);

return_already_unlocked:

	return (error);
}
static errno_t
cfil_ctl_setopt(kern_ctl_ref kctlref, u_int32_t kcunit, void *unitinfo,
    int opt, void *data, size_t len)
{
#pragma unused(kctlref, opt)
	errno_t error = 0;
	struct content_filter *cfc = (struct content_filter *)unitinfo;

	CFIL_LOG(LOG_NOTICE, "");

	cfil_rw_lock_exclusive(&cfil_lck_rw);

	if (content_filters == NULL) {
		CFIL_LOG(LOG_ERR, "no content filter");
		error = EINVAL;
		goto done;
	}
	if (kcunit > MAX_CONTENT_FILTER) {
		CFIL_LOG(LOG_ERR, "kcunit %u > MAX_CONTENT_FILTER (%d)",
		    kcunit, MAX_CONTENT_FILTER);
		error = EINVAL;
		goto done;
	}
	if (cfc != (void *)content_filters[kcunit - 1]) {
		CFIL_LOG(LOG_ERR, "unitinfo does not match for kcunit %u",
		    kcunit);
		error = EINVAL;
		goto done;
	}
	switch (opt) {
	case CFIL_OPT_NECP_CONTROL_UNIT:
		if (len < sizeof(uint32_t)) {
			CFIL_LOG(LOG_ERR, "CFIL_OPT_NECP_CONTROL_UNIT "
			    "len too small %lu", len);
			error = EINVAL;
			goto done;
		}
		if (cfc->cf_necp_control_unit != 0) {
			CFIL_LOG(LOG_ERR, "CFIL_OPT_NECP_CONTROL_UNIT "
			    "already set %u",
			    cfc->cf_necp_control_unit);
			error = EINVAL;
			goto done;
		}
		cfc->cf_necp_control_unit = *(uint32_t *)data;
		break;
	default:
		error = ENOPROTOOPT;
		break;
	}
done:
	cfil_rw_unlock_exclusive(&cfil_lck_rw);

	return (error);
}
static void
cfil_ctl_rcvd(kern_ctl_ref kctlref, u_int32_t kcunit, void *unitinfo, int flags)
{
#pragma unused(kctlref, flags)
	struct content_filter *cfc = (struct content_filter *)unitinfo;
	struct socket *so = NULL;
	int error;
	struct cfil_entry *entry;

	CFIL_LOG(LOG_INFO, "");

	if (content_filters == NULL) {
		CFIL_LOG(LOG_ERR, "no content filter");
		OSIncrementAtomic(&cfil_stats.cfs_ctl_rcvd_bad);
		return;
	}
	if (kcunit > MAX_CONTENT_FILTER) {
		CFIL_LOG(LOG_ERR, "kcunit %u > MAX_CONTENT_FILTER (%d)",
		    kcunit, MAX_CONTENT_FILTER);
		OSIncrementAtomic(&cfil_stats.cfs_ctl_rcvd_bad);
		return;
	}
	cfil_rw_lock_shared(&cfil_lck_rw);
	if (cfc != (void *)content_filters[kcunit - 1]) {
		CFIL_LOG(LOG_ERR, "unitinfo does not match for kcunit %u",
		    kcunit);
		OSIncrementAtomic(&cfil_stats.cfs_ctl_rcvd_bad);
		goto done;
	}
	/* Let's assume the flow control is lifted */
	if (cfc->cf_flags & CFF_FLOW_CONTROLLED) {
		if (!cfil_rw_lock_shared_to_exclusive(&cfil_lck_rw))
			cfil_rw_lock_exclusive(&cfil_lck_rw);

		cfc->cf_flags &= ~CFF_FLOW_CONTROLLED;

		cfil_rw_lock_exclusive_to_shared(&cfil_lck_rw);
		LCK_RW_ASSERT(&cfil_lck_rw, LCK_RW_ASSERT_SHARED);
	}
	/*
	 * Flow control will be raised again as soon as an entry cannot enqueue
	 * to the kernel control socket
	 */
	while ((cfc->cf_flags & CFF_FLOW_CONTROLLED) == 0) {
		verify_content_filter(cfc);

		cfil_rw_lock_assert_held(&cfil_lck_rw, 0);

		/* Find an entry that is flow controlled */
		TAILQ_FOREACH(entry, &cfc->cf_sock_entries, cfe_link) {
			if (entry->cfe_cfil_info == NULL ||
			    entry->cfe_cfil_info->cfi_so == NULL)
				continue;
			if ((entry->cfe_flags & CFEF_FLOW_CONTROLLED) == 0)
				continue;
			break;
		}
		if (entry == NULL)
			break;

		OSIncrementAtomic(&cfil_stats.cfs_ctl_rcvd_flow_lift);

		so = entry->cfe_cfil_info->cfi_so;

		cfil_rw_unlock_shared(&cfil_lck_rw);
		socket_lock(so, 1);

		error = cfil_acquire_sockbuf(so, 1);
		if (error == 0)
			error = cfil_data_service_ctl_q(so, kcunit, 1);
		cfil_release_sockbuf(so, 1);

		error = cfil_acquire_sockbuf(so, 0);
		if (error == 0)
			error = cfil_data_service_ctl_q(so, kcunit, 0);
		cfil_release_sockbuf(so, 0);

		socket_lock_assert_owned(so);
		socket_unlock(so, 1);

		cfil_rw_lock_shared(&cfil_lck_rw);
	}
done:
	cfil_rw_unlock_shared(&cfil_lck_rw);
}
void
cfil_init(void)
{
	struct kern_ctl_reg kern_ctl;
	errno_t error = 0;
	vm_size_t content_filter_size = 0;	/* size of content_filter */
	vm_size_t cfil_info_size = 0;		/* size of cfil_info */

	CFIL_LOG(LOG_NOTICE, "");

	/*
	 * Compile time verifications
	 */
	_CASSERT(CFIL_MAX_FILTER_COUNT == MAX_CONTENT_FILTER);
	_CASSERT(sizeof(struct cfil_filter_stat) % sizeof(uint32_t) == 0);
	_CASSERT(sizeof(struct cfil_entry_stat) % sizeof(uint32_t) == 0);
	_CASSERT(sizeof(struct cfil_sock_stat) % sizeof(uint32_t) == 0);

	/*
	 * Runtime time verifications
	 */
	VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_ctl_q_in_enqueued,
	    sizeof(uint32_t)));
	VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_ctl_q_out_enqueued,
	    sizeof(uint32_t)));
	VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_ctl_q_in_peeked,
	    sizeof(uint32_t)));
	VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_ctl_q_out_peeked,
	    sizeof(uint32_t)));

	VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_pending_q_in_enqueued,
	    sizeof(uint32_t)));
	VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_pending_q_out_enqueued,
	    sizeof(uint32_t)));

	VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_inject_q_in_enqueued,
	    sizeof(uint32_t)));
	VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_inject_q_out_enqueued,
	    sizeof(uint32_t)));
	VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_inject_q_in_passed,
	    sizeof(uint32_t)));
	VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_inject_q_out_passed,
	    sizeof(uint32_t)));

	/*
	 * Zone for content filters kernel control sockets
	 */
	content_filter_size = sizeof(struct content_filter);
	content_filter_zone = zinit(content_filter_size,
	    CONTENT_FILTER_ZONE_MAX * content_filter_size,
	    0,
	    CONTENT_FILTER_ZONE_NAME);
	if (content_filter_zone == NULL) {
		panic("%s: zinit(%s) failed", __func__,
		    CONTENT_FILTER_ZONE_NAME);
		/* NOTREACHED */
	}
	zone_change(content_filter_zone, Z_CALLERACCT, FALSE);
	zone_change(content_filter_zone, Z_EXPAND, TRUE);

	/*
	 * Zone for per socket content filters
	 */
	cfil_info_size = sizeof(struct cfil_info);
	cfil_info_zone = zinit(cfil_info_size,
	    CFIL_INFO_ZONE_MAX * cfil_info_size,
	    0,
	    CFIL_INFO_ZONE_NAME);
	if (cfil_info_zone == NULL) {
		panic("%s: zinit(%s) failed", __func__, CFIL_INFO_ZONE_NAME);
		/* NOTREACHED */
	}
	zone_change(cfil_info_zone, Z_CALLERACCT, FALSE);
	zone_change(cfil_info_zone, Z_EXPAND, TRUE);

	cfil_lck_grp_attr = lck_grp_attr_alloc_init();
	if (cfil_lck_grp_attr == NULL) {
		panic("%s: lck_grp_attr_alloc_init failed", __func__);
		/* NOTREACHED */
	}
	cfil_lck_grp = lck_grp_alloc_init("content filter",
	    cfil_lck_grp_attr);
	if (cfil_lck_grp == NULL) {
		panic("%s: lck_grp_alloc_init failed", __func__);
		/* NOTREACHED */
	}
	cfil_lck_attr = lck_attr_alloc_init();
	if (cfil_lck_attr == NULL) {
		panic("%s: lck_attr_alloc_init failed", __func__);
		/* NOTREACHED */
	}
	lck_rw_init(&cfil_lck_rw, cfil_lck_grp, cfil_lck_attr);

	TAILQ_INIT(&cfil_sock_head);

	/*
	 * Register kernel control
	 */
	bzero(&kern_ctl, sizeof(kern_ctl));
	strlcpy(kern_ctl.ctl_name, CONTENT_FILTER_CONTROL_NAME,
	    sizeof(kern_ctl.ctl_name));
	kern_ctl.ctl_flags = CTL_FLAG_PRIVILEGED | CTL_FLAG_REG_EXTENDED;
	kern_ctl.ctl_sendsize = 512 * 1024; /* enough? */
	kern_ctl.ctl_recvsize = 512 * 1024; /* enough? */
	kern_ctl.ctl_connect = cfil_ctl_connect;
	kern_ctl.ctl_disconnect = cfil_ctl_disconnect;
	kern_ctl.ctl_send = cfil_ctl_send;
	kern_ctl.ctl_getopt = cfil_ctl_getopt;
	kern_ctl.ctl_setopt = cfil_ctl_setopt;
	kern_ctl.ctl_rcvd = cfil_ctl_rcvd;
	error = ctl_register(&kern_ctl, &cfil_kctlref);
	if (error != 0) {
		CFIL_LOG(LOG_ERR, "ctl_register failed: %d", error);
		return;
	}
}
static struct cfil_info *
cfil_info_alloc(struct socket *so)
{
	int kcunit;
	struct cfil_info *cfil_info = NULL;
	struct inpcb *inp = sotoinpcb(so);

	CFIL_LOG(LOG_INFO, "");

	socket_lock_assert_owned(so);

	cfil_info = zalloc(cfil_info_zone);
	if (cfil_info == NULL)
		goto done;
	bzero(cfil_info, sizeof(struct cfil_info));

	cfil_queue_init(&cfil_info->cfi_snd.cfi_inject_q);
	cfil_queue_init(&cfil_info->cfi_rcv.cfi_inject_q);

	for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
		struct cfil_entry *entry;

		entry = &cfil_info->cfi_entries[kcunit - 1];
		entry->cfe_cfil_info = cfil_info;

		/* Initialize the filter entry */
		entry->cfe_filter = NULL;
		entry->cfe_flags = 0;
		entry->cfe_necp_control_unit = 0;
		entry->cfe_snd.cfe_pass_offset = 0;
		entry->cfe_snd.cfe_peek_offset = 0;
		entry->cfe_snd.cfe_peeked = 0;
		entry->cfe_rcv.cfe_pass_offset = 0;
		entry->cfe_rcv.cfe_peek_offset = 0;
		entry->cfe_rcv.cfe_peeked = 0;

		cfil_queue_init(&entry->cfe_snd.cfe_pending_q);
		cfil_queue_init(&entry->cfe_rcv.cfe_pending_q);
		cfil_queue_init(&entry->cfe_snd.cfe_ctl_q);
		cfil_queue_init(&entry->cfe_rcv.cfe_ctl_q);
	}

	cfil_rw_lock_exclusive(&cfil_lck_rw);

	so->so_cfil = cfil_info;
	cfil_info->cfi_so = so;
	/*
	 * Create a cfi_sock_id that's not the socket pointer!
	 */
	if (inp->inp_flowhash == 0)
		inp->inp_flowhash = inp_calc_flowhash(inp);
	cfil_info->cfi_sock_id =
	    ((so->so_gencnt << 32) | inp->inp_flowhash);

	TAILQ_INSERT_TAIL(&cfil_sock_head, cfil_info, cfi_link);

	cfil_sock_attached_count++;

	cfil_rw_unlock_exclusive(&cfil_lck_rw);

done:
	if (cfil_info != NULL)
		OSIncrementAtomic(&cfil_stats.cfs_cfi_alloc_ok);
	else
		OSIncrementAtomic(&cfil_stats.cfs_cfi_alloc_fail);

	return (cfil_info);
}
static int
cfil_info_attach_unit(struct socket *so, uint32_t filter_control_unit)
{
	int kcunit;
	struct cfil_info *cfil_info = so->so_cfil;
	int attached = 0;

	CFIL_LOG(LOG_INFO, "");

	socket_lock_assert_owned(so);

	cfil_rw_lock_exclusive(&cfil_lck_rw);

	for (kcunit = 1;
	    content_filters != NULL && kcunit <= MAX_CONTENT_FILTER;
	    kcunit++) {
		struct content_filter *cfc = content_filters[kcunit - 1];
		struct cfil_entry *entry;

		if (cfc == NULL)
			continue;
		if (cfc->cf_necp_control_unit != filter_control_unit)
			continue;

		entry = &cfil_info->cfi_entries[kcunit - 1];

		entry->cfe_filter = cfc;
		entry->cfe_necp_control_unit = filter_control_unit;
		TAILQ_INSERT_TAIL(&cfc->cf_sock_entries, entry, cfe_link);
		cfc->cf_sock_count++;
		verify_content_filter(cfc);
		attached = 1;
		entry->cfe_flags |= CFEF_CFIL_ATTACHED;
		break;
	}

	cfil_rw_unlock_exclusive(&cfil_lck_rw);

	return (attached);
}
static void
cfil_info_free(struct socket *so, struct cfil_info *cfil_info)
{
	int kcunit;
	uint64_t in_drain = 0;
	uint64_t out_drained = 0;

	so->so_cfil = NULL;

	if (so->so_flags & SOF_CONTENT_FILTER) {
		so->so_flags &= ~SOF_CONTENT_FILTER;
		VERIFY(so->so_usecount > 0);
		so->so_usecount--;
	}
	if (cfil_info == NULL)
		return;

	CFIL_LOG(LOG_INFO, "");

	cfil_rw_lock_exclusive(&cfil_lck_rw);

	for (kcunit = 1;
	    content_filters != NULL && kcunit <= MAX_CONTENT_FILTER;
	    kcunit++) {
		struct cfil_entry *entry;
		struct content_filter *cfc;

		entry = &cfil_info->cfi_entries[kcunit - 1];

		/* Don't be silly and try to detach twice */
		if (entry->cfe_filter == NULL)
			continue;

		cfc = content_filters[kcunit - 1];

		VERIFY(cfc == entry->cfe_filter);

		entry->cfe_filter = NULL;
		entry->cfe_necp_control_unit = 0;
		TAILQ_REMOVE(&cfc->cf_sock_entries, entry, cfe_link);
		cfc->cf_sock_count--;

		verify_content_filter(cfc);
	}
	cfil_sock_attached_count--;
	TAILQ_REMOVE(&cfil_sock_head, cfil_info, cfi_link);

	out_drained += cfil_queue_drain(&cfil_info->cfi_snd.cfi_inject_q);
	in_drain += cfil_queue_drain(&cfil_info->cfi_rcv.cfi_inject_q);

	for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
		struct cfil_entry *entry;

		entry = &cfil_info->cfi_entries[kcunit - 1];
		out_drained += cfil_queue_drain(&entry->cfe_snd.cfe_pending_q);
		in_drain += cfil_queue_drain(&entry->cfe_rcv.cfe_pending_q);
		out_drained += cfil_queue_drain(&entry->cfe_snd.cfe_ctl_q);
		in_drain += cfil_queue_drain(&entry->cfe_rcv.cfe_ctl_q);
	}
	cfil_rw_unlock_exclusive(&cfil_lck_rw);

	if (out_drained)
		OSIncrementAtomic(&cfil_stats.cfs_flush_out_free);
	if (in_drain)
		OSIncrementAtomic(&cfil_stats.cfs_flush_in_free);

	zfree(cfil_info_zone, cfil_info);
}
/*
 * Entry point from Sockets layer
 * The socket is locked.
 */
errno_t
cfil_sock_attach(struct socket *so)
{
    errno_t error = 0;
    uint32_t filter_control_unit;

    socket_lock_assert_owned(so);

    /* Limit ourselves to TCP sockets that are not MPTCP subflows */
    if ((so->so_proto->pr_domain->dom_family != PF_INET &&
        so->so_proto->pr_domain->dom_family != PF_INET6) ||
        so->so_proto->pr_type != SOCK_STREAM ||
        so->so_proto->pr_protocol != IPPROTO_TCP ||
        (so->so_flags & SOF_MP_SUBFLOW) != 0 ||
        (so->so_flags1 & SOF1_CONTENT_FILTER_SKIP) != 0)
        goto done;

    filter_control_unit = necp_socket_get_content_filter_control_unit(so);
    if (filter_control_unit == 0)
        goto done;

    if ((filter_control_unit & NECP_MASK_USERSPACE_ONLY) != 0) {
        OSIncrementAtomic(&cfil_stats.cfs_sock_userspace_only);
        goto done;
    }
    if (cfil_active_count == 0) {
        OSIncrementAtomic(&cfil_stats.cfs_sock_attach_in_vain);
        goto done;
    }
    if (so->so_cfil != NULL) {
        OSIncrementAtomic(&cfil_stats.cfs_sock_attach_already);
        CFIL_LOG(LOG_ERR, "already attached");
    } else {
        cfil_info_alloc(so);
        if (so->so_cfil == NULL) {
            error = ENOMEM;
            OSIncrementAtomic(&cfil_stats.cfs_sock_attach_no_mem);
            goto done;
        }
    }
    if (cfil_info_attach_unit(so, filter_control_unit) == 0) {
        CFIL_LOG(LOG_ERR, "cfil_info_attach_unit(%u) failed",
            filter_control_unit);
        OSIncrementAtomic(&cfil_stats.cfs_sock_attach_failed);
        goto done;
    }
    CFIL_LOG(LOG_INFO, "so %llx filter_control_unit %u sockid %llx",
        (uint64_t)VM_KERNEL_ADDRPERM(so),
        filter_control_unit, so->so_cfil->cfi_sock_id);

    so->so_flags |= SOF_CONTENT_FILTER;
    OSIncrementAtomic(&cfil_stats.cfs_sock_attached);

    /* Hold a reference on the socket */
    so->so_usecount++;

    error = cfil_dispatch_attach_event(so, filter_control_unit);
    /* We can recover from flow control or out of memory errors */
    if (error == ENOBUFS || error == ENOMEM)
        error = 0;
    else if (error != 0)
        goto done;

    CFIL_INFO_VERIFY(so->so_cfil);
done:
    return (error);
}
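/*
 * Illustrative sketch only (not compiled, not part of the original file):
 * the eligibility test applied by cfil_sock_attach() above, folded into a
 * single predicate for readability. The helper name is hypothetical.
 */
#if 0
static boolean_t
cfil_sock_is_eligible_sketch(struct socket *so)
{
    int family = so->so_proto->pr_domain->dom_family;

    /* Only plain TCP over IPv4/IPv6; never MPTCP subflows or opted-out sockets */
    return ((family == PF_INET || family == PF_INET6) &&
        so->so_proto->pr_type == SOCK_STREAM &&
        so->so_proto->pr_protocol == IPPROTO_TCP &&
        (so->so_flags & SOF_MP_SUBFLOW) == 0 &&
        (so->so_flags1 & SOF1_CONTENT_FILTER_SKIP) == 0);
}
#endif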
/*
 * Entry point from Sockets layer
 * The socket is locked.
 */
errno_t
cfil_sock_detach(struct socket *so)
{
    if (so->so_cfil) {
        cfil_info_free(so, so->so_cfil);
        OSIncrementAtomic(&cfil_stats.cfs_sock_detached);
    }
    return (0);
}
static int
cfil_dispatch_attach_event(struct socket *so, uint32_t filter_control_unit)
{
    errno_t error = 0;
    struct cfil_entry *entry = NULL;
    struct cfil_msg_sock_attached msg_attached;
    uint32_t kcunit;
    struct content_filter *cfc = NULL;

    socket_lock_assert_owned(so);

    cfil_rw_lock_shared(&cfil_lck_rw);

    if (so->so_proto == NULL || so->so_proto->pr_domain == NULL) {
        error = EINVAL;
        goto done;
    }
    /*
     * Find the matching filter unit
     */
    for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
        cfc = content_filters[kcunit - 1];

        if (cfc == NULL)
            continue;
        if (cfc->cf_necp_control_unit != filter_control_unit)
            continue;
        entry = &so->so_cfil->cfi_entries[kcunit - 1];
        if (entry->cfe_filter == NULL)
            continue;

        VERIFY(cfc == entry->cfe_filter);

        break;
    }

    /* No matching filter unit */
    if (entry == NULL || entry->cfe_filter == NULL)
        goto done;

    /* Only send the attached event once */
    if ((entry->cfe_flags & CFEF_SENT_SOCK_ATTACHED))
        goto done;

    CFIL_LOG(LOG_INFO, "so %llx filter_control_unit %u kcunit %u",
        (uint64_t)VM_KERNEL_ADDRPERM(so), filter_control_unit, kcunit);

    /* Would be wasteful to try when flow controlled */
    if (cfc->cf_flags & CFF_FLOW_CONTROLLED) {
        error = ENOBUFS;
        goto done;
    }

    bzero(&msg_attached, sizeof(struct cfil_msg_sock_attached));
    msg_attached.cfs_msghdr.cfm_len = sizeof(struct cfil_msg_sock_attached);
    msg_attached.cfs_msghdr.cfm_version = CFM_VERSION_CURRENT;
    msg_attached.cfs_msghdr.cfm_type = CFM_TYPE_EVENT;
    msg_attached.cfs_msghdr.cfm_op = CFM_OP_SOCKET_ATTACHED;
    msg_attached.cfs_msghdr.cfm_sock_id = entry->cfe_cfil_info->cfi_sock_id;

    msg_attached.cfs_sock_family = so->so_proto->pr_domain->dom_family;
    msg_attached.cfs_sock_type = so->so_proto->pr_type;
    msg_attached.cfs_sock_protocol = so->so_proto->pr_protocol;
    msg_attached.cfs_pid = so->last_pid;
    memcpy(msg_attached.cfs_uuid, so->last_uuid, sizeof(uuid_t));
    if (so->so_flags & SOF_DELEGATED) {
        msg_attached.cfs_e_pid = so->e_pid;
        memcpy(msg_attached.cfs_e_uuid, so->e_uuid, sizeof(uuid_t));
    } else {
        msg_attached.cfs_e_pid = so->last_pid;
        memcpy(msg_attached.cfs_e_uuid, so->last_uuid, sizeof(uuid_t));
    }
    error = ctl_enqueuedata(entry->cfe_filter->cf_kcref,
        entry->cfe_filter->cf_kcunit,
        &msg_attached,
        sizeof(struct cfil_msg_sock_attached),
        CTL_DATA_EOR);
    if (error != 0) {
        CFIL_LOG(LOG_ERR, "ctl_enqueuedata() failed: %d", error);
        goto done;
    }
    microuptime(&entry->cfe_last_event);
    so->so_cfil->cfi_first_event.tv_sec = entry->cfe_last_event.tv_sec;
    so->so_cfil->cfi_first_event.tv_usec = entry->cfe_last_event.tv_usec;

    entry->cfe_flags |= CFEF_SENT_SOCK_ATTACHED;
    OSIncrementAtomic(&cfil_stats.cfs_attach_event_ok);
done:
    /* We can recover from flow control */
    if (error == ENOBUFS) {
        entry->cfe_flags |= CFEF_FLOW_CONTROLLED;
        OSIncrementAtomic(&cfil_stats.cfs_attach_event_flow_control);

        if (!cfil_rw_lock_shared_to_exclusive(&cfil_lck_rw))
            cfil_rw_lock_exclusive(&cfil_lck_rw);

        cfc->cf_flags |= CFF_FLOW_CONTROLLED;

        cfil_rw_unlock_exclusive(&cfil_lck_rw);
    } else {
        if (error != 0)
            OSIncrementAtomic(&cfil_stats.cfs_attach_event_fail);

        cfil_rw_unlock_shared(&cfil_lck_rw);
    }
    return (error);
}
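/*
 * Illustrative user-space sketch only (not compiled here, not part of the
 * original file): the agent side of the CFM_OP_SOCKET_ATTACHED event built
 * above. The agent is assumed to have already connected its kernel control
 * socket ("kcfd"); it answers with a CFM_OP_DATA_UPDATE action (struct
 * cfil_msg_action from <net/content_filter.h>) asking to peek at the first
 * 1024 bytes in each direction. Error handling is reduced to the minimum.
 */
#if 0
#include <sys/socket.h>
#include <string.h>
#include <unistd.h>
#include <net/content_filter.h>

static int
handle_attached_sketch(int kcfd)
{
    struct cfil_msg_sock_attached attached;
    struct cfil_msg_action action;

    if (recv(kcfd, &attached, sizeof(attached), 0) < (ssize_t)sizeof(attached))
        return (-1);
    if (attached.cfs_msghdr.cfm_op != CFM_OP_SOCKET_ATTACHED)
        return (0); /* some other event; ignored in this sketch */

    memset(&action, 0, sizeof(action));
    action.cfa_msghdr.cfm_len = sizeof(action);
    action.cfa_msghdr.cfm_version = CFM_VERSION_CURRENT;
    action.cfa_msghdr.cfm_type = CFM_TYPE_ACTION;
    action.cfa_msghdr.cfm_op = CFM_OP_DATA_UPDATE;
    action.cfa_msghdr.cfm_sock_id = attached.cfs_msghdr.cfm_sock_id;
    /* Nothing passes yet, but peek at the first 1024 bytes each way */
    action.cfa_in_pass_offset = 0;
    action.cfa_in_peek_offset = 1024;
    action.cfa_out_pass_offset = 0;
    action.cfa_out_peek_offset = 1024;

    return (send(kcfd, &action, sizeof(action), 0) == (ssize_t)sizeof(action) ? 0 : -1);
}
#endif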
static int
cfil_dispatch_disconnect_event(struct socket *so, uint32_t kcunit, int outgoing)
{
    errno_t error = 0;
    struct mbuf *msg = NULL;
    struct cfil_entry *entry;
    struct cfe_buf *entrybuf;
    struct cfil_msg_hdr msg_disconnected;
    struct content_filter *cfc;

    socket_lock_assert_owned(so);

    cfil_rw_lock_shared(&cfil_lck_rw);

    entry = &so->so_cfil->cfi_entries[kcunit - 1];
    if (outgoing)
        entrybuf = &entry->cfe_snd;
    else
        entrybuf = &entry->cfe_rcv;

    cfc = entry->cfe_filter;
    if (cfc == NULL)
        goto done;

    CFIL_LOG(LOG_INFO, "so %llx kcunit %u outgoing %d",
        (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit, outgoing);

    /*
     * Send the disconnection event once
     */
    if ((outgoing && (entry->cfe_flags & CFEF_SENT_DISCONNECT_OUT)) ||
        (!outgoing && (entry->cfe_flags & CFEF_SENT_DISCONNECT_IN))) {
        CFIL_LOG(LOG_INFO, "so %llx disconnect already sent",
            (uint64_t)VM_KERNEL_ADDRPERM(so));
        goto done;
    }

    /*
     * We're not disconnected as long as some data is waiting
     * to be delivered to the filter
     */
    if (outgoing && cfil_queue_empty(&entrybuf->cfe_ctl_q) == 0) {
        CFIL_LOG(LOG_INFO, "so %llx control queue not empty",
            (uint64_t)VM_KERNEL_ADDRPERM(so));
        error = EBUSY;
        goto done;
    }
    /* Would be wasteful to try when flow controlled */
    if (cfc->cf_flags & CFF_FLOW_CONTROLLED) {
        error = ENOBUFS;
        goto done;
    }

    bzero(&msg_disconnected, sizeof(struct cfil_msg_hdr));
    msg_disconnected.cfm_len = sizeof(struct cfil_msg_hdr);
    msg_disconnected.cfm_version = CFM_VERSION_CURRENT;
    msg_disconnected.cfm_type = CFM_TYPE_EVENT;
    msg_disconnected.cfm_op = outgoing ? CFM_OP_DISCONNECT_OUT :
        CFM_OP_DISCONNECT_IN;
    msg_disconnected.cfm_sock_id = entry->cfe_cfil_info->cfi_sock_id;
    error = ctl_enqueuedata(entry->cfe_filter->cf_kcref,
        entry->cfe_filter->cf_kcunit,
        &msg_disconnected,
        sizeof(struct cfil_msg_hdr),
        CTL_DATA_EOR);
    if (error != 0) {
        CFIL_LOG(LOG_ERR, "ctl_enqueuedata() failed: %d", error);
        mbuf_freem(msg);
        goto done;
    }
    microuptime(&entry->cfe_last_event);
    CFI_ADD_TIME_LOG(so->so_cfil, &entry->cfe_last_event, &so->so_cfil->cfi_first_event, msg_disconnected.cfm_op);

    /* Remember we have sent the disconnection message */
    if (outgoing) {
        entry->cfe_flags |= CFEF_SENT_DISCONNECT_OUT;
        OSIncrementAtomic(&cfil_stats.cfs_disconnect_out_event_ok);
    } else {
        entry->cfe_flags |= CFEF_SENT_DISCONNECT_IN;
        OSIncrementAtomic(&cfil_stats.cfs_disconnect_in_event_ok);
    }
done:
    if (error == ENOBUFS) {
        entry->cfe_flags |= CFEF_FLOW_CONTROLLED;
        OSIncrementAtomic(
            &cfil_stats.cfs_disconnect_event_flow_control);

        if (!cfil_rw_lock_shared_to_exclusive(&cfil_lck_rw))
            cfil_rw_lock_exclusive(&cfil_lck_rw);

        cfc->cf_flags |= CFF_FLOW_CONTROLLED;

        cfil_rw_unlock_exclusive(&cfil_lck_rw);
    } else {
        if (error != 0)
            OSIncrementAtomic(
                &cfil_stats.cfs_disconnect_event_fail);

        cfil_rw_unlock_shared(&cfil_lck_rw);
    }
    return (error);
}
static int
cfil_dispatch_closed_event(struct socket *so, int kcunit)
{
    errno_t error = 0;
    struct cfil_entry *entry;
    struct cfil_msg_sock_closed msg_closed;
    struct content_filter *cfc;

    socket_lock_assert_owned(so);

    cfil_rw_lock_shared(&cfil_lck_rw);

    entry = &so->so_cfil->cfi_entries[kcunit - 1];
    cfc = entry->cfe_filter;
    if (cfc == NULL)
        goto done;

    CFIL_LOG(LOG_INFO, "so %llx kcunit %d",
        (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit);

    /* Would be wasteful to try when flow controlled */
    if (cfc->cf_flags & CFF_FLOW_CONTROLLED) {
        error = ENOBUFS;
        goto done;
    }
    /*
     * Send a single closed message per filter
     */
    if ((entry->cfe_flags & CFEF_SENT_SOCK_CLOSED) != 0)
        goto done;
    if ((entry->cfe_flags & CFEF_SENT_SOCK_ATTACHED) == 0)
        goto done;

    microuptime(&entry->cfe_last_event);
    CFI_ADD_TIME_LOG(so->so_cfil, &entry->cfe_last_event, &so->so_cfil->cfi_first_event, CFM_OP_SOCKET_CLOSED);

    bzero(&msg_closed, sizeof(struct cfil_msg_sock_closed));
    msg_closed.cfc_msghdr.cfm_len = sizeof(struct cfil_msg_sock_closed);
    msg_closed.cfc_msghdr.cfm_version = CFM_VERSION_CURRENT;
    msg_closed.cfc_msghdr.cfm_type = CFM_TYPE_EVENT;
    msg_closed.cfc_msghdr.cfm_op = CFM_OP_SOCKET_CLOSED;
    msg_closed.cfc_msghdr.cfm_sock_id = entry->cfe_cfil_info->cfi_sock_id;
    msg_closed.cfc_first_event.tv_sec = so->so_cfil->cfi_first_event.tv_sec;
    msg_closed.cfc_first_event.tv_usec = so->so_cfil->cfi_first_event.tv_usec;
    memcpy(msg_closed.cfc_op_time, so->so_cfil->cfi_op_time, sizeof(uint32_t)*CFI_MAX_TIME_LOG_ENTRY);
    memcpy(msg_closed.cfc_op_list, so->so_cfil->cfi_op_list, sizeof(unsigned char)*CFI_MAX_TIME_LOG_ENTRY);
    msg_closed.cfc_op_list_ctr = so->so_cfil->cfi_op_list_ctr;

    CFIL_LOG(LOG_INFO, "sock id %llu, op ctr %d, start time %llu.%llu",
        msg_closed.cfc_msghdr.cfm_sock_id,
        so->so_cfil->cfi_op_list_ctr,
        so->so_cfil->cfi_first_event.tv_sec,
        so->so_cfil->cfi_first_event.tv_usec);

    if (msg_closed.cfc_op_list_ctr > CFI_MAX_TIME_LOG_ENTRY) {
        msg_closed.cfc_op_list_ctr = CFI_MAX_TIME_LOG_ENTRY; /* just in case */
    }
    for (unsigned int i = 0; i < msg_closed.cfc_op_list_ctr; i++) {
        CFIL_LOG(LOG_ERR, "MD: socket %llu event %2u, time + %u msec",
            msg_closed.cfc_msghdr.cfm_sock_id,
            (unsigned short)msg_closed.cfc_op_list[i],
            msg_closed.cfc_op_time[i]);
    }

    error = ctl_enqueuedata(entry->cfe_filter->cf_kcref,
        entry->cfe_filter->cf_kcunit,
        &msg_closed,
        sizeof(struct cfil_msg_sock_closed),
        CTL_DATA_EOR);
    if (error != 0) {
        CFIL_LOG(LOG_ERR, "ctl_enqueuedata() failed: %d",
            error);
        goto done;
    }

    entry->cfe_flags |= CFEF_SENT_SOCK_CLOSED;
    OSIncrementAtomic(&cfil_stats.cfs_closed_event_ok);
done:
    /* We can recover from flow control */
    if (error == ENOBUFS) {
        entry->cfe_flags |= CFEF_FLOW_CONTROLLED;
        OSIncrementAtomic(&cfil_stats.cfs_closed_event_flow_control);

        if (!cfil_rw_lock_shared_to_exclusive(&cfil_lck_rw))
            cfil_rw_lock_exclusive(&cfil_lck_rw);

        cfc->cf_flags |= CFF_FLOW_CONTROLLED;

        cfil_rw_unlock_exclusive(&cfil_lck_rw);
    } else {
        if (error != 0)
            OSIncrementAtomic(&cfil_stats.cfs_closed_event_fail);

        cfil_rw_unlock_shared(&cfil_lck_rw);
    }
    return (error);
}
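/*
 * Illustrative user-space sketch only (not compiled here, not part of the
 * original file): decoding the per-socket event time log carried by the
 * CFM_OP_SOCKET_CLOSED message built above. cfc_op_time[] entries are
 * milliseconds relative to cfc_first_event. Reading one complete message
 * from the kernel control socket is assumed to have happened already.
 */
#if 0
#include <stdio.h>
#include <net/content_filter.h>

static void
dump_closed_event_sketch(const struct cfil_msg_sock_closed *closed)
{
    uint32_t count = closed->cfc_op_list_ctr;
    uint32_t i;

    if (count > CFI_MAX_TIME_LOG_ENTRY)
        count = CFI_MAX_TIME_LOG_ENTRY; /* same clamp as the kernel applies */

    printf("sock %llu: %u logged ops since %lld.%06lld\n",
        (unsigned long long)closed->cfc_msghdr.cfm_sock_id, count,
        (long long)closed->cfc_first_event.tv_sec,
        (long long)closed->cfc_first_event.tv_usec);
    for (i = 0; i < count; i++)
        printf("  op %u at +%u msec\n",
            (unsigned int)closed->cfc_op_list[i], closed->cfc_op_time[i]);
}
#endif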
static void
fill_ip6_sockaddr_4_6(union sockaddr_in_4_6 *sin46,
    struct in6_addr *ip6, u_int16_t port)
{
    struct sockaddr_in6 *sin6 = &sin46->sin6;

    sin6->sin6_family = AF_INET6;
    sin6->sin6_len = sizeof(*sin6);
    sin6->sin6_port = port;
    sin6->sin6_addr = *ip6;
    if (IN6_IS_SCOPE_EMBED(&sin6->sin6_addr)) {
        sin6->sin6_scope_id = ntohs(sin6->sin6_addr.s6_addr16[1]);
        sin6->sin6_addr.s6_addr16[1] = 0;
    }
}
static void
fill_ip_sockaddr_4_6(union sockaddr_in_4_6 *sin46,
    struct in_addr ip, u_int16_t port)
{
    struct sockaddr_in *sin = &sin46->sin;

    sin->sin_family = AF_INET;
    sin->sin_len = sizeof(*sin);
    sin->sin_port = port;
    sin->sin_addr.s_addr = ip.s_addr;
}
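/*
 * Illustrative sketch only (not compiled, not part of the original file):
 * a worked example for fill_ip6_sockaddr_4_6() above. KAME-style scoped
 * addresses (e.g. link-local) embed the scope id in the second 16-bit word
 * of the address, so fe80:0004::1 comes out as sin6_addr fe80::1 with
 * sin6_scope_id 4.
 */
#if 0
static void
scope_embed_example_sketch(void)
{
    union sockaddr_in_4_6 sin46;
    struct in6_addr ip6 = IN6ADDR_ANY_INIT;

    ip6.s6_addr[0] = 0xfe;
    ip6.s6_addr[1] = 0x80;  /* link-local prefix */
    ip6.s6_addr[3] = 0x04;  /* embedded scope id 4 in s6_addr16[1] */
    ip6.s6_addr[15] = 0x01;

    fill_ip6_sockaddr_4_6(&sin46, &ip6, htons(80));
    /* Now sin46.sin6.sin6_scope_id == 4 and the embedded word is zeroed */
}
#endif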
static int
cfil_dispatch_data_event(struct socket *so, uint32_t kcunit, int outgoing,
    struct mbuf *data, unsigned int copyoffset, unsigned int copylen)
{
    errno_t error = 0;
    struct mbuf *copy = NULL;
    struct mbuf *msg = NULL;
    unsigned int one = 1;
    struct cfil_msg_data_event *data_req;
    size_t hdrsize;
    struct inpcb *inp = (struct inpcb *)so->so_pcb;
    struct cfil_entry *entry;
    struct cfe_buf *entrybuf;
    struct content_filter *cfc;
    struct timeval tv;

    cfil_rw_lock_shared(&cfil_lck_rw);

    entry = &so->so_cfil->cfi_entries[kcunit - 1];
    if (outgoing)
        entrybuf = &entry->cfe_snd;
    else
        entrybuf = &entry->cfe_rcv;

    cfc = entry->cfe_filter;
    if (cfc == NULL)
        goto done;

    CFIL_LOG(LOG_INFO, "so %llx kcunit %u outgoing %d",
        (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit, outgoing);

    socket_lock_assert_owned(so);

    /* Would be wasteful to try when flow controlled */
    if (cfc->cf_flags & CFF_FLOW_CONTROLLED) {
        error = ENOBUFS;
        goto done;
    }

    /* Make a copy of the data to pass to kernel control socket */
    copy = m_copym_mode(data, copyoffset, copylen, M_DONTWAIT,
        M_COPYM_NOOP_HDR);
    if (copy == NULL) {
        CFIL_LOG(LOG_ERR, "m_copym_mode() failed");
        error = ENOMEM;
        goto done;
    }

    /* We need an mbuf packet for the message header */
    hdrsize = sizeof(struct cfil_msg_data_event);
    error = mbuf_allocpacket(MBUF_DONTWAIT, hdrsize, &one, &msg);
    if (error != 0) {
        CFIL_LOG(LOG_ERR, "mbuf_allocpacket() failed");
        m_freem(copy);
        /*
         * ENOBUFS is to indicate flow control
         */
        error = ENOBUFS;
        goto done;
    }
    mbuf_setlen(msg, hdrsize);
    mbuf_pkthdr_setlen(msg, hdrsize + copylen);
    msg->m_next = copy;
    data_req = (struct cfil_msg_data_event *)mbuf_data(msg);
    bzero(data_req, hdrsize);
    data_req->cfd_msghdr.cfm_len = hdrsize + copylen;
    data_req->cfd_msghdr.cfm_version = 1;
    data_req->cfd_msghdr.cfm_type = CFM_TYPE_EVENT;
    data_req->cfd_msghdr.cfm_op =
        outgoing ? CFM_OP_DATA_OUT : CFM_OP_DATA_IN;
    data_req->cfd_msghdr.cfm_sock_id =
        entry->cfe_cfil_info->cfi_sock_id;
    data_req->cfd_start_offset = entrybuf->cfe_peeked;
    data_req->cfd_end_offset = entrybuf->cfe_peeked + copylen;

    /*
     * TBD:
     * For non connected sockets need to copy addresses from passed
     * parameters
     */
    if (inp->inp_vflag & INP_IPV6) {
        if (outgoing) {
            fill_ip6_sockaddr_4_6(&data_req->cfc_src,
                &inp->in6p_laddr, inp->inp_lport);
            fill_ip6_sockaddr_4_6(&data_req->cfc_dst,
                &inp->in6p_faddr, inp->inp_fport);
        } else {
            fill_ip6_sockaddr_4_6(&data_req->cfc_src,
                &inp->in6p_faddr, inp->inp_fport);
            fill_ip6_sockaddr_4_6(&data_req->cfc_dst,
                &inp->in6p_laddr, inp->inp_lport);
        }
    } else if (inp->inp_vflag & INP_IPV4) {
        if (outgoing) {
            fill_ip_sockaddr_4_6(&data_req->cfc_src,
                inp->inp_laddr, inp->inp_lport);
            fill_ip_sockaddr_4_6(&data_req->cfc_dst,
                inp->inp_faddr, inp->inp_fport);
        } else {
            fill_ip_sockaddr_4_6(&data_req->cfc_src,
                inp->inp_faddr, inp->inp_fport);
            fill_ip_sockaddr_4_6(&data_req->cfc_dst,
                inp->inp_laddr, inp->inp_lport);
        }
    }

    microuptime(&tv);
    CFI_ADD_TIME_LOG(so->so_cfil, &tv, &so->so_cfil->cfi_first_event, data_req->cfd_msghdr.cfm_op);

    /* Pass the message to the content filter */
    error = ctl_enqueuembuf(entry->cfe_filter->cf_kcref,
        entry->cfe_filter->cf_kcunit,
        msg, CTL_DATA_EOR);
    if (error != 0) {
        CFIL_LOG(LOG_ERR, "ctl_enqueuembuf() failed: %d", error);
        mbuf_freem(msg);
        goto done;
    }
    entry->cfe_flags &= ~CFEF_FLOW_CONTROLLED;
    OSIncrementAtomic(&cfil_stats.cfs_data_event_ok);
done:
    if (error == ENOBUFS) {
        entry->cfe_flags |= CFEF_FLOW_CONTROLLED;
        OSIncrementAtomic(
            &cfil_stats.cfs_data_event_flow_control);

        if (!cfil_rw_lock_shared_to_exclusive(&cfil_lck_rw))
            cfil_rw_lock_exclusive(&cfil_lck_rw);

        cfc->cf_flags |= CFF_FLOW_CONTROLLED;

        cfil_rw_unlock_exclusive(&cfil_lck_rw);
    } else {
        if (error != 0)
            OSIncrementAtomic(&cfil_stats.cfs_data_event_fail);

        cfil_rw_unlock_shared(&cfil_lck_rw);
    }
    return (error);
}
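/*
 * Illustrative user-space sketch only (not compiled here, not part of the
 * original file): parsing one CFM_OP_DATA_OUT/CFM_OP_DATA_IN message as
 * laid out above, i.e. a struct cfil_msg_data_event header immediately
 * followed by (cfm_len - sizeof(header)) bytes of payload. "buf"/"buflen"
 * are assumed to hold one complete message read from the kernel control
 * socket.
 */
#if 0
#include <stdio.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <net/content_filter.h>

static void
parse_data_event_sketch(const void *buf, size_t buflen)
{
    const struct cfil_msg_data_event *ev = buf;
    const uint8_t *payload = (const uint8_t *)(ev + 1);
    size_t payload_len;

    if (buflen < sizeof(*ev) || ev->cfd_msghdr.cfm_len > buflen)
        return;
    payload_len = (size_t)ev->cfd_msghdr.cfm_len - sizeof(*ev);

    printf("sock %llu %s bytes [%llu, %llu) (%zu in this message)\n",
        (unsigned long long)ev->cfd_msghdr.cfm_sock_id,
        ev->cfd_msghdr.cfm_op == CFM_OP_DATA_OUT ? "out" : "in",
        (unsigned long long)ev->cfd_start_offset,
        (unsigned long long)ev->cfd_end_offset, payload_len);

    /* cfc_src/cfc_dst are a union of sockaddr_in and sockaddr_in6 */
    if (ev->cfc_src.sin.sin_family == AF_INET)
        printf("  IPv4 src port %u\n", ntohs(ev->cfc_src.sin.sin_port));
    else if (ev->cfc_src.sin6.sin6_family == AF_INET6)
        printf("  IPv6 src port %u\n", ntohs(ev->cfc_src.sin6.sin6_port));
    (void)payload;
}
#endif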
/*
 * Process the queue of data waiting to be delivered to content filter
 */
static int
cfil_data_service_ctl_q(struct socket *so, uint32_t kcunit, int outgoing)
{
    errno_t error = 0;
    struct mbuf *data, *tmp = NULL;
    unsigned int datalen = 0, copylen = 0, copyoffset = 0;
    struct cfil_entry *entry;
    struct cfe_buf *entrybuf;
    uint64_t currentoffset = 0;

    if (so->so_cfil == NULL)
        return (0);

    CFIL_LOG(LOG_INFO, "so %llx kcunit %u outgoing %d",
        (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit, outgoing);

    socket_lock_assert_owned(so);

    entry = &so->so_cfil->cfi_entries[kcunit - 1];
    if (outgoing)
        entrybuf = &entry->cfe_snd;
    else
        entrybuf = &entry->cfe_rcv;

    /* Send attached message if not yet done */
    if ((entry->cfe_flags & CFEF_SENT_SOCK_ATTACHED) == 0) {
        error = cfil_dispatch_attach_event(so, kcunit);
        if (error != 0) {
            /* We can recover from flow control */
            if (error == ENOBUFS || error == ENOMEM)
                error = 0;
            goto done;
        }
    } else if ((entry->cfe_flags & CFEF_DATA_START) == 0) {
        OSIncrementAtomic(&cfil_stats.cfs_ctl_q_not_started);
        goto done;
    }
    CFIL_LOG(LOG_DEBUG, "pass_offset %llu peeked %llu peek_offset %llu",
        entrybuf->cfe_pass_offset,
        entrybuf->cfe_peeked,
        entrybuf->cfe_peek_offset);

    /* Move all data that can pass */
    while ((data = cfil_queue_first(&entrybuf->cfe_ctl_q)) != NULL &&
        entrybuf->cfe_ctl_q.q_start < entrybuf->cfe_pass_offset) {
        datalen = cfil_data_length(data, NULL);
        tmp = data;

        if (entrybuf->cfe_ctl_q.q_start + datalen <=
            entrybuf->cfe_pass_offset) {
            /*
             * The first mbuf can fully pass
             */
            copylen = datalen;
        } else {
            /*
             * The first mbuf can partially pass
             */
            copylen = entrybuf->cfe_pass_offset -
                entrybuf->cfe_ctl_q.q_start;
        }
        VERIFY(copylen <= datalen);

        CFIL_LOG(LOG_DEBUG,
            "%llx first %llu peeked %llu pass %llu peek %llu"
            "datalen %u copylen %u",
            (uint64_t)VM_KERNEL_ADDRPERM(tmp),
            entrybuf->cfe_ctl_q.q_start,
            entrybuf->cfe_peeked,
            entrybuf->cfe_pass_offset,
            entrybuf->cfe_peek_offset,
            datalen, copylen);

        /*
         * Data that passes has been peeked at explicitly or
         * implicitly
         */
        if (entrybuf->cfe_ctl_q.q_start + copylen >
            entrybuf->cfe_peeked)
            entrybuf->cfe_peeked =
                entrybuf->cfe_ctl_q.q_start + copylen;
        /*
         * Stop on partial pass
         */
        if (copylen < datalen)
            break;

        /* All good, move full data from ctl queue to pending queue */
        cfil_queue_remove(&entrybuf->cfe_ctl_q, data, datalen);

        cfil_queue_enqueue(&entrybuf->cfe_pending_q, data, datalen);
        if (outgoing)
            OSAddAtomic64(datalen,
                &cfil_stats.cfs_pending_q_out_enqueued);
        else
            OSAddAtomic64(datalen,
                &cfil_stats.cfs_pending_q_in_enqueued);
    }
    CFIL_INFO_VERIFY(so->so_cfil);
    CFIL_LOG(LOG_DEBUG,
        "%llx first %llu peeked %llu pass %llu peek %llu"
        "datalen %u copylen %u",
        (uint64_t)VM_KERNEL_ADDRPERM(tmp),
        entrybuf->cfe_ctl_q.q_start,
        entrybuf->cfe_peeked,
        entrybuf->cfe_pass_offset,
        entrybuf->cfe_peek_offset,
        datalen, copylen);

    /* Now deal with remaining data the filter wants to peek at */
    for (data = cfil_queue_first(&entrybuf->cfe_ctl_q),
        currentoffset = entrybuf->cfe_ctl_q.q_start;
        data != NULL && currentoffset < entrybuf->cfe_peek_offset;
        data = cfil_queue_next(&entrybuf->cfe_ctl_q, data),
        currentoffset += datalen) {
        datalen = cfil_data_length(data, NULL);
        tmp = data;

        /* We've already peeked at this mbuf */
        if (currentoffset + datalen <= entrybuf->cfe_peeked)
            continue;
        /*
         * The data in the first mbuf may have been
         * partially peeked at
         */
        copyoffset = entrybuf->cfe_peeked - currentoffset;
        VERIFY(copyoffset < datalen);
        copylen = datalen - copyoffset;
        VERIFY(copylen <= datalen);
        /*
         * Do not copy more than needed
         */
        if (currentoffset + copyoffset + copylen >
            entrybuf->cfe_peek_offset) {
            copylen = entrybuf->cfe_peek_offset -
                (currentoffset + copyoffset);
        }

        CFIL_LOG(LOG_DEBUG,
            "%llx current %llu peeked %llu pass %llu peek %llu"
            "datalen %u copylen %u copyoffset %u",
            (uint64_t)VM_KERNEL_ADDRPERM(tmp),
            currentoffset,
            entrybuf->cfe_peeked,
            entrybuf->cfe_pass_offset,
            entrybuf->cfe_peek_offset,
            datalen, copylen, copyoffset);

        /*
         * Stop if there is nothing more to peek at
         */
        if (copylen == 0)
            break;
        /*
         * Let the filter get a peek at this span of data
         */
        error = cfil_dispatch_data_event(so, kcunit,
            outgoing, data, copyoffset, copylen);
        if (error != 0) {
            /* On error, leave data in ctl_q */
            break;
        }
        entrybuf->cfe_peeked += copylen;
        if (outgoing)
            OSAddAtomic64(copylen,
                &cfil_stats.cfs_ctl_q_out_peeked);
        else
            OSAddAtomic64(copylen,
                &cfil_stats.cfs_ctl_q_in_peeked);

        /* Stop when data could not be fully peeked at */
        if (copylen + copyoffset < datalen)
            break;
    }
    CFIL_INFO_VERIFY(so->so_cfil);
    CFIL_LOG(LOG_DEBUG,
        "%llx first %llu peeked %llu pass %llu peek %llu"
        "datalen %u copylen %u copyoffset %u",
        (uint64_t)VM_KERNEL_ADDRPERM(tmp),
        currentoffset,
        entrybuf->cfe_peeked,
        entrybuf->cfe_pass_offset,
        entrybuf->cfe_peek_offset,
        datalen, copylen, copyoffset);

    /*
     * Process data that has passed the filter
     */
    error = cfil_service_pending_queue(so, kcunit, outgoing);
    if (error != 0) {
        CFIL_LOG(LOG_ERR, "cfil_service_pending_queue() error %d",
            error);
        goto done;
    }

    /*
     * Dispatch disconnect events that could not be sent
     */
    if (so->so_cfil == NULL)
        goto done;
    else if (outgoing) {
        if ((so->so_cfil->cfi_flags & CFIF_SHUT_WR) &&
            !(entry->cfe_flags & CFEF_SENT_DISCONNECT_OUT))
            cfil_dispatch_disconnect_event(so, kcunit, 1);
    } else {
        if ((so->so_cfil->cfi_flags & CFIF_SHUT_RD) &&
            !(entry->cfe_flags & CFEF_SENT_DISCONNECT_IN))
            cfil_dispatch_disconnect_event(so, kcunit, 0);
    }

done:
    CFIL_LOG(LOG_DEBUG,
        "first %llu peeked %llu pass %llu peek %llu",
        entrybuf->cfe_ctl_q.q_start,
        entrybuf->cfe_peeked,
        entrybuf->cfe_pass_offset,
        entrybuf->cfe_peek_offset);

    CFIL_INFO_VERIFY(so->so_cfil);
    return (error);
}
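/*
 * Illustrative sketch only (not compiled, not part of the original file):
 * the offset arithmetic used by cfil_data_service_ctl_q() above, isolated
 * from the queues. Given the stream offset of the first queued mbuf
 * (q_start), its length, and the filter's pass offset, it returns how many
 * leading bytes may move to the pending queue. For example q_start = 100,
 * datalen = 50, pass_offset = 120 yields a partial pass of 20 bytes.
 */
#if 0
static unsigned int
ctl_q_passable_bytes_sketch(uint64_t q_start, unsigned int datalen,
    uint64_t pass_offset)
{
    if (q_start >= pass_offset)
        return (0);                     /* nothing below the pass offset */
    if (q_start + datalen <= pass_offset)
        return (datalen);               /* the first mbuf can fully pass */
    return ((unsigned int)(pass_offset - q_start)); /* partial pass */
}
#endif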
/*
 * cfil_data_filter()
 *
 * Process data for a content filter installed on a socket
 */
static int
cfil_data_filter(struct socket *so, uint32_t kcunit, int outgoing,
    struct mbuf *data, uint64_t datalen)
{
    errno_t error = 0;
    struct cfil_entry *entry;
    struct cfe_buf *entrybuf;

    CFIL_LOG(LOG_INFO, "so %llx kcunit %u outgoing %d",
        (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit, outgoing);

    socket_lock_assert_owned(so);

    entry = &so->so_cfil->cfi_entries[kcunit - 1];
    if (outgoing)
        entrybuf = &entry->cfe_snd;
    else
        entrybuf = &entry->cfe_rcv;

    /* Are we attached to the filter? */
    if (entry->cfe_filter == NULL) {
        error = 0;
        goto done;
    }

    /* Dispatch to filters */
    cfil_queue_enqueue(&entrybuf->cfe_ctl_q, data, datalen);
    if (outgoing)
        OSAddAtomic64(datalen,
            &cfil_stats.cfs_ctl_q_out_enqueued);
    else
        OSAddAtomic64(datalen,
            &cfil_stats.cfs_ctl_q_in_enqueued);

    error = cfil_data_service_ctl_q(so, kcunit, outgoing);
    if (error != 0) {
        CFIL_LOG(LOG_ERR, "cfil_data_service_ctl_q() error %d",
            error);
    }
    /*
     * We have to return EJUSTRETURN in all cases to avoid a double free
     * by the socket layer
     */
    error = EJUSTRETURN;
done:
    CFIL_INFO_VERIFY(so->so_cfil);

    CFIL_LOG(LOG_INFO, "return %d", error);
    return (error);
}
/*
 * cfil_service_inject_queue() re-injects data that has passed the
 * content filters
 */
static int
cfil_service_inject_queue(struct socket *so, int outgoing)
{
    struct mbuf *data;
    unsigned int datalen;
    int mbcnt;
    unsigned int copylen;
    errno_t error = 0;
    struct mbuf *copy = NULL;
    struct cfi_buf *cfi_buf;
    struct cfil_queue *inject_q;
    int need_rwakeup = 0;

    if (so->so_cfil == NULL)
        return (0);

    CFIL_LOG(LOG_INFO, "so %llx outgoing %d",
        (uint64_t)VM_KERNEL_ADDRPERM(so), outgoing);

    socket_lock_assert_owned(so);

    if (outgoing) {
        cfi_buf = &so->so_cfil->cfi_snd;
        so->so_cfil->cfi_flags &= ~CFIF_RETRY_INJECT_OUT;
    } else {
        cfi_buf = &so->so_cfil->cfi_rcv;
        so->so_cfil->cfi_flags &= ~CFIF_RETRY_INJECT_IN;
    }
    inject_q = &cfi_buf->cfi_inject_q;

    while ((data = cfil_queue_first(inject_q)) != NULL) {
        datalen = cfil_data_length(data, &mbcnt);

        CFIL_LOG(LOG_INFO, "data %llx datalen %u",
            (uint64_t)VM_KERNEL_ADDRPERM(data), datalen);

        /* Make a copy in case of injection error */
        copy = m_copym_mode(data, 0, M_COPYALL, M_DONTWAIT,
            M_COPYM_COPY_HDR);
        if (copy == NULL) {
            CFIL_LOG(LOG_ERR, "m_copym_mode() failed");
            error = ENOMEM;
            break;
        }

        if ((copylen = m_length(copy)) != datalen)
            panic("%s so %p copylen %d != datalen %d",
                __func__, so, copylen, datalen);

        if (outgoing) {
            socket_unlock(so, 0);

            /*
             * Set both DONTWAIT and NBIO flags as we really
             * do not want to block
             */
            error = sosend(so, NULL, NULL,
                copy, NULL,
                MSG_SKIPCFIL | MSG_DONTWAIT | MSG_NBIO);

            socket_lock(so, 0);

            if (error != 0) {
                CFIL_LOG(LOG_ERR, "sosend() failed %d",
                    error);
            }
        } else {
            copy->m_flags |= M_SKIPCFIL;

            /*
             * NOTE:
             * This works only because we support plain TCP.
             * For UDP, RAWIP, MPTCP and message TCP we'll
             * need to call the appropriate sbappendxxx()
             * or fix sock_inject_data_in()
             */
            if (sbappendstream(&so->so_rcv, copy))
                need_rwakeup = 1;
        }

        /* Need to reassess if filter is still attached after unlock */
        if (so->so_cfil == NULL) {
            CFIL_LOG(LOG_ERR, "so %llx cfil detached",
                (uint64_t)VM_KERNEL_ADDRPERM(so));
            OSIncrementAtomic(&cfil_stats.cfs_inject_q_detached);
            error = 0;
            break;
        }
        if (error != 0)
            break;

        /* Injection successful */
        cfil_queue_remove(inject_q, data, datalen);
        mbuf_freem(data);

        cfi_buf->cfi_pending_first += datalen;
        cfi_buf->cfi_pending_mbcnt -= mbcnt;
        cfil_info_buf_verify(cfi_buf);

        if (outgoing)
            OSAddAtomic64(datalen,
                &cfil_stats.cfs_inject_q_out_passed);
        else
            OSAddAtomic64(datalen,
                &cfil_stats.cfs_inject_q_in_passed);
    }

    /* A single wakeup for several packets is more efficient */
    if (need_rwakeup)
        sorwakeup(so);

    if (error != 0 && so->so_cfil) {
        if (error == ENOBUFS)
            OSIncrementAtomic(&cfil_stats.cfs_inject_q_nobufs);
        if (error == ENOMEM)
            OSIncrementAtomic(&cfil_stats.cfs_inject_q_nomem);

        if (outgoing) {
            so->so_cfil->cfi_flags |= CFIF_RETRY_INJECT_OUT;
            OSIncrementAtomic(&cfil_stats.cfs_inject_q_out_fail);
        } else {
            so->so_cfil->cfi_flags |= CFIF_RETRY_INJECT_IN;
            OSIncrementAtomic(&cfil_stats.cfs_inject_q_in_fail);
        }
    }

    if (so->so_cfil && (so->so_cfil->cfi_flags & CFIF_SHUT_WR)) {
        cfil_sock_notify_shutdown(so, SHUT_WR);
        if (cfil_sock_data_pending(&so->so_snd) == 0)
            soshutdownlock_final(so, SHUT_WR);
    }
    if (so->so_cfil && (so->so_cfil->cfi_flags & CFIF_CLOSE_WAIT)) {
        if (cfil_filters_attached(so) == 0) {
            CFIL_LOG(LOG_INFO, "so %llx waking",
                (uint64_t)VM_KERNEL_ADDRPERM(so));
            wakeup((caddr_t)&so->so_cfil);
        }
    }

    CFIL_INFO_VERIFY(so->so_cfil);

    return (error);
}
static int
cfil_service_pending_queue(struct socket *so, uint32_t kcunit, int outgoing)
{
    uint64_t passlen, curlen;
    struct mbuf *data;
    unsigned int datalen;
    errno_t error = 0;
    struct cfil_entry *entry;
    struct cfe_buf *entrybuf;
    struct cfil_queue *pending_q;

    CFIL_LOG(LOG_INFO, "so %llx kcunit %u outgoing %d",
        (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit, outgoing);

    socket_lock_assert_owned(so);

    entry = &so->so_cfil->cfi_entries[kcunit - 1];
    if (outgoing)
        entrybuf = &entry->cfe_snd;
    else
        entrybuf = &entry->cfe_rcv;

    pending_q = &entrybuf->cfe_pending_q;

    passlen = entrybuf->cfe_pass_offset - pending_q->q_start;

    /*
     * Locate the chunks of data that we can pass to the next filter
     * A data chunk must be on mbuf boundaries
     */
    curlen = 0;
    while ((data = cfil_queue_first(pending_q)) != NULL) {
        datalen = cfil_data_length(data, NULL);

        CFIL_LOG(LOG_DEBUG,
            "data %llx datalen %u passlen %llu curlen %llu",
            (uint64_t)VM_KERNEL_ADDRPERM(data), datalen,
            passlen, curlen);

        if (curlen + datalen > passlen)
            break;

        cfil_queue_remove(pending_q, data, datalen);

        curlen += datalen;

        for (kcunit += 1;
            kcunit <= MAX_CONTENT_FILTER;
            kcunit++) {
            error = cfil_data_filter(so, kcunit, outgoing,
                data, datalen);
            /* 0 means passed so we can continue */
            if (error != 0)
                break;
        }
        /* When data has passed all filters, re-inject */
        if (error == 0) {
            if (outgoing) {
                cfil_queue_enqueue(
                    &so->so_cfil->cfi_snd.cfi_inject_q,
                    data, datalen);
                OSAddAtomic64(datalen,
                    &cfil_stats.cfs_inject_q_out_enqueued);
            } else {
                cfil_queue_enqueue(
                    &so->so_cfil->cfi_rcv.cfi_inject_q,
                    data, datalen);
                OSAddAtomic64(datalen,
                    &cfil_stats.cfs_inject_q_in_enqueued);
            }
        }
    }

    CFIL_INFO_VERIFY(so->so_cfil);

    return (error);
}
static int
cfil_update_data_offsets(struct socket *so, uint32_t kcunit, int outgoing,
    uint64_t pass_offset, uint64_t peek_offset)
{
    errno_t error = 0;
    struct cfil_entry *entry = NULL;
    struct cfe_buf *entrybuf;
    int updated = 0;

    CFIL_LOG(LOG_INFO, "pass %llu peek %llu", pass_offset, peek_offset);

    socket_lock_assert_owned(so);

    if (so->so_cfil == NULL) {
        CFIL_LOG(LOG_ERR, "so %llx cfil detached",
            (uint64_t)VM_KERNEL_ADDRPERM(so));
        error = 0;
        goto done;
    } else if (so->so_cfil->cfi_flags & CFIF_DROP) {
        CFIL_LOG(LOG_ERR, "so %llx drop set",
            (uint64_t)VM_KERNEL_ADDRPERM(so));
        error = EPIPE;
        goto done;
    }

    entry = &so->so_cfil->cfi_entries[kcunit - 1];
    if (outgoing)
        entrybuf = &entry->cfe_snd;
    else
        entrybuf = &entry->cfe_rcv;

    /* Record updated offsets for this content filter */
    if (pass_offset > entrybuf->cfe_pass_offset) {
        entrybuf->cfe_pass_offset = pass_offset;

        if (entrybuf->cfe_peek_offset < entrybuf->cfe_pass_offset)
            entrybuf->cfe_peek_offset = entrybuf->cfe_pass_offset;
        updated = 1;
    } else {
        CFIL_LOG(LOG_INFO, "pass_offset %llu <= cfe_pass_offset %llu",
            pass_offset, entrybuf->cfe_pass_offset);
    }
    /* Filter does not want or need to see data that's allowed to pass */
    if (peek_offset > entrybuf->cfe_pass_offset &&
        peek_offset > entrybuf->cfe_peek_offset) {
        entrybuf->cfe_peek_offset = peek_offset;
        updated = 1;
    }
    /* Nothing to do */
    if (updated == 0)
        goto done;

    /* Move data held in control queue to pending queue if needed */
    error = cfil_data_service_ctl_q(so, kcunit, outgoing);
    if (error != 0) {
        CFIL_LOG(LOG_ERR, "cfil_data_service_ctl_q() error %d",
            error);
        goto done;
    }
    error = EJUSTRETURN;

done:
    /*
     * The filter is effectively detached when pass all from both sides
     * or when the socket is closed and no more data is waiting
     * to be delivered to the filter
     */
    if (entry != NULL &&
        ((entry->cfe_snd.cfe_pass_offset == CFM_MAX_OFFSET &&
        entry->cfe_rcv.cfe_pass_offset == CFM_MAX_OFFSET) ||
        ((so->so_cfil->cfi_flags & CFIF_CLOSE_WAIT) &&
        cfil_queue_empty(&entry->cfe_snd.cfe_ctl_q) &&
        cfil_queue_empty(&entry->cfe_rcv.cfe_ctl_q)))) {
        entry->cfe_flags |= CFEF_CFIL_DETACHED;
        CFIL_LOG(LOG_INFO, "so %llx detached %u",
            (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit);
        if ((so->so_cfil->cfi_flags & CFIF_CLOSE_WAIT) &&
            cfil_filters_attached(so) == 0) {
            CFIL_LOG(LOG_INFO, "so %llx waking",
                (uint64_t)VM_KERNEL_ADDRPERM(so));
            wakeup((caddr_t)&so->so_cfil);
        }
    }
    CFIL_INFO_VERIFY(so->so_cfil);
    CFIL_LOG(LOG_INFO, "return %d", error);
    return (error);
}
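/*
 * Illustrative user-space sketch only (not compiled here, not part of the
 * original file): the agent side of the offset update handled above.
 * Sending CFM_OP_DATA_UPDATE with both pass offsets at CFM_MAX_OFFSET is
 * the "allow everything from now on" verdict; once both directions are at
 * CFM_MAX_OFFSET the kernel marks the entry CFEF_CFIL_DETACHED. "kcfd" and
 * "sock_id" are assumed; struct cfil_msg_action and cfil_sock_id_t come
 * from <net/content_filter.h>.
 */
#if 0
#include <sys/socket.h>
#include <string.h>
#include <net/content_filter.h>

static int
send_pass_all_sketch(int kcfd, cfil_sock_id_t sock_id)
{
    struct cfil_msg_action action;

    memset(&action, 0, sizeof(action));
    action.cfa_msghdr.cfm_len = sizeof(action);
    action.cfa_msghdr.cfm_version = CFM_VERSION_CURRENT;
    action.cfa_msghdr.cfm_type = CFM_TYPE_ACTION;
    action.cfa_msghdr.cfm_op = CFM_OP_DATA_UPDATE;
    action.cfa_msghdr.cfm_sock_id = sock_id;
    action.cfa_in_pass_offset = CFM_MAX_OFFSET;
    action.cfa_in_peek_offset = CFM_MAX_OFFSET;
    action.cfa_out_pass_offset = CFM_MAX_OFFSET;
    action.cfa_out_peek_offset = CFM_MAX_OFFSET;

    return (send(kcfd, &action, sizeof(action), 0) == (ssize_t)sizeof(action) ? 0 : -1);
}
#endif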
/*
 * Update pass offset for socket when no data is pending
 */
static int
cfil_set_socket_pass_offset(struct socket *so, int outgoing)
{
    struct cfi_buf *cfi_buf;
    struct cfil_entry *entry;
    struct cfe_buf *entrybuf;
    uint32_t kcunit;
    uint64_t pass_offset = 0;

    if (so->so_cfil == NULL)
        return (0);

    CFIL_LOG(LOG_INFO, "so %llx outgoing %d",
        (uint64_t)VM_KERNEL_ADDRPERM(so), outgoing);

    socket_lock_assert_owned(so);

    if (outgoing)
        cfi_buf = &so->so_cfil->cfi_snd;
    else
        cfi_buf = &so->so_cfil->cfi_rcv;

    if (cfi_buf->cfi_pending_last - cfi_buf->cfi_pending_first == 0) {
        for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
            entry = &so->so_cfil->cfi_entries[kcunit - 1];

            /* Are we attached to a filter? */
            if (entry->cfe_filter == NULL)
                continue;

            if (outgoing)
                entrybuf = &entry->cfe_snd;
            else
                entrybuf = &entry->cfe_rcv;

            if (pass_offset == 0 ||
                entrybuf->cfe_pass_offset < pass_offset)
                pass_offset = entrybuf->cfe_pass_offset;
        }
        cfi_buf->cfi_pass_offset = pass_offset;
    }

    return (0);
}
static int
cfil_action_data_pass(struct socket *so, uint32_t kcunit, int outgoing,
    uint64_t pass_offset, uint64_t peek_offset)
{
    errno_t error = 0;

    CFIL_LOG(LOG_INFO, "");

    socket_lock_assert_owned(so);

    error = cfil_acquire_sockbuf(so, outgoing);
    if (error != 0) {
        CFIL_LOG(LOG_INFO, "so %llx %s dropped",
            (uint64_t)VM_KERNEL_ADDRPERM(so),
            outgoing ? "out" : "in");
        goto release;
    }

    error = cfil_update_data_offsets(so, kcunit, outgoing,
        pass_offset, peek_offset);

    cfil_service_inject_queue(so, outgoing);

    cfil_set_socket_pass_offset(so, outgoing);
release:
    CFIL_INFO_VERIFY(so->so_cfil);
    cfil_release_sockbuf(so, outgoing);

    return (error);
}
static void
cfil_flush_queues(struct socket *so)
{
    struct cfil_entry *entry;
    int kcunit;
    uint64_t drained;

    if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL)
        goto done;

    socket_lock_assert_owned(so);

    /*
     * Flush the output queues and ignore errors as long as
     * we are attached
     */
    (void) cfil_acquire_sockbuf(so, 1);
    if (so->so_cfil != NULL) {
        drained = 0;
        for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
            entry = &so->so_cfil->cfi_entries[kcunit - 1];

            drained += cfil_queue_drain(&entry->cfe_snd.cfe_ctl_q);
            drained += cfil_queue_drain(
                &entry->cfe_snd.cfe_pending_q);
        }
        drained += cfil_queue_drain(&so->so_cfil->cfi_snd.cfi_inject_q);
        if (drained) {
            if (so->so_cfil->cfi_flags & CFIF_DROP)
                OSIncrementAtomic(
                    &cfil_stats.cfs_flush_out_drop);
            else
                OSIncrementAtomic(
                    &cfil_stats.cfs_flush_out_close);
        }
    }
    cfil_release_sockbuf(so, 1);

    /*
     * Flush the input queues
     */
    (void) cfil_acquire_sockbuf(so, 0);
    if (so->so_cfil != NULL) {
        drained = 0;
        for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
            entry = &so->so_cfil->cfi_entries[kcunit - 1];

            drained += cfil_queue_drain(
                &entry->cfe_rcv.cfe_ctl_q);
            drained += cfil_queue_drain(
                &entry->cfe_rcv.cfe_pending_q);
        }
        drained += cfil_queue_drain(&so->so_cfil->cfi_rcv.cfi_inject_q);
        if (drained) {
            if (so->so_cfil->cfi_flags & CFIF_DROP)
                OSIncrementAtomic(
                    &cfil_stats.cfs_flush_in_drop);
            else
                OSIncrementAtomic(
                    &cfil_stats.cfs_flush_in_close);
        }
    }
    cfil_release_sockbuf(so, 0);
done:
    CFIL_INFO_VERIFY(so->so_cfil);
}
static int
cfil_action_drop(struct socket *so, uint32_t kcunit)
{
    errno_t error = 0;
    struct cfil_entry *entry;
    struct proc *p;

    if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL)
        goto done;

    socket_lock_assert_owned(so);

    entry = &so->so_cfil->cfi_entries[kcunit - 1];

    /* Are we attached to the filter? */
    if (entry->cfe_filter == NULL)
        goto done;

    so->so_cfil->cfi_flags |= CFIF_DROP;

    p = current_proc();

    /*
     * Force the socket to be marked defunct
     * (forcing fixed along with rdar://19391339)
     */
    error = sosetdefunct(p, so,
        SHUTDOWN_SOCKET_LEVEL_CONTENT_FILTER | SHUTDOWN_SOCKET_LEVEL_DISCONNECT_ALL,
        FALSE);

    /* Flush the socket buffer and disconnect */
    if (error == 0)
        error = sodefunct(p, so,
            SHUTDOWN_SOCKET_LEVEL_CONTENT_FILTER | SHUTDOWN_SOCKET_LEVEL_DISCONNECT_ALL);

    /* The filter is done, mark as detached */
    entry->cfe_flags |= CFEF_CFIL_DETACHED;
    CFIL_LOG(LOG_INFO, "so %llx detached %u",
        (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit);

    /* Pending data needs to go */
    cfil_flush_queues(so);

    if (so->so_cfil && (so->so_cfil->cfi_flags & CFIF_CLOSE_WAIT)) {
        if (cfil_filters_attached(so) == 0) {
            CFIL_LOG(LOG_INFO, "so %llx waking",
                (uint64_t)VM_KERNEL_ADDRPERM(so));
            wakeup((caddr_t)&so->so_cfil);
        }
    }
done:
    return (error);
}
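/*
 * Illustrative user-space sketch only (not compiled here, not part of the
 * original file): the agent side of the drop path above. A CFM_OP_DROP
 * action for the given socket id makes the kernel mark the flow CFIF_DROP,
 * defunct the socket and flush the held queues. A full struct
 * cfil_msg_action is sent here even though only the header fields matter
 * for a drop. "kcfd" and "sock_id" are assumed.
 */
#if 0
#include <sys/socket.h>
#include <string.h>
#include <net/content_filter.h>

static int
send_drop_sketch(int kcfd, cfil_sock_id_t sock_id)
{
    struct cfil_msg_action action;

    memset(&action, 0, sizeof(action));
    action.cfa_msghdr.cfm_len = sizeof(action);
    action.cfa_msghdr.cfm_version = CFM_VERSION_CURRENT;
    action.cfa_msghdr.cfm_type = CFM_TYPE_ACTION;
    action.cfa_msghdr.cfm_op = CFM_OP_DROP;
    action.cfa_msghdr.cfm_sock_id = sock_id;

    return (send(kcfd, &action, sizeof(action), 0) == (ssize_t)sizeof(action) ? 0 : -1);
}
#endif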
static int
cfil_action_bless_client(uint32_t kcunit, struct cfil_msg_hdr *msghdr)
{
    errno_t error = 0;

    cfil_rw_lock_exclusive(&cfil_lck_rw);

    bool cfil_attached = false;
    struct cfil_msg_bless_client *blessmsg = (struct cfil_msg_bless_client *)msghdr;
    struct socket *so = cfil_socket_from_client_uuid(blessmsg->cfb_client_uuid, &cfil_attached);
    if (so == NULL) {
        error = ENOENT;
    } else {
        // The client gets a pass automatically
        socket_lock(so, 1);
        if (cfil_attached) {
            (void)cfil_action_data_pass(so, kcunit, 1, CFM_MAX_OFFSET, CFM_MAX_OFFSET);
            (void)cfil_action_data_pass(so, kcunit, 0, CFM_MAX_OFFSET, CFM_MAX_OFFSET);
        } else {
            so->so_flags1 |= SOF1_CONTENT_FILTER_SKIP;
        }
        socket_unlock(so, 1);
    }

    cfil_rw_unlock_exclusive(&cfil_lck_rw);

    return (error);
}
static int
cfil_update_entry_offsets(struct socket *so, int outgoing, unsigned int datalen)
{
    struct cfil_entry *entry;
    struct cfe_buf *entrybuf;
    uint32_t kcunit;

    CFIL_LOG(LOG_INFO, "so %llx outgoing %d datalen %u",
        (uint64_t)VM_KERNEL_ADDRPERM(so), outgoing, datalen);

    for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
        entry = &so->so_cfil->cfi_entries[kcunit - 1];

        /* Are we attached to the filter? */
        if (entry->cfe_filter == NULL)
            continue;

        if (outgoing)
            entrybuf = &entry->cfe_snd;
        else
            entrybuf = &entry->cfe_rcv;

        entrybuf->cfe_ctl_q.q_start += datalen;
        entrybuf->cfe_pass_offset = entrybuf->cfe_ctl_q.q_start;
        entrybuf->cfe_peeked = entrybuf->cfe_ctl_q.q_start;
        if (entrybuf->cfe_peek_offset < entrybuf->cfe_pass_offset)
            entrybuf->cfe_peek_offset = entrybuf->cfe_pass_offset;

        entrybuf->cfe_ctl_q.q_end += datalen;

        entrybuf->cfe_pending_q.q_start += datalen;
        entrybuf->cfe_pending_q.q_end += datalen;
    }
    CFIL_INFO_VERIFY(so->so_cfil);
    return (0);
}
static int
cfil_data_common(struct socket *so, int outgoing, struct sockaddr *to,
    struct mbuf *data, struct mbuf *control, uint32_t flags)
{
#pragma unused(to, control, flags)
    errno_t error = 0;
    unsigned int datalen;
    int mbcnt;
    int kcunit;
    struct cfi_buf *cfi_buf;

    if (so->so_cfil == NULL) {
        CFIL_LOG(LOG_ERR, "so %llx cfil detached",
            (uint64_t)VM_KERNEL_ADDRPERM(so));
        error = 0;
        goto done;
    } else if (so->so_cfil->cfi_flags & CFIF_DROP) {
        CFIL_LOG(LOG_ERR, "so %llx drop set",
            (uint64_t)VM_KERNEL_ADDRPERM(so));
        error = EPIPE;
        goto done;
    }

    datalen = cfil_data_length(data, &mbcnt);

    CFIL_LOG(LOG_INFO, "so %llx %s m %llx len %u flags 0x%x nextpkt %llx",
        (uint64_t)VM_KERNEL_ADDRPERM(so),
        outgoing ? "out" : "in",
        (uint64_t)VM_KERNEL_ADDRPERM(data), datalen, data->m_flags,
        (uint64_t)VM_KERNEL_ADDRPERM(data->m_nextpkt));

    if (outgoing)
        cfi_buf = &so->so_cfil->cfi_snd;
    else
        cfi_buf = &so->so_cfil->cfi_rcv;

    cfi_buf->cfi_pending_last += datalen;
    cfi_buf->cfi_pending_mbcnt += mbcnt;
    cfil_info_buf_verify(cfi_buf);

    CFIL_LOG(LOG_INFO, "so %llx cfi_pending_last %llu cfi_pass_offset %llu",
        (uint64_t)VM_KERNEL_ADDRPERM(so),
        cfi_buf->cfi_pending_last,
        cfi_buf->cfi_pass_offset);

    /* Fast path when below pass offset */
    if (cfi_buf->cfi_pending_last <= cfi_buf->cfi_pass_offset) {
        cfil_update_entry_offsets(so, outgoing, datalen);
    } else {
        for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
            error = cfil_data_filter(so, kcunit, outgoing, data,
                datalen);
            /* 0 means passed so continue with next filter */
            if (error != 0)
                break;
        }
    }

    /* Move cursor if no filter claimed the data */
    if (error == 0) {
        cfi_buf->cfi_pending_first += datalen;
        cfi_buf->cfi_pending_mbcnt -= mbcnt;
        cfil_info_buf_verify(cfi_buf);
    }
done:
    CFIL_INFO_VERIFY(so->so_cfil);

    return (error);
}
/*
 * Callback from socket layer sosendxxx()
 */
int
cfil_sock_data_out(struct socket *so, struct sockaddr *to,
    struct mbuf *data, struct mbuf *control, uint32_t flags)
{
    int error = 0;

    if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL)
        return (0);

    socket_lock_assert_owned(so);

    if (so->so_cfil->cfi_flags & CFIF_DROP) {
        CFIL_LOG(LOG_ERR, "so %llx drop set",
            (uint64_t)VM_KERNEL_ADDRPERM(so));
        return (EPIPE);
    }
    if (control != NULL) {
        CFIL_LOG(LOG_ERR, "so %llx control",
            (uint64_t)VM_KERNEL_ADDRPERM(so));
        OSIncrementAtomic(&cfil_stats.cfs_data_out_control);
    }
    if ((flags & MSG_OOB)) {
        CFIL_LOG(LOG_ERR, "so %llx MSG_OOB",
            (uint64_t)VM_KERNEL_ADDRPERM(so));
        OSIncrementAtomic(&cfil_stats.cfs_data_out_oob);
    }
    if ((so->so_snd.sb_flags & SB_LOCK) == 0)
        panic("so %p SB_LOCK not set", so);

    if (so->so_snd.sb_cfil_thread != NULL)
        panic("%s sb_cfil_thread %p not NULL", __func__,
            so->so_snd.sb_cfil_thread);

    error = cfil_data_common(so, 1, to, data, control, flags);

    return (error);
}
/*
 * Callback from socket layer sbappendxxx()
 */
int
cfil_sock_data_in(struct socket *so, struct sockaddr *from,
    struct mbuf *data, struct mbuf *control, uint32_t flags)
{
    int error = 0;

    if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL)
        return (0);

    socket_lock_assert_owned(so);

    if (so->so_cfil->cfi_flags & CFIF_DROP) {
        CFIL_LOG(LOG_ERR, "so %llx drop set",
            (uint64_t)VM_KERNEL_ADDRPERM(so));
        return (EPIPE);
    }
    if (control != NULL) {
        CFIL_LOG(LOG_ERR, "so %llx control",
            (uint64_t)VM_KERNEL_ADDRPERM(so));
        OSIncrementAtomic(&cfil_stats.cfs_data_in_control);
    }
    if (data->m_type == MT_OOBDATA) {
        CFIL_LOG(LOG_ERR, "so %llx MSG_OOB",
            (uint64_t)VM_KERNEL_ADDRPERM(so));
        OSIncrementAtomic(&cfil_stats.cfs_data_in_oob);
    }
    error = cfil_data_common(so, 0, from, data, control, flags);

    return (error);
}
/*
 * Callback from socket layer soshutdownxxx()
 *
 * We may delay the shutdown write if there is outgoing data in process.
 *
 * There is no point in delaying the shutdown read because the process
 * indicated that it does not want to read any more data.
 */
int
cfil_sock_shutdown(struct socket *so, int *how)
{
    int error = 0;

    if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL)
        goto done;

    socket_lock_assert_owned(so);

    CFIL_LOG(LOG_INFO, "so %llx how %d",
        (uint64_t)VM_KERNEL_ADDRPERM(so), *how);

    /*
     * Check the state of the socket before the content filter
     */
    if (*how != SHUT_WR && (so->so_state & SS_CANTRCVMORE) != 0) {
        /* read already shut down */
        error = ENOTCONN;
        goto done;
    }
    if (*how != SHUT_RD && (so->so_state & SS_CANTSENDMORE) != 0) {
        /* write already shut down */
        error = ENOTCONN;
        goto done;
    }

    if ((so->so_cfil->cfi_flags & CFIF_DROP) != 0) {
        CFIL_LOG(LOG_ERR, "so %llx drop set",
            (uint64_t)VM_KERNEL_ADDRPERM(so));
        goto done;
    }

    /*
     * shutdown read: SHUT_RD or SHUT_RDWR
     */
    if (*how != SHUT_WR) {
        if (so->so_cfil->cfi_flags & CFIF_SHUT_RD) {
            error = ENOTCONN;
            goto done;
        }
        so->so_cfil->cfi_flags |= CFIF_SHUT_RD;
        cfil_sock_notify_shutdown(so, SHUT_RD);
    }
    /*
     * shutdown write: SHUT_WR or SHUT_RDWR
     */
    if (*how != SHUT_RD) {
        if (so->so_cfil->cfi_flags & CFIF_SHUT_WR) {
            error = ENOTCONN;
            goto done;
        }
        so->so_cfil->cfi_flags |= CFIF_SHUT_WR;
        cfil_sock_notify_shutdown(so, SHUT_WR);
        /*
         * When outgoing data is pending, we delay the shutdown at the
         * protocol level until the content filters give the final
         * verdict on the pending data.
         */
        if (cfil_sock_data_pending(&so->so_snd) != 0) {
            /*
             * When shutting down the read and write sides at once
             * we can proceed to the final shutdown of the read
             * side. Otherwise, we just return.
             */
            if (*how == SHUT_WR) {
                error = EJUSTRETURN;
            } else if (*how == SHUT_RDWR) {
                *how = SHUT_RD;
            }
        }
    }
done:
    return (error);
}
/*
 * This is called when the socket is closed and there is no more
 * opportunity for filtering
 */
void
cfil_sock_is_closed(struct socket *so)
{
    errno_t error = 0;
    int kcunit;

    if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL)
        return;

    CFIL_LOG(LOG_INFO, "so %llx", (uint64_t)VM_KERNEL_ADDRPERM(so));

    socket_lock_assert_owned(so);

    for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
        /* Let the filters know of the closing */
        error = cfil_dispatch_closed_event(so, kcunit);
    }

    /* Last chance to push passed data out */
    error = cfil_acquire_sockbuf(so, 1);
    if (error == 0)
        cfil_service_inject_queue(so, 1);
    cfil_release_sockbuf(so, 1);

    so->so_cfil->cfi_flags |= CFIF_SOCK_CLOSED;

    /* Pending data needs to go */
    cfil_flush_queues(so);

    CFIL_INFO_VERIFY(so->so_cfil);
}
/*
 * This is called when the socket is disconnected so let the filters
 * know about the disconnection and that no more data will come
 *
 * The how parameter has the same values as soshutdown()
 */
void
cfil_sock_notify_shutdown(struct socket *so, int how)
{
    errno_t error = 0;
    int kcunit;

    if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL)
        return;

    CFIL_LOG(LOG_INFO, "so %llx how %d",
        (uint64_t)VM_KERNEL_ADDRPERM(so), how);

    socket_lock_assert_owned(so);

    for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
        /* Disconnect incoming side */
        if (how != SHUT_WR)
            error = cfil_dispatch_disconnect_event(so, kcunit, 0);
        /* Disconnect outgoing side */
        if (how != SHUT_RD)
            error = cfil_dispatch_disconnect_event(so, kcunit, 1);
    }
}
static int
cfil_filters_attached(struct socket *so)
{
    struct cfil_entry *entry;
    uint32_t kcunit;
    int attached = 0;

    if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL)
        return (0);

    socket_lock_assert_owned(so);

    for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
        entry = &so->so_cfil->cfi_entries[kcunit - 1];

        /* Are we attached to the filter? */
        if (entry->cfe_filter == NULL)
            continue;
        if ((entry->cfe_flags & CFEF_SENT_SOCK_ATTACHED) == 0)
            continue;
        if ((entry->cfe_flags & CFEF_CFIL_DETACHED) != 0)
            continue;
        attached = 1;
        break;
    }

    return (attached);
}
/*
 * This is called when the socket is closed and we are waiting for
 * the filters to give the final pass or drop
 */
void
cfil_sock_close_wait(struct socket *so)
{
    lck_mtx_t *mutex_held;
    struct timespec ts;
    int error;

    if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL)
        return;

    CFIL_LOG(LOG_INFO, "so %llx", (uint64_t)VM_KERNEL_ADDRPERM(so));

    if (so->so_proto->pr_getlock != NULL)
        mutex_held = (*so->so_proto->pr_getlock)(so, PR_F_WILLUNLOCK);
    else
        mutex_held = so->so_proto->pr_domain->dom_mtx;
    LCK_MTX_ASSERT(mutex_held, LCK_MTX_ASSERT_OWNED);

    while (cfil_filters_attached(so)) {
        /*
         * Notify the filters we are going away so they can detach
         */
        cfil_sock_notify_shutdown(so, SHUT_RDWR);

        /*
         * Make sure we still need to wait after the filters are
         * notified of the disconnection
         */
        if (cfil_filters_attached(so) == 0)
            break;

        CFIL_LOG(LOG_INFO, "so %llx waiting",
            (uint64_t)VM_KERNEL_ADDRPERM(so));

        ts.tv_sec = cfil_close_wait_timeout / 1000;
        ts.tv_nsec = (cfil_close_wait_timeout % 1000) *
            NSEC_PER_USEC * 1000;

        OSIncrementAtomic(&cfil_stats.cfs_close_wait);
        so->so_cfil->cfi_flags |= CFIF_CLOSE_WAIT;
        error = msleep((caddr_t)&so->so_cfil, mutex_held,
            PSOCK | PCATCH, "cfil_sock_close_wait", &ts);
        so->so_cfil->cfi_flags &= ~CFIF_CLOSE_WAIT;

        CFIL_LOG(LOG_NOTICE, "so %llx timed out %d",
            (uint64_t)VM_KERNEL_ADDRPERM(so), (error != 0));

        /*
         * Force close in case of timeout
         */
        if (error != 0) {
            OSIncrementAtomic(&cfil_stats.cfs_close_wait_timeout);
            break;
        }
    }
}
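/*
 * Illustrative sketch only (not compiled, not part of the original file):
 * the millisecond-to-timespec conversion used for the close-wait msleep()
 * above, shown on its own. For example, a timeout of 1500 ms yields
 * { .tv_sec = 1, .tv_nsec = 500000000 }.
 */
#if 0
static struct timespec
close_wait_timeout_sketch(unsigned int timeout_msec)
{
    struct timespec ts;

    ts.tv_sec = timeout_msec / 1000;
    ts.tv_nsec = (timeout_msec % 1000) * NSEC_PER_USEC * 1000;
    return (ts);
}
#endif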
/*
 * Return the amount of data held back by the content filter for the given
 * socket buffer
 */
int32_t
cfil_sock_data_pending(struct sockbuf *sb)
{
    struct socket *so = sb->sb_so;
    uint64_t pending = 0;

    if ((so->so_flags & SOF_CONTENT_FILTER) != 0 && so->so_cfil != NULL) {
        struct cfi_buf *cfi_buf;

        socket_lock_assert_owned(so);

        if ((sb->sb_flags & SB_RECV) == 0)
            cfi_buf = &so->so_cfil->cfi_snd;
        else
            cfi_buf = &so->so_cfil->cfi_rcv;

        pending = cfi_buf->cfi_pending_last -
            cfi_buf->cfi_pending_first;

        /*
         * If we are limited by the "chars of mbufs used" roughly
         * adjust so we won't overcommit
         */
        if (pending > (uint64_t)cfi_buf->cfi_pending_mbcnt)
            pending = cfi_buf->cfi_pending_mbcnt;
    }

    VERIFY(pending < INT32_MAX);

    return (int32_t)(pending);
}
/*
 * Return the socket buffer space used by data being held by content filters
 * so processes won't clog the socket buffer
 */
int32_t
cfil_sock_data_space(struct sockbuf *sb)
{
    struct socket *so = sb->sb_so;
    uint64_t pending = 0;

    if ((so->so_flags & SOF_CONTENT_FILTER) != 0 && so->so_cfil != NULL &&
        so->so_snd.sb_cfil_thread != current_thread()) {
        struct cfi_buf *cfi_buf;

        socket_lock_assert_owned(so);

        if ((sb->sb_flags & SB_RECV) == 0)
            cfi_buf = &so->so_cfil->cfi_snd;
        else
            cfi_buf = &so->so_cfil->cfi_rcv;

        pending = cfi_buf->cfi_pending_last -
            cfi_buf->cfi_pending_first;

        /*
         * If we are limited by the "chars of mbufs used" roughly
         * adjust so we won't overcommit
         */
        if ((uint64_t)cfi_buf->cfi_pending_mbcnt > pending)
            pending = cfi_buf->cfi_pending_mbcnt;
    }

    VERIFY(pending < INT32_MAX);

    return (int32_t)(pending);
}
/*
 * A callback from the socket and protocol layer when data becomes
 * available in the socket buffer to give the content filter a chance
 * to re-inject data that was held back
 */
void
cfil_sock_buf_update(struct sockbuf *sb)
{
    int outgoing;
    int error;
    struct socket *so = sb->sb_so;

    if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL)
        return;

    socket_lock_assert_owned(so);

    if ((sb->sb_flags & SB_RECV) == 0) {
        if ((so->so_cfil->cfi_flags & CFIF_RETRY_INJECT_OUT) == 0)
            return;
        outgoing = 1;
        OSIncrementAtomic(&cfil_stats.cfs_inject_q_out_retry);
    } else {
        if ((so->so_cfil->cfi_flags & CFIF_RETRY_INJECT_IN) == 0)
            return;
        outgoing = 0;
        OSIncrementAtomic(&cfil_stats.cfs_inject_q_in_retry);
    }

    CFIL_LOG(LOG_NOTICE, "so %llx outgoing %d",
        (uint64_t)VM_KERNEL_ADDRPERM(so), outgoing);

    error = cfil_acquire_sockbuf(so, outgoing);
    if (error == 0)
        cfil_service_inject_queue(so, outgoing);
    cfil_release_sockbuf(so, outgoing);
}
static int
sysctl_cfil_filter_list(struct sysctl_oid *oidp, void *arg1, int arg2,
    struct sysctl_req *req)
{
#pragma unused(oidp, arg1, arg2)
    int error = 0;
    size_t len = 0;
    u_int32_t i;

    /* Read only */
    if (req->newptr != USER_ADDR_NULL)
        return (EPERM);

    cfil_rw_lock_shared(&cfil_lck_rw);

    for (i = 0; content_filters != NULL && i < MAX_CONTENT_FILTER; i++) {
        struct cfil_filter_stat filter_stat;
        struct content_filter *cfc = content_filters[i];

        if (cfc == NULL)
            continue;

        /* If just asking for the size */
        if (req->oldptr == USER_ADDR_NULL) {
            len += sizeof(struct cfil_filter_stat);
            continue;
        }

        bzero(&filter_stat, sizeof(struct cfil_filter_stat));
        filter_stat.cfs_len = sizeof(struct cfil_filter_stat);
        filter_stat.cfs_filter_id = cfc->cf_kcunit;
        filter_stat.cfs_flags = cfc->cf_flags;
        filter_stat.cfs_sock_count = cfc->cf_sock_count;
        filter_stat.cfs_necp_control_unit = cfc->cf_necp_control_unit;

        error = SYSCTL_OUT(req, &filter_stat,
            sizeof (struct cfil_filter_stat));
        if (error != 0)
            break;
    }
    /* If just asking for the size */
    if (req->oldptr == USER_ADDR_NULL)
        req->oldidx = len;

    cfil_rw_unlock_shared(&cfil_lck_rw);

    return (error);
}
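/*
 * Illustrative user-space sketch only (not compiled here, not part of the
 * original file): reading the per-filter statistics exported by the
 * handler above. The sysctl name is assumed to be "net.cfil.filter_list"
 * (the handler is attached under the net.cfil node); the two-call pattern
 * sizes the buffer first.
 */
#if 0
#include <sys/sysctl.h>
#include <stdio.h>
#include <stdlib.h>
#include <net/content_filter.h>

static void
list_filters_sketch(void)
{
    size_t len = 0;
    struct cfil_filter_stat *stats;
    size_t i, count;

    if (sysctlbyname("net.cfil.filter_list", NULL, &len, NULL, 0) != 0 || len == 0)
        return;
    stats = malloc(len);
    if (stats == NULL)
        return;
    if (sysctlbyname("net.cfil.filter_list", stats, &len, NULL, 0) == 0) {
        count = len / sizeof(struct cfil_filter_stat);
        for (i = 0; i < count; i++)
            printf("filter %u: flags 0x%x sockets %u necp unit %u\n",
                stats[i].cfs_filter_id, stats[i].cfs_flags,
                stats[i].cfs_sock_count, stats[i].cfs_necp_control_unit);
    }
    free(stats);
}
#endif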
static int sysctl_cfil_sock_list(struct sysctl_oid *oidp, void *arg1, int arg2,
    struct sysctl_req *req)
{
#pragma unused(oidp, arg1, arg2)
    int error = 0;
    u_int32_t i;
    struct cfil_info *cfi;

    /* Read only */
    if (req->newptr != USER_ADDR_NULL)
        return (EPERM);

    cfil_rw_lock_shared(&cfil_lck_rw);

    /*
     * If just asking for the size,
     */
    if (req->oldptr == USER_ADDR_NULL) {
        req->oldidx = cfil_sock_attached_count *
            sizeof(struct cfil_sock_stat);
        /* Bump the length in case new sockets get attached */
        req->oldidx += req->oldidx >> 3;
        goto done;
    }

    TAILQ_FOREACH(cfi, &cfil_sock_head, cfi_link) {
        struct cfil_entry *entry;
        struct cfil_sock_stat stat;
        struct socket *so = cfi->cfi_so;

        bzero(&stat, sizeof(struct cfil_sock_stat));
        stat.cfs_len = sizeof(struct cfil_sock_stat);
        stat.cfs_sock_id = cfi->cfi_sock_id;
        stat.cfs_flags = cfi->cfi_flags;

        if (so != NULL) {
            stat.cfs_pid = so->last_pid;
            memcpy(stat.cfs_uuid, so->last_uuid,
                sizeof(uuid_t));
            if (so->so_flags & SOF_DELEGATED) {
                stat.cfs_e_pid = so->e_pid;
                memcpy(stat.cfs_e_uuid, so->e_uuid,
                    sizeof(uuid_t));
            } else {
                stat.cfs_e_pid = so->last_pid;
                memcpy(stat.cfs_e_uuid, so->last_uuid,
                    sizeof(uuid_t));
            }
        }

        stat.cfs_snd.cbs_pending_first =
            cfi->cfi_snd.cfi_pending_first;
        stat.cfs_snd.cbs_pending_last =
            cfi->cfi_snd.cfi_pending_last;
        stat.cfs_snd.cbs_inject_q_len =
            cfil_queue_len(&cfi->cfi_snd.cfi_inject_q);
        stat.cfs_snd.cbs_pass_offset =
            cfi->cfi_snd.cfi_pass_offset;

        stat.cfs_rcv.cbs_pending_first =
            cfi->cfi_rcv.cfi_pending_first;
        stat.cfs_rcv.cbs_pending_last =
            cfi->cfi_rcv.cfi_pending_last;
        stat.cfs_rcv.cbs_inject_q_len =
            cfil_queue_len(&cfi->cfi_rcv.cfi_inject_q);
        stat.cfs_rcv.cbs_pass_offset =
            cfi->cfi_rcv.cfi_pass_offset;

        for (i = 0; i < MAX_CONTENT_FILTER; i++) {
            struct cfil_entry_stat *estat;
            struct cfe_buf *ebuf;
            struct cfe_buf_stat *sbuf;

            entry = &cfi->cfi_entries[i];

            estat = &stat.ces_entries[i];

            estat->ces_len = sizeof(struct cfil_entry_stat);
            estat->ces_filter_id = entry->cfe_filter ?
                entry->cfe_filter->cf_kcunit : 0;
            estat->ces_flags = entry->cfe_flags;
            estat->ces_necp_control_unit =
                entry->cfe_necp_control_unit;

            estat->ces_last_event.tv_sec =
                (int64_t)entry->cfe_last_event.tv_sec;
            estat->ces_last_event.tv_usec =
                (int64_t)entry->cfe_last_event.tv_usec;

            estat->ces_last_action.tv_sec =
                (int64_t)entry->cfe_last_action.tv_sec;
            estat->ces_last_action.tv_usec =
                (int64_t)entry->cfe_last_action.tv_usec;

            ebuf = &entry->cfe_snd;
            sbuf = &estat->ces_snd;
            sbuf->cbs_pending_first =
                cfil_queue_offset_first(&ebuf->cfe_pending_q);
            sbuf->cbs_pending_last =
                cfil_queue_offset_last(&ebuf->cfe_pending_q);
            sbuf->cbs_ctl_first =
                cfil_queue_offset_first(&ebuf->cfe_ctl_q);
            sbuf->cbs_ctl_last =
                cfil_queue_offset_last(&ebuf->cfe_ctl_q);
            sbuf->cbs_pass_offset = ebuf->cfe_pass_offset;
            sbuf->cbs_peek_offset = ebuf->cfe_peek_offset;
            sbuf->cbs_peeked = ebuf->cfe_peeked;

            ebuf = &entry->cfe_rcv;
            sbuf = &estat->ces_rcv;
            sbuf->cbs_pending_first =
                cfil_queue_offset_first(&ebuf->cfe_pending_q);
            sbuf->cbs_pending_last =
                cfil_queue_offset_last(&ebuf->cfe_pending_q);
            sbuf->cbs_ctl_first =
                cfil_queue_offset_first(&ebuf->cfe_ctl_q);
            sbuf->cbs_ctl_last =
                cfil_queue_offset_last(&ebuf->cfe_ctl_q);
            sbuf->cbs_pass_offset = ebuf->cfe_pass_offset;
            sbuf->cbs_peek_offset = ebuf->cfe_peek_offset;
            sbuf->cbs_peeked = ebuf->cfe_peeked;
        }
        error = SYSCTL_OUT(req, &stat,
            sizeof (struct cfil_sock_stat));
        if (error != 0)
            break;
    }
done:
    cfil_rw_unlock_shared(&cfil_lck_rw);