1/*
2 * Copyright (c) 2013-2017 Apple Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. Please obtain a copy of the License at
10 * http://www.opensource.apple.com/apsl/ and read it before using this
11 * file.
12 *
13 * The Original Code and all software distributed under the License are
14 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
18 * Please see the License for the specific language governing rights and
19 * limitations under the License.
20 *
21 * @APPLE_LICENSE_HEADER_END@
22 */
23
24/*
25 * THEORY OF OPERATION
26 *
27 * The socket content filter subsystem provides a way for user space agents to
28 * make filtering decisions based on the content of the data being sent and
29 * received by TCP/IP sockets.
30 *
31 * A content filter user space agent gets a copy of the data, and the data is
32 * also kept in a kernel buffer until the user space agent makes a pass or drop
33 * decision. This unidirectional flow of content avoids unnecessary data copies
34 * back to the kernel.
35 *
36 * A user space filter agent opens a kernel control socket with the name
37 * CONTENT_FILTER_CONTROL_NAME to attach to the socket content filter subsystem.
38 * When connected, a "struct content_filter" is created and set as the
39 * "unitinfo" of the corresponding kernel control socket instance.
40 *
41 * The socket content filter subsystem exchanges messages with the user space
42 * filter agent until an ultimate pass or drop decision is made by the
43 * user space filter agent.
44 *
45 * It should be noted that messages about many TCP/IP sockets can be multiplexed
46 * over a single kernel control socket.
47 *
48 * Notes:
49 * - The current implementation is limited to TCP sockets.
50 * - The current implementation supports up to two simultaneous content filters
51 * to keep the implementation simple.
52 *
53 *
54 * NECP FILTER CONTROL UNIT
55 *
56 * A user space filter agent uses the Network Extension Control Policy (NECP)
57 * database to specify which TCP/IP sockets need to be filtered. The NECP
58 * criteria may be based on a variety of properties like user ID or proc UUID.
59 *
60 * The NECP "filter control unit" is used by the socket content filter subsystem
61 * to deliver the relevant TCP/IP content information to the appropriate
62 * user space filter agent via its kernel control socket instance.
63 * This works as follows:
64 *
65 * 1) The user space filter agent specifies an NECP filter control unit when
66 * it adds its filtering rules to the NECP database (see the sketch below).
67 *
68 * 2) The user space filter agent also sets its NECP filter control unit on the
69 * content filter kernel control socket via the socket option
70 * CFIL_OPT_NECP_CONTROL_UNIT.
71 *
72 * 3) The NECP database is consulted to find out if a given TCP/IP socket
73 * needs to be subjected to content filtering and returns the corresponding
74 * NECP filter control unit -- the NECP filter control unit is actually
75 * stored in the TCP/IP socket structure so the NECP lookup is really simple.
76 *
77 * 4) The NECP filter control unit is then used to find the corresponding
78 * kernel control socket instance.
79 *
80 * Note: NECP currently supports a single filter control unit per TCP/IP socket
81 * but this restriction may soon be lifted.
82 *
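 * As an illustration of steps 1) and 2), a user space agent typically opens
 * the (privileged) content filter kernel control socket and sets its NECP
 * filter control unit like this -- a minimal sketch, not part of this file,
 * with error handling and headers omitted and an arbitrary control unit of 1:
 *
 *      int fd = socket(PF_SYSTEM, SOCK_DGRAM, SYSPROTO_CONTROL);
 *
 *      struct ctl_info info;
 *      bzero(&info, sizeof(info));
 *      strlcpy(info.ctl_name, CONTENT_FILTER_CONTROL_NAME, sizeof(info.ctl_name));
 *      ioctl(fd, CTLIOCGINFO, &info);          // resolve the control name to an id
 *
 *      struct sockaddr_ctl sc;
 *      bzero(&sc, sizeof(sc));
 *      sc.sc_len = sizeof(sc);
 *      sc.sc_family = AF_SYSTEM;
 *      sc.ss_sysaddr = AF_SYS_CONTROL;
 *      sc.sc_id = info.ctl_id;
 *      sc.sc_unit = 1;                         // kcunit in [1, MAX_CONTENT_FILTER]
 *      connect(fd, (struct sockaddr *)&sc, sizeof(sc));
 *
 *      uint32_t control_unit = 1;              // must match the NECP filter rule
 *      setsockopt(fd, SYSPROTO_CONTROL, CFIL_OPT_NECP_CONTROL_UNIT,
 *          &control_unit, sizeof(control_unit));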
83 *
84 * THE MESSAGING PROTOCOL
85 *
86 * The socket content filter subsystem and a user space filter agent
87 * communicate over the kernel control socket via an asynchronous
88 * messaging protocol (this is not a request-response protocol).
89 * The socket content filter subsystem sends event messages to the user
90 * space filter agent about the TCP/IP sockets it is interested in filtering.
91 * The user space filter agent sends action messages to either allow
92 * data to pass or to disallow the data flow (and drop the connection).
93 *
94 * All messages over a content filter kernel control socket share the same
95 * common header of type "struct cfil_msg_hdr". The message type tells if
96 * it's an event message "CFM_TYPE_EVENT" or an action message "CFM_TYPE_ACTION".
97 * The message header field "cfm_sock_id" identifies a given TCP/IP socket.
98 * Note the message header length field may be padded for alignment and can
99 * be larger than the actual content of the message.
100 * The field "cfm_op" describes the kind of event or action.
101 *
102 * Here are the kinds of content filter events:
103 * - CFM_OP_SOCKET_ATTACHED: a new TCP/IP socket is being filtered
104 * - CFM_OP_SOCKET_CLOSED: A TCP/IP socket is closed
105 * - CFM_OP_DATA_OUT: A span of data is being sent on a TCP/IP socket
106 * - CFM_OP_DATA_IN: A span of data is being received on a TCP/IP socket
107 *
108 *
109 * EVENT MESSAGES
110 *
111 * The CFM_OP_DATA_OUT and CFM_OP_DATA_IN event messages contain a span of
112 * data that is being sent or received. The position of this span of data
113 * in the data flow is described by a set of start and end offsets. These
114 * are absolute 64-bit offsets. The first byte sent (or received) starts
115 * at offset 0 and ends at offset 1. The length of the content data
116 * is given by the difference between the end offset and the start offset.
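 *
 * For example (illustrative numbers only): if an application first sends
 * 1000 bytes and later 500 more, the first CFM_OP_DATA_OUT event could
 * describe the span with start offset 0 and end offset 1000, and a second
 * event the span with start offset 1000 and end offset 1500.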
117 *
118 * After a CFM_OP_SOCKET_ATTACHED is delivered, CFM_OP_DATA_OUT and
119 * CFM_OP_DATA_IN events are not delivered until a CFM_OP_DATA_UPDATE
120 * action message is sent by the user space filter agent.
121 *
122 * Note: absolute 64-bit offsets should be large enough for the foreseeable
123 * future. A 64-bit counter will wrap after 468 years at 10 Gbit/sec:
124 * 2^64 / ((10 * 10^9 / 8) * 60 * 60 * 24 * 365.25) = 467.63
125 *
126 * There are two kinds of primary content filter actions:
127 * - CFM_OP_DATA_UPDATE: to update pass or peek offsets for each direction.
128 * - CFM_OP_DROP: to shut down the socket and disallow further data flow
129 *
130 * There is also an action to mark a given client flow as already filtered
131 * at a higher level, CFM_OP_BLESS_CLIENT.
132 *
133 *
134 * ACTION MESSAGES
135 *
136 * The CFM_OP_DATA_UPDATE action messages let the user space filter
137 * agent allow data to flow up to the specified pass offset -- there
138 * is a pass offset for outgoing data and a pass offset for incoming data.
139 * When a new TCP/IP socket is attached to the content filter, each pass offset
140 * is initially set to 0 so no data is allowed to pass by default.
141 * When the pass offset is set to CFM_MAX_OFFSET via a CFM_OP_DATA_UPDATE
142 * then the data flow becomes unrestricted.
143 *
144 * Note that pass offsets can only be incremented. A CFM_OP_DATA_UPDATE message
145 * with a pass offset smaller than the pass offset of a previous
146 * CFM_OP_DATA_UPDATE message is silently ignored.
147 *
148 * A user space filter agent also uses CFM_OP_DATA_UPDATE action messages
149 * to tell the kernel how much data it wants to see by using the peek offsets.
150 * Just like pass offsets, there is a peek offset for each direction.
151 * When a new TCP/IP socket is attached to the content filter, each peek offset
152 * is initially set to 0 so no CFM_OP_DATA_OUT and CFM_OP_DATA_IN event
153 * messages are dispatched by default until a CFM_OP_DATA_UPDATE action message
154 * with a peek offset greater than 0 is sent by the user space filter agent.
155 * When the peek offset is set to CFM_MAX_OFFSET via a CFM_OP_DATA_UPDATE
156 * then the flow of update data events becomes unrestricted.
157 *
158 * Note that peek offsets cannot be smaller than the corresponding pass offset.
159 * Also a peek offset cannot be smaller than the corresponding end offset
160 * of the last CFM_OP_DATA_OUT/CFM_OP_DATA_IN message dispatched. Trying
161 * to set a peek value that is too small is silently ignored.
162 *
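 * As a sketch of a typical action message (assuming, as the cast in
 * cfil_ctl_send() implies, that the message header is the first member of
 * "struct cfil_msg_action"; the exact layout is in <net/content_filter.h>
 * and the peek value below is an arbitrary example), a filter agent could
 * allow all outgoing data and peek at the first 2048 incoming bytes with:
 *
 *      struct cfil_msg_action action;
 *      struct cfil_msg_hdr *hdr = (struct cfil_msg_hdr *)&action;
 *
 *      bzero(&action, sizeof(action));
 *      hdr->cfm_len = sizeof(action);
 *      hdr->cfm_version = CFM_VERSION_CURRENT;
 *      hdr->cfm_type = CFM_TYPE_ACTION;
 *      hdr->cfm_op = CFM_OP_DATA_UPDATE;
 *      hdr->cfm_sock_id = sock_id;             // from the CFM_OP_SOCKET_ATTACHED event
 *      action.cfa_out_pass_offset = CFM_MAX_OFFSET;    // unrestricted outgoing flow
 *      action.cfa_out_peek_offset = CFM_MAX_OFFSET;
 *      action.cfa_in_pass_offset = 0;                  // pass no incoming data yet
 *      action.cfa_in_peek_offset = 2048;               // deliver the first 2 KB of incoming data
 *      send(fd, &action, sizeof(action), 0);           // fd: the kernel control socket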
163 *
164 * PER SOCKET "struct cfil_info"
165 *
166 * As soon as a TCP/IP socket gets attached to a content filter, a
167 * "struct cfil_info" is created to hold the content filtering state for this
168 * socket.
169 *
170 * The content filtering state is made of the following information
171 * for each direction:
172 * - The current pass offset;
173 * - The first and last offsets of the data pending, waiting for a filtering
174 * decision;
175 * - The inject queue for data that passed the filters and that needs
176 * to be re-injected;
177 * - A content filter specific state in a set of "struct cfil_entry"
178 *
179 *
180 * CONTENT FILTER STATE "struct cfil_entry"
181 *
182 * The "struct cfil_entry" maintains the information most relevant to the
183 * message handling over a kernel control socket with a user space filter agent.
184 *
185 * The "struct cfil_entry" holds the NECP filter control unit of the
186 * kernel control socket it corresponds to and also has a pointer
187 * to the corresponding "struct content_filter".
188 *
189 * For each direction, "struct cfil_entry" maintains the following information:
190 * - The pass offset
191 * - The peek offset
192 * - The offset of the last data peeked at by the filter
193 * - A queue of data that's waiting to be delivered to the user space filter
194 * agent on the kernel control socket
195 * - A queue of data for which event messages have been sent on the kernel
196 * control socket and are pending for a filtering decision.
197 *
198 *
199 * CONTENT FILTER QUEUES
200 *
201 * Data that is being filtered is steered away from the TCP/IP socket buffer
202 * and instead will sit in one of three content filter queues until the data
203 * can be re-injected into the TCP/IP socket buffer.
204 *
205 * A content filter queue is represented by "struct cfil_queue" that contains
206 * a list of mbufs and the start and end offset of the data span of
207 * the list of mbufs.
208 *
209 * The data moves into the three content filter queues according to this
210 * sequence:
211 * a) The "cfe_ctl_q" of "struct cfil_entry"
212 * b) The "cfe_pending_q" of "struct cfil_entry"
213 * c) The "cfi_inject_q" of "struct cfil_info"
214 *
215 * Note: The sequence (a),(b) may be repeated several times if there is more
216 * than one content filter attached to the TCP/IP socket.
217 *
218 * The "cfe_ctl_q" queue holds data that cannot be delivered to the
219 * kernel control socket for two reasons:
220 * - The peek offset is less than the end offset of the mbuf data
221 * - The kernel control socket is flow controlled
222 *
223 * The "cfe_pending_q" queue holds data for which CFM_OP_DATA_OUT or
224 * CFM_OP_DATA_IN have been successfully dispatched to the kernel control
225 * socket and are waiting for a pass action message from the user space
226 * filter agent. An mbuf length must be fully allowed to pass to be removed
227 * from the cfe_pending_q.
228 *
229 * The "cfi_inject_q" queue holds data that has been fully allowed to pass
230 * by the user space filter agent and that needs to be re-injected into the
231 * TCP/IP socket.
232 *
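 * To summarize, for a single content filter the outgoing data path is
 * (the incoming path is symmetrical):
 *
 *      socket data --> cfe_ctl_q --> cfe_pending_q --> cfi_inject_q --> TCP/IP socket buffer
 *                      (not yet       (event sent,       (pass allowed,
 *                       sent to the    pass decision      waiting to be
 *                       filter agent)  pending)           re-injected)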
233 *
234 * IMPACT ON FLOW CONTROL
235 *
236 * An essential aspect of the content filter subsystem is to minimize the
237 * impact on flow control of the TCP/IP sockets being filtered.
238 *
239 * The processing overhead of the content filtering may have an effect on
240 * flow control by adding noticeable delays and cannot be eliminated --
241 * care must be taken by the user space filter agent to minimize the
242 * processing delays.
243 *
244 * The amount of data being filtered is kept in buffers while waiting for
245 * a decision by the user space filter agent. This amount of data pending
246 * needs to be subtracted from the amount of data available in the
247 * corresponding TCP/IP socket buffer. This is done by modifying
248 * sbspace() and tcp_sbspace() to account for the amount of data pending
249 * in the content filter.
250 *
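 * For example (illustrative numbers only): with a 64 KB socket buffer that
 * currently holds 8 KB of data while another 4 KB sits in the content
 * filter queues awaiting a decision, sbspace() reports 64 - 8 - 4 = 52 KB
 * of available space instead of 56 KB.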
251 *
252 * LOCKING STRATEGY
253 *
254 * The global state of content filter subsystem is protected by a single
255 * read-write lock "cfil_lck_rw". The data flow can be done with the
256 * cfil read-write lock held as shared so it can be re-entered from multiple
257 * threads.
258 *
259 * The per TCP/IP socket content filter state -- "struct cfil_info" -- is
260 * protected by the socket lock.
261 *
262 * A TCP/IP socket lock cannot be taken while the cfil read-write lock
263 * is held. That's why we have some sequences where we drop the cfil read-write
264 * lock before taking the TCP/IP lock.
265 *
266 * It is also important to lock the TCP/IP socket buffer while the content
267 * filter is modifying the amount of pending data. Otherwise the calculations
268 * in sbspace() and tcp_sbspace() could be wrong.
269 *
270 * The "cfil_lck_rw" protects "struct content_filter" and also the fields
271 * "cfe_link" and "cfe_filter" of "struct cfil_entry".
272 *
273 * Actually "cfe_link" and "cfe_filter" are protected both by
274 * "cfil_lck_rw" and the socket lock: they may be modified only when
275 * "cfil_lck_rw" is exclusive and the socket is locked.
276 *
277 * To read the other fields of "struct content_filter" we have to take
278 * "cfil_lck_rw" in shared mode.
279 *
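 * The resulting pattern, used for instance by cfil_ctl_send() below, is:
 *
 *      cfil_rw_lock_shared(&cfil_lck_rw);
 *      so = cfil_socket_from_sock_id(sock_id);  // look up under the cfil lock
 *      cfil_rw_unlock_shared(&cfil_lck_rw);     // drop it before locking the socket
 *      socket_lock(so, 1);
 *      ... operate on so->so_cfil ...
 *      socket_unlock(so, 1);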
280 *
281 * LIMITATIONS
282 *
283 * - For TCP sockets only
284 *
285 * - Does not support TCP unordered messages
286 */
287
288/*
289 * TO DO LIST
290 *
291 * SOONER:
292 *
293 * Deal with OOB
294 *
295 * LATER:
296 *
297 * If support datagram, enqueue control and address mbufs as well
298 */
299
300#include <sys/types.h>
301#include <sys/kern_control.h>
302#include <sys/queue.h>
303#include <sys/domain.h>
304#include <sys/protosw.h>
305#include <sys/syslog.h>
306
307#include <kern/locks.h>
308#include <kern/zalloc.h>
309#include <kern/debug.h>
310
311#include <net/content_filter.h>
312
313#include <netinet/in_pcb.h>
314#include <netinet/tcp.h>
315#include <netinet/tcp_var.h>
316
317#include <string.h>
318#include <libkern/libkern.h>
319
320
321#define MAX_CONTENT_FILTER 2
322
323struct cfil_entry;
324
325/*
326 * The structure content_filter represents a user space content filter
327 * It's created and associated with a kernel control socket instance
328 */
329struct content_filter {
330 kern_ctl_ref cf_kcref;
331 u_int32_t cf_kcunit;
332 u_int32_t cf_flags;
333
334 uint32_t cf_necp_control_unit;
335
336 uint32_t cf_sock_count;
337 TAILQ_HEAD(, cfil_entry) cf_sock_entries;
338};
339
340#define CFF_ACTIVE 0x01
341#define CFF_DETACHING 0x02
342#define CFF_FLOW_CONTROLLED 0x04
343
344struct content_filter **content_filters = NULL;
345uint32_t cfil_active_count = 0; /* Number of active content filters */
346uint32_t cfil_sock_attached_count = 0; /* Number of socket attachments */
347uint32_t cfil_close_wait_timeout = 1000; /* in milliseconds */
348
349static kern_ctl_ref cfil_kctlref = NULL;
350
351static lck_grp_attr_t *cfil_lck_grp_attr = NULL;
352static lck_attr_t *cfil_lck_attr = NULL;
353static lck_grp_t *cfil_lck_grp = NULL;
354decl_lck_rw_data(static, cfil_lck_rw);
355
356#define CFIL_RW_LCK_MAX 8
357
358int cfil_rw_nxt_lck = 0;
359void* cfil_rw_lock_history[CFIL_RW_LCK_MAX];
360
361int cfil_rw_nxt_unlck = 0;
362void* cfil_rw_unlock_history[CFIL_RW_LCK_MAX];
363
364#define CONTENT_FILTER_ZONE_NAME "content_filter"
365#define CONTENT_FILTER_ZONE_MAX 10
366static struct zone *content_filter_zone = NULL; /* zone for content_filter */
367
368
369#define CFIL_INFO_ZONE_NAME "cfil_info"
370#define CFIL_INFO_ZONE_MAX 1024
371static struct zone *cfil_info_zone = NULL; /* zone for cfil_info */
372
373MBUFQ_HEAD(cfil_mqhead);
374
375struct cfil_queue {
376 uint64_t q_start; /* offset of first byte in queue */
377 uint64_t q_end; /* offset of last byte in queue */
378 struct cfil_mqhead q_mq;
379};
380
381/*
382 * struct cfil_entry
383 *
384 * There is one entry per content filter
385 */
386struct cfil_entry {
387 TAILQ_ENTRY(cfil_entry) cfe_link;
388 struct content_filter *cfe_filter;
389
390 struct cfil_info *cfe_cfil_info;
391 uint32_t cfe_flags;
392 uint32_t cfe_necp_control_unit;
393 struct timeval cfe_last_event; /* To user space */
394 struct timeval cfe_last_action; /* From user space */
395
396 struct cfe_buf {
397 /*
398 * cfe_pending_q holds data that has been delivered to
399 * the filter and for which we are waiting for an action
400 */
401 struct cfil_queue cfe_pending_q;
402 /*
403 * This queue is for data that has not been delivered to
404 * the content filter (new data, pass peek or flow control)
405 */
406 struct cfil_queue cfe_ctl_q;
407
408 uint64_t cfe_pass_offset;
409 uint64_t cfe_peek_offset;
410 uint64_t cfe_peeked;
411 } cfe_snd, cfe_rcv;
412};
413
414#define CFEF_CFIL_ATTACHED 0x0001 /* was attached to filter */
415#define CFEF_SENT_SOCK_ATTACHED 0x0002 /* sock attach event was sent */
416#define CFEF_DATA_START 0x0004 /* can send data event */
417#define CFEF_FLOW_CONTROLLED 0x0008 /* wait for flow control lift */
418#define CFEF_SENT_DISCONNECT_IN 0x0010 /* event was sent */
419#define CFEF_SENT_DISCONNECT_OUT 0x0020 /* event was sent */
420#define CFEF_SENT_SOCK_CLOSED 0x0040 /* closed event was sent */
421#define CFEF_CFIL_DETACHED 0x0080 /* filter was detached */
422
423
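/*
 * Record how long after the first event on this cfil_info the operation "op"
 * happened, in milliseconds, keeping at most CFI_MAX_TIME_LOG_ENTRY entries.
 */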
424#define CFI_ADD_TIME_LOG(cfil, t1, t0, op) \
425 struct timeval _tdiff; \
426 if ((cfil)->cfi_op_list_ctr < CFI_MAX_TIME_LOG_ENTRY) { \
427 timersub(t1, t0, &_tdiff); \
428 (cfil)->cfi_op_time[(cfil)->cfi_op_list_ctr] = (uint32_t)(_tdiff.tv_sec * 1000 + _tdiff.tv_usec / 1000);\
429 (cfil)->cfi_op_list[(cfil)->cfi_op_list_ctr] = (unsigned char)op; \
430 (cfil)->cfi_op_list_ctr ++; \
431 }
432
433/*
434 * struct cfil_info
435 *
436 * There is a struct cfil_info per socket
437 */
438struct cfil_info {
439 TAILQ_ENTRY(cfil_info) cfi_link;
440 struct socket *cfi_so;
441 uint64_t cfi_flags;
442 uint64_t cfi_sock_id;
443 struct timeval64 cfi_first_event;
444 uint32_t cfi_op_list_ctr;
445 uint32_t cfi_op_time[CFI_MAX_TIME_LOG_ENTRY]; /* time interval in milliseconds since first event */
446 unsigned char cfi_op_list[CFI_MAX_TIME_LOG_ENTRY];
447
448 struct cfi_buf {
449 /*
450 * cfi_pending_first and cfi_pending_last describe the total
451 * amount of data outstanding for all the filters on
452 * this socket and data in the flow queue
453 * cfi_pending_mbcnt counts in sballoc() "chars of mbufs used"
454 */
455 uint64_t cfi_pending_first;
456 uint64_t cfi_pending_last;
457 int cfi_pending_mbcnt;
458 /*
459 * cfi_pass_offset is the minimum of all the filters
460 */
461 uint64_t cfi_pass_offset;
462 /*
463 * cfi_inject_q holds data that needs to be re-injected
464 * into the socket after filtering and that can
465 * be queued because of flow control
466 */
467 struct cfil_queue cfi_inject_q;
468 } cfi_snd, cfi_rcv;
469
470 struct cfil_entry cfi_entries[MAX_CONTENT_FILTER];
471} __attribute__((aligned(8)));
472
473#define CFIF_DROP 0x0001 /* drop action applied */
474#define CFIF_CLOSE_WAIT 0x0002 /* waiting for filter to close */
475#define CFIF_SOCK_CLOSED 0x0004 /* socket is closed */
476#define CFIF_RETRY_INJECT_IN 0x0010 /* inject in failed */
477#define CFIF_RETRY_INJECT_OUT 0x0020 /* inject out failed */
478#define CFIF_SHUT_WR 0x0040 /* shutdown write */
479#define CFIF_SHUT_RD 0x0080 /* shutdown read */
480
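/*
 * A cfil sock_id packs the socket generation count in its upper 32 bits and
 * the inp flow hash in its lower 32 bits; cfil_socket_from_sock_id() below
 * splits a sock_id back into those two parts to find the matching socket.
 */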
481#define CFI_MASK_GENCNT 0xFFFFFFFF00000000 /* upper 32 bits */
482#define CFI_SHIFT_GENCNT 32
483#define CFI_MASK_FLOWHASH 0x00000000FFFFFFFF /* lower 32 bits */
484#define CFI_SHIFT_FLOWHASH 0
485
486TAILQ_HEAD(cfil_sock_head, cfil_info) cfil_sock_head;
487
488#define CFIL_QUEUE_VERIFY(x) if (cfil_debug) cfil_queue_verify(x)
489#define CFIL_INFO_VERIFY(x) if (cfil_debug) cfil_info_verify(x)
490
491/*
492 * Statistics
493 */
494
495struct cfil_stats cfil_stats;
496
497/*
498 * For troubleshooting
499 */
500int cfil_log_level = LOG_ERR;
501int cfil_debug = 1;
502
503/*
504 * Sysctls for logs and statistics
505 */
506static int sysctl_cfil_filter_list(struct sysctl_oid *, void *, int,
507 struct sysctl_req *);
508static int sysctl_cfil_sock_list(struct sysctl_oid *, void *, int,
509 struct sysctl_req *);
510
511SYSCTL_NODE(_net, OID_AUTO, cfil, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "cfil");
512
513SYSCTL_INT(_net_cfil, OID_AUTO, log, CTLFLAG_RW|CTLFLAG_LOCKED,
514 &cfil_log_level, 0, "");
515
516SYSCTL_INT(_net_cfil, OID_AUTO, debug, CTLFLAG_RW|CTLFLAG_LOCKED,
517 &cfil_debug, 0, "");
518
519SYSCTL_UINT(_net_cfil, OID_AUTO, sock_attached_count, CTLFLAG_RD|CTLFLAG_LOCKED,
520 &cfil_sock_attached_count, 0, "");
521
522SYSCTL_UINT(_net_cfil, OID_AUTO, active_count, CTLFLAG_RD|CTLFLAG_LOCKED,
523 &cfil_active_count, 0, "");
524
525SYSCTL_UINT(_net_cfil, OID_AUTO, close_wait_timeout, CTLFLAG_RW|CTLFLAG_LOCKED,
526 &cfil_close_wait_timeout, 0, "");
527
528static int cfil_sbtrim = 1;
529SYSCTL_UINT(_net_cfil, OID_AUTO, sbtrim, CTLFLAG_RW|CTLFLAG_LOCKED,
530 &cfil_sbtrim, 0, "");
531
532SYSCTL_PROC(_net_cfil, OID_AUTO, filter_list, CTLFLAG_RD|CTLFLAG_LOCKED,
533 0, 0, sysctl_cfil_filter_list, "S,cfil_filter_stat", "");
534
535SYSCTL_PROC(_net_cfil, OID_AUTO, sock_list, CTLFLAG_RD|CTLFLAG_LOCKED,
536 0, 0, sysctl_cfil_sock_list, "S,cfil_sock_stat", "");
537
538SYSCTL_STRUCT(_net_cfil, OID_AUTO, stats, CTLFLAG_RD|CTLFLAG_LOCKED,
539 &cfil_stats, cfil_stats, "");
540
541/*
542 * Forward declaration to appease the compiler
543 */
544static int cfil_action_data_pass(struct socket *, uint32_t, int,
545 uint64_t, uint64_t);
546static int cfil_action_drop(struct socket *, uint32_t);
547static int cfil_action_bless_client(uint32_t, struct cfil_msg_hdr *);
548static int cfil_dispatch_closed_event(struct socket *, int);
549static int cfil_data_common(struct socket *, int, struct sockaddr *,
550 struct mbuf *, struct mbuf *, uint32_t);
551static int cfil_data_filter(struct socket *, uint32_t, int,
552 struct mbuf *, uint64_t);
553static void fill_ip_sockaddr_4_6(union sockaddr_in_4_6 *,
554 struct in_addr, u_int16_t);
555static void fill_ip6_sockaddr_4_6(union sockaddr_in_4_6 *,
556 struct in6_addr *, u_int16_t);
557static int cfil_dispatch_attach_event(struct socket *, uint32_t);
558static void cfil_info_free(struct socket *, struct cfil_info *);
559static struct cfil_info * cfil_info_alloc(struct socket *);
560static int cfil_info_attach_unit(struct socket *, uint32_t);
561static struct socket * cfil_socket_from_sock_id(cfil_sock_id_t);
562static struct socket *cfil_socket_from_client_uuid(uuid_t, bool *);
563static int cfil_service_pending_queue(struct socket *, uint32_t, int);
564static int cfil_data_service_ctl_q(struct socket *, uint32_t, int);
565static void cfil_info_verify(struct cfil_info *);
566static int cfil_update_data_offsets(struct socket *, uint32_t, int,
567 uint64_t, uint64_t);
568static int cfil_acquire_sockbuf(struct socket *, int);
569static void cfil_release_sockbuf(struct socket *, int);
570static int cfil_filters_attached(struct socket *);
571
572static void cfil_rw_lock_exclusive(lck_rw_t *);
573static void cfil_rw_unlock_exclusive(lck_rw_t *);
574static void cfil_rw_lock_shared(lck_rw_t *);
575static void cfil_rw_unlock_shared(lck_rw_t *);
576static boolean_t cfil_rw_lock_shared_to_exclusive(lck_rw_t *);
577static void cfil_rw_lock_exclusive_to_shared(lck_rw_t *);
578
579static unsigned int cfil_data_length(struct mbuf *, int *);
580
581/*
582 * Content filter global read write lock
583 */
584
585static void
586cfil_rw_lock_exclusive(lck_rw_t *lck)
587{
588 void *lr_saved;
589
590 lr_saved = __builtin_return_address(0);
591
592 lck_rw_lock_exclusive(lck);
593
594 cfil_rw_lock_history[cfil_rw_nxt_lck] = lr_saved;
595 cfil_rw_nxt_lck = (cfil_rw_nxt_lck + 1) % CFIL_RW_LCK_MAX;
596}
597
598static void
599cfil_rw_unlock_exclusive(lck_rw_t *lck)
600{
601 void *lr_saved;
602
603 lr_saved = __builtin_return_address(0);
604
605 lck_rw_unlock_exclusive(lck);
606
607 cfil_rw_unlock_history[cfil_rw_nxt_unlck] = lr_saved;
608 cfil_rw_nxt_unlck = (cfil_rw_nxt_unlck + 1) % CFIL_RW_LCK_MAX;
609}
610
611static void
612cfil_rw_lock_shared(lck_rw_t *lck)
613{
614 void *lr_saved;
615
616 lr_saved = __builtin_return_address(0);
617
618 lck_rw_lock_shared(lck);
619
620 cfil_rw_lock_history[cfil_rw_nxt_lck] = lr_saved;
621 cfil_rw_nxt_lck = (cfil_rw_nxt_lck + 1) % CFIL_RW_LCK_MAX;
622}
623
624static void
625cfil_rw_unlock_shared(lck_rw_t *lck)
626{
627 void *lr_saved;
628
629 lr_saved = __builtin_return_address(0);
630
631 lck_rw_unlock_shared(lck);
632
633 cfil_rw_unlock_history[cfil_rw_nxt_unlck] = lr_saved;
634 cfil_rw_nxt_unlck = (cfil_rw_nxt_unlck + 1) % CFIL_RW_LCK_MAX;
635}
636
637static boolean_t
638cfil_rw_lock_shared_to_exclusive(lck_rw_t *lck)
639{
640 void *lr_saved;
641 boolean_t upgraded;
642
643 lr_saved = __builtin_return_address(0);
644
645 upgraded = lck_rw_lock_shared_to_exclusive(lck);
646 if (upgraded) {
647 cfil_rw_unlock_history[cfil_rw_nxt_unlck] = lr_saved;
648 cfil_rw_nxt_unlck = (cfil_rw_nxt_unlck + 1) % CFIL_RW_LCK_MAX;
649 }
650 return (upgraded);
651}
652
653static void
654cfil_rw_lock_exclusive_to_shared(lck_rw_t *lck)
655{
656 void *lr_saved;
657
658 lr_saved = __builtin_return_address(0);
659
660 lck_rw_lock_exclusive_to_shared(lck);
661
662 cfil_rw_lock_history[cfil_rw_nxt_lck] = lr_saved;
663 cfil_rw_nxt_lck = (cfil_rw_nxt_lck + 1) % CFIL_RW_LCK_MAX;
664}
665
666static void
667cfil_rw_lock_assert_held(lck_rw_t *lck, int exclusive)
668{
669#if !MACH_ASSERT
670#pragma unused(lck, exclusive)
671#endif
672 LCK_RW_ASSERT(lck,
673 exclusive ? LCK_RW_ASSERT_EXCLUSIVE : LCK_RW_ASSERT_HELD);
674}
675
676/*
677 * Return the number of bytes in the mbuf chain using the same
678 * method as m_length() or sballoc()
679 */
680static unsigned int
681cfil_data_length(struct mbuf *m, int *retmbcnt)
682{
683 struct mbuf *m0;
684 unsigned int pktlen;
685 int mbcnt;
686
687 if (retmbcnt == NULL)
688 return (m_length(m));
689
690 pktlen = 0;
691 mbcnt = 0;
692 for (m0 = m; m0 != NULL; m0 = m0->m_next) {
693 pktlen += m0->m_len;
694 mbcnt += MSIZE;
695 if (m0->m_flags & M_EXT)
696 mbcnt += m0->m_ext.ext_size;
697 }
698 *retmbcnt = mbcnt;
699 return (pktlen);
700}
701
702/*
703 * Common mbuf queue utilities
704 */
705
706static inline void
707cfil_queue_init(struct cfil_queue *cfq)
708{
709 cfq->q_start = 0;
710 cfq->q_end = 0;
711 MBUFQ_INIT(&cfq->q_mq);
712}
713
714static inline uint64_t
715cfil_queue_drain(struct cfil_queue *cfq)
716{
717 uint64_t drained = cfq->q_start - cfq->q_end;
718 cfq->q_start = 0;
719 cfq->q_end = 0;
720 MBUFQ_DRAIN(&cfq->q_mq);
721
722 return (drained);
723}
724
725/* Return 1 when empty, 0 otherwise */
726static inline int
727cfil_queue_empty(struct cfil_queue *cfq)
728{
729 return (MBUFQ_EMPTY(&cfq->q_mq));
730}
731
732static inline uint64_t
733cfil_queue_offset_first(struct cfil_queue *cfq)
734{
735 return (cfq->q_start);
736}
737
738static inline uint64_t
739cfil_queue_offset_last(struct cfil_queue *cfq)
740{
741 return (cfq->q_end);
742}
743
744static inline uint64_t
745cfil_queue_len(struct cfil_queue *cfq)
746{
747 return (cfq->q_end - cfq->q_start);
748}
749
750/*
751 * Routines to verify some fundamental assumptions
752 */
753
754static void
755cfil_queue_verify(struct cfil_queue *cfq)
756{
757 mbuf_t m;
758 mbuf_t n;
759 uint64_t queuesize = 0;
760
761 /* Verify offsets are ordered */
762 VERIFY(cfq->q_start <= cfq->q_end);
763
764 /*
765 * When queue is empty, the offsets are equal otherwise the offsets
766 * are different
767 */
768 VERIFY((MBUFQ_EMPTY(&cfq->q_mq) && cfq->q_start == cfq->q_end) ||
769 (!MBUFQ_EMPTY(&cfq->q_mq) &&
770 cfq->q_start != cfq->q_end));
771
772 MBUFQ_FOREACH(m, &cfq->q_mq) {
773 size_t chainsize = 0;
774 unsigned int mlen = m_length(m);
775
776 if (m == (void *)M_TAG_FREE_PATTERN ||
777 m->m_next == (void *)M_TAG_FREE_PATTERN ||
778 m->m_nextpkt == (void *)M_TAG_FREE_PATTERN)
779 panic("%s - mq %p is free at %p", __func__,
780 &cfq->q_mq, m);
781 for (n = m; n != NULL; n = n->m_next) {
782 if (n->m_type != MT_DATA &&
783 n->m_type != MT_HEADER &&
784 n->m_type != MT_OOBDATA)
785 panic("%s - %p unsupported type %u", __func__,
786 n, n->m_type);
787 chainsize += n->m_len;
788 }
789 if (mlen != chainsize)
790 panic("%s - %p m_length() %u != chainsize %lu",
791 __func__, m, mlen, chainsize);
792 queuesize += chainsize;
793 }
794 if (queuesize != cfq->q_end - cfq->q_start)
795 panic("%s - %p queuesize %llu != offsetdiffs %llu", __func__,
796 m, queuesize, cfq->q_end - cfq->q_start);
797}
798
799static void
800cfil_queue_enqueue(struct cfil_queue *cfq, mbuf_t m, size_t len)
801{
802 CFIL_QUEUE_VERIFY(cfq);
803
804 MBUFQ_ENQUEUE(&cfq->q_mq, m);
805 cfq->q_end += len;
806
807 CFIL_QUEUE_VERIFY(cfq);
808}
809
810static void
811cfil_queue_remove(struct cfil_queue *cfq, mbuf_t m, size_t len)
812{
813 CFIL_QUEUE_VERIFY(cfq);
814
815 VERIFY(m_length(m) == len);
816
817 MBUFQ_REMOVE(&cfq->q_mq, m);
818 MBUFQ_NEXT(m) = NULL;
819 cfq->q_start += len;
820
821 CFIL_QUEUE_VERIFY(cfq);
822}
823
824static mbuf_t
825cfil_queue_first(struct cfil_queue *cfq)
826{
827 return (MBUFQ_FIRST(&cfq->q_mq));
828}
829
830static mbuf_t
831cfil_queue_next(struct cfil_queue *cfq, mbuf_t m)
832{
833#pragma unused(cfq)
834 return (MBUFQ_NEXT(m));
835}
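
/*
 * A content filter queue can be walked with the two accessors above, e.g.:
 *
 *      for (m = cfil_queue_first(cfq); m != NULL; m = cfil_queue_next(cfq, m)) {
 *              ... inspect the mbuf chain m ...
 *      }
 */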
836
837static void
838cfil_entry_buf_verify(struct cfe_buf *cfe_buf)
839{
840 CFIL_QUEUE_VERIFY(&cfe_buf->cfe_ctl_q);
841 CFIL_QUEUE_VERIFY(&cfe_buf->cfe_pending_q);
842
843 /* Verify the queues are ordered so that pending is before ctl */
844 VERIFY(cfe_buf->cfe_ctl_q.q_start >= cfe_buf->cfe_pending_q.q_end);
845
846 /* The peek offset cannot be less than the pass offset */
847 VERIFY(cfe_buf->cfe_peek_offset >= cfe_buf->cfe_pass_offset);
848
849 /* Make sure we've updated the offset we peeked at */
850 VERIFY(cfe_buf->cfe_ctl_q.q_start <= cfe_buf->cfe_peeked);
851}
852
853static void
854cfil_entry_verify(struct cfil_entry *entry)
855{
856 cfil_entry_buf_verify(&entry->cfe_snd);
857 cfil_entry_buf_verify(&entry->cfe_rcv);
858}
859
860static void
861cfil_info_buf_verify(struct cfi_buf *cfi_buf)
862{
863 CFIL_QUEUE_VERIFY(&cfi_buf->cfi_inject_q);
864
865 VERIFY(cfi_buf->cfi_pending_first <= cfi_buf->cfi_pending_last);
866 VERIFY(cfi_buf->cfi_pending_mbcnt >= 0);
867}
868
869static void
870cfil_info_verify(struct cfil_info *cfil_info)
871{
872 int i;
873
874 if (cfil_info == NULL)
875 return;
876
877 cfil_info_buf_verify(&cfil_info->cfi_snd);
878 cfil_info_buf_verify(&cfil_info->cfi_rcv);
879
880 for (i = 0; i < MAX_CONTENT_FILTER; i++)
881 cfil_entry_verify(&cfil_info->cfi_entries[i]);
882}
883
884static void
885verify_content_filter(struct content_filter *cfc)
886{
887 struct cfil_entry *entry;
888 uint32_t count = 0;
889
890 VERIFY(cfc->cf_sock_count >= 0);
891
892 TAILQ_FOREACH(entry, &cfc->cf_sock_entries, cfe_link) {
893 count++;
894 VERIFY(cfc == entry->cfe_filter);
895 }
896 VERIFY(count == cfc->cf_sock_count);
897}
898
899/*
900 * Kernel control socket callbacks
901 */
902static errno_t
903cfil_ctl_connect(kern_ctl_ref kctlref, struct sockaddr_ctl *sac,
904 void **unitinfo)
905{
906 errno_t error = 0;
907 struct content_filter *cfc = NULL;
908
909 CFIL_LOG(LOG_NOTICE, "");
910
911 cfc = zalloc(content_filter_zone);
912 if (cfc == NULL) {
913 CFIL_LOG(LOG_ERR, "zalloc failed");
914 error = ENOMEM;
915 goto done;
916 }
917 bzero(cfc, sizeof(struct content_filter));
918
919 cfil_rw_lock_exclusive(&cfil_lck_rw);
920 if (content_filters == NULL) {
921 struct content_filter **tmp;
922
923 cfil_rw_unlock_exclusive(&cfil_lck_rw);
924
925 MALLOC(tmp,
926 struct content_filter **,
927 MAX_CONTENT_FILTER * sizeof(struct content_filter *),
928 M_TEMP,
929 M_WAITOK | M_ZERO);
930
931 cfil_rw_lock_exclusive(&cfil_lck_rw);
932
933 if (tmp == NULL && content_filters == NULL) {
934 error = ENOMEM;
935 cfil_rw_unlock_exclusive(&cfil_lck_rw);
936 goto done;
937 }
938 /* Another thread may have won the race */
939 if (content_filters != NULL)
940 FREE(tmp, M_TEMP);
941 else
942 content_filters = tmp;
943 }
944
945 if (sac->sc_unit == 0 || sac->sc_unit > MAX_CONTENT_FILTER) {
946 CFIL_LOG(LOG_ERR, "bad sc_unit %u", sac->sc_unit);
947 error = EINVAL;
948 } else if (content_filters[sac->sc_unit - 1] != NULL) {
949 CFIL_LOG(LOG_ERR, "sc_unit %u in use", sac->sc_unit);
950 error = EADDRINUSE;
951 } else {
952 /*
953 * kernel control socket kcunit numbers start at 1
954 */
955 content_filters[sac->sc_unit - 1] = cfc;
956
957 cfc->cf_kcref = kctlref;
958 cfc->cf_kcunit = sac->sc_unit;
959 TAILQ_INIT(&cfc->cf_sock_entries);
960
961 *unitinfo = cfc;
962 cfil_active_count++;
963 }
964 cfil_rw_unlock_exclusive(&cfil_lck_rw);
965done:
966 if (error != 0 && cfc != NULL)
967 zfree(content_filter_zone, cfc);
968
969 if (error == 0)
970 OSIncrementAtomic(&cfil_stats.cfs_ctl_connect_ok);
971 else
972 OSIncrementAtomic(&cfil_stats.cfs_ctl_connect_fail);
973
974 CFIL_LOG(LOG_INFO, "return %d cfil_active_count %u kcunit %u",
975 error, cfil_active_count, sac->sc_unit);
976
977 return (error);
978}
979
980static errno_t
981cfil_ctl_disconnect(kern_ctl_ref kctlref, u_int32_t kcunit, void *unitinfo)
982{
983#pragma unused(kctlref)
984 errno_t error = 0;
985 struct content_filter *cfc;
986 struct cfil_entry *entry;
987
988 CFIL_LOG(LOG_NOTICE, "");
989
990 if (content_filters == NULL) {
991 CFIL_LOG(LOG_ERR, "no content filter");
992 error = EINVAL;
993 goto done;
994 }
995 if (kcunit > MAX_CONTENT_FILTER) {
996 CFIL_LOG(LOG_ERR, "kcunit %u > MAX_CONTENT_FILTER (%d)",
997 kcunit, MAX_CONTENT_FILTER);
998 error = EINVAL;
999 goto done;
1000 }
1001
1002 cfc = (struct content_filter *)unitinfo;
1003 if (cfc == NULL)
1004 goto done;
1005
1006 cfil_rw_lock_exclusive(&cfil_lck_rw);
1007 if (content_filters[kcunit - 1] != cfc || cfc->cf_kcunit != kcunit) {
1008 CFIL_LOG(LOG_ERR, "bad unit info %u",
1009 kcunit);
1010 cfil_rw_unlock_exclusive(&cfil_lck_rw);
1011 goto done;
1012 }
1013 cfc->cf_flags |= CFF_DETACHING;
1014 /*
1015 * Remove all sockets from the filter
1016 */
1017 while ((entry = TAILQ_FIRST(&cfc->cf_sock_entries)) != NULL) {
1018 cfil_rw_lock_assert_held(&cfil_lck_rw, 1);
1019
1020 verify_content_filter(cfc);
1021 /*
1022 * Accept all outstanding data by pushing to next filter
1023 * or back to socket
1024 *
1025 * TBD: Actually we should make sure all data has been pushed
1026 * back to socket
1027 */
1028 if (entry->cfe_cfil_info && entry->cfe_cfil_info->cfi_so) {
1029 struct cfil_info *cfil_info = entry->cfe_cfil_info;
1030 struct socket *so = cfil_info->cfi_so;
1031
1032 /* Need to let data flow immediately */
1033 entry->cfe_flags |= CFEF_SENT_SOCK_ATTACHED |
1034 CFEF_DATA_START;
1035
1036 /*
1037 * Respect locking hierarchy
1038 */
1039 cfil_rw_unlock_exclusive(&cfil_lck_rw);
1040
1041 socket_lock(so, 1);
1042
1043 /*
1044 * When cfe_filter is NULL the filter is detached
1045 * and the entry has been removed from cf_sock_entries
1046 */
1047 if (so->so_cfil == NULL || entry->cfe_filter == NULL) {
1048 cfil_rw_lock_exclusive(&cfil_lck_rw);
1049 goto release;
1050 }
1051 (void) cfil_action_data_pass(so, kcunit, 1,
1052 CFM_MAX_OFFSET,
1053 CFM_MAX_OFFSET);
1054
1055 (void) cfil_action_data_pass(so, kcunit, 0,
1056 CFM_MAX_OFFSET,
1057 CFM_MAX_OFFSET);
1058
1059 cfil_rw_lock_exclusive(&cfil_lck_rw);
1060
1061 /*
1062 * Check again as the socket may have been unlocked
1063 * when calling cfil_acquire_sockbuf()
1064 */
1065 if (so->so_cfil == NULL || entry->cfe_filter == NULL)
1066 goto release;
1067
1068 /* The filter is now detached */
1069 entry->cfe_flags |= CFEF_CFIL_DETACHED;
1070 CFIL_LOG(LOG_NOTICE, "so %llx detached %u",
1071 (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit);
1072
1073 if ((so->so_cfil->cfi_flags & CFIF_CLOSE_WAIT) &&
1074 cfil_filters_attached(so) == 0) {
1075 CFIL_LOG(LOG_NOTICE, "so %llx waking",
1076 (uint64_t)VM_KERNEL_ADDRPERM(so));
1077 wakeup((caddr_t)&so->so_cfil);
1078 }
1079
1080 /*
1081 * Remove the filter entry from the content filter
1082 * but leave the rest of the state intact as the queues
1083 * may not be empty yet
1084 */
1085 entry->cfe_filter = NULL;
1086 entry->cfe_necp_control_unit = 0;
1087
1088 TAILQ_REMOVE(&cfc->cf_sock_entries, entry, cfe_link);
1089 cfc->cf_sock_count--;
1090release:
1091 socket_unlock(so, 1);
1092 }
1093 }
1094 verify_content_filter(cfc);
1095
1096 VERIFY(cfc->cf_sock_count == 0);
1097
1098 /*
1099 * Make filter inactive
1100 */
1101 content_filters[kcunit - 1] = NULL;
1102 cfil_active_count--;
1103 cfil_rw_unlock_exclusive(&cfil_lck_rw);
1104
1105 zfree(content_filter_zone, cfc);
1106done:
1107 if (error == 0)
1108 OSIncrementAtomic(&cfil_stats.cfs_ctl_disconnect_ok);
1109 else
1110 OSIncrementAtomic(&cfil_stats.cfs_ctl_disconnect_fail);
1111
1112 CFIL_LOG(LOG_INFO, "return %d cfil_active_count %u kcunit %u",
1113 error, cfil_active_count, kcunit);
1114
1115 return (error);
1116}
1117
1118/*
1119 * cfil_acquire_sockbuf()
1120 *
1121 * Prevent any other thread from acquiring the sockbuf
1122 * We use sb_cfil_thread as a semaphore to prevent other threads from
1123 * messing with the sockbuf -- see sblock()
1124 * Note: We do not set SB_LOCK here because the thread may check or modify
1125 * SB_LOCK several times until it calls cfil_release_sockbuf() -- currently
1126 * sblock(), sbunlock() or sodefunct()
1127 */
1128static int
1129cfil_acquire_sockbuf(struct socket *so, int outgoing)
1130{
1131 thread_t tp = current_thread();
1132 struct sockbuf *sb = outgoing ? &so->so_snd : &so->so_rcv;
1133 lck_mtx_t *mutex_held;
1134 int error = 0;
1135
1136 /*
1137 * Wait until no thread is holding the sockbuf and other content
1138 * filter threads have released the sockbuf
1139 */
1140 while ((sb->sb_flags & SB_LOCK) ||
1141 (sb->sb_cfil_thread != NULL && sb->sb_cfil_thread != tp)) {
1142 if (so->so_proto->pr_getlock != NULL)
1143 mutex_held = (*so->so_proto->pr_getlock)(so, PR_F_WILLUNLOCK);
1144 else
1145 mutex_held = so->so_proto->pr_domain->dom_mtx;
1146
1147 LCK_MTX_ASSERT(mutex_held, LCK_MTX_ASSERT_OWNED);
1148
1149 sb->sb_wantlock++;
1150 VERIFY(sb->sb_wantlock != 0);
1151
1152 msleep(&sb->sb_flags, mutex_held, PSOCK, "cfil_acquire_sockbuf",
1153 NULL);
1154
1155 VERIFY(sb->sb_wantlock != 0);
1156 sb->sb_wantlock--;
1157 }
1158 /*
1159 * Use reference count for repetitive calls on same thread
1160 */
1161 if (sb->sb_cfil_refs == 0) {
1162 VERIFY(sb->sb_cfil_thread == NULL);
1163 VERIFY((sb->sb_flags & SB_LOCK) == 0);
1164
1165 sb->sb_cfil_thread = tp;
1166 sb->sb_flags |= SB_LOCK;
1167 }
1168 sb->sb_cfil_refs++;
1169
1170 /* We acquire the socket buffer when we need to cleanup */
1171 if (so->so_cfil == NULL) {
1172 CFIL_LOG(LOG_ERR, "so %llx cfil detached",
1173 (uint64_t)VM_KERNEL_ADDRPERM(so));
1174 error = 0;
1175 } else if (so->so_cfil->cfi_flags & CFIF_DROP) {
1176 CFIL_LOG(LOG_ERR, "so %llx drop set",
1177 (uint64_t)VM_KERNEL_ADDRPERM(so));
1178 error = EPIPE;
1179 }
1180
1181 return (error);
1182}
1183
1184static void
1185cfil_release_sockbuf(struct socket *so, int outgoing)
1186{
1187 struct sockbuf *sb = outgoing ? &so->so_snd : &so->so_rcv;
1188 thread_t tp = current_thread();
1189
1190 socket_lock_assert_owned(so);
1191
1192 if (sb->sb_cfil_thread != NULL && sb->sb_cfil_thread != tp)
1193 panic("%s sb_cfil_thread %p not current %p", __func__,
1194 sb->sb_cfil_thread, tp);
1195 /*
1196 * Don't panic if we are defunct because SB_LOCK has
1197 * been cleared by sodefunct()
1198 */
1199 if (!(so->so_flags & SOF_DEFUNCT) && !(sb->sb_flags & SB_LOCK))
1200 panic("%s SB_LOCK not set on %p", __func__,
1201 sb);
1202 /*
1203 * We can unlock when the thread unwinds to the last reference
1204 */
1205 sb->sb_cfil_refs--;
1206 if (sb->sb_cfil_refs == 0) {
1207 sb->sb_cfil_thread = NULL;
1208 sb->sb_flags &= ~SB_LOCK;
1209
1210 if (sb->sb_wantlock > 0)
1211 wakeup(&sb->sb_flags);
1212 }
1213}
1214
1215cfil_sock_id_t
1216cfil_sock_id_from_socket(struct socket *so)
1217{
1218 if ((so->so_flags & SOF_CONTENT_FILTER) && so->so_cfil)
1219 return (so->so_cfil->cfi_sock_id);
1220 else
1221 return (CFIL_SOCK_ID_NONE);
1222}
1223
1224static struct socket *
1225cfil_socket_from_sock_id(cfil_sock_id_t cfil_sock_id)
1226{
1227 struct socket *so = NULL;
1228 u_int64_t gencnt = cfil_sock_id >> 32;
1229 u_int32_t flowhash = (u_int32_t)(cfil_sock_id & 0x0ffffffff);
1230 struct inpcb *inp = NULL;
1231 struct inpcbinfo *pcbinfo = &tcbinfo;
1232
1233 lck_rw_lock_shared(pcbinfo->ipi_lock);
1234 LIST_FOREACH(inp, pcbinfo->ipi_listhead, inp_list) {
1235 if (inp->inp_state != INPCB_STATE_DEAD &&
1236 inp->inp_socket != NULL &&
1237 inp->inp_flowhash == flowhash &&
1238 (inp->inp_socket->so_gencnt & 0x0ffffffff) == gencnt &&
1239 inp->inp_socket->so_cfil != NULL) {
1240 so = inp->inp_socket;
1241 break;
1242 }
1243 }
1244 lck_rw_done(pcbinfo->ipi_lock);
1245
1246 if (so == NULL) {
1247 OSIncrementAtomic(&cfil_stats.cfs_sock_id_not_found);
1248 CFIL_LOG(LOG_DEBUG,
1249 "no socket for sock_id %llx gencnt %llx flowhash %x",
1250 cfil_sock_id, gencnt, flowhash);
1251 }
1252
1253 return (so);
1254}
1255
1256static struct socket *
1257cfil_socket_from_client_uuid(uuid_t necp_client_uuid, bool *cfil_attached)
1258{
1259 struct socket *so = NULL;
1260 struct inpcb *inp = NULL;
1261 struct inpcbinfo *pcbinfo = &tcbinfo;
1262
1263 lck_rw_lock_shared(pcbinfo->ipi_lock);
1264 LIST_FOREACH(inp, pcbinfo->ipi_listhead, inp_list) {
1265 if (inp->inp_state != INPCB_STATE_DEAD &&
1266 inp->inp_socket != NULL &&
1267 uuid_compare(inp->necp_client_uuid, necp_client_uuid) == 0) {
1268 *cfil_attached = (inp->inp_socket->so_cfil != NULL);
1269 so = inp->inp_socket;
1270 break;
1271 }
1272 }
1273 lck_rw_done(pcbinfo->ipi_lock);
1274
1275 return (so);
1276}
1277
1278static errno_t
1279cfil_ctl_send(kern_ctl_ref kctlref, u_int32_t kcunit, void *unitinfo, mbuf_t m,
1280 int flags)
1281{
1282#pragma unused(kctlref, flags)
1283 errno_t error = 0;
1284 struct cfil_msg_hdr *msghdr;
1285 struct content_filter *cfc = (struct content_filter *)unitinfo;
1286 struct socket *so;
1287 struct cfil_msg_action *action_msg;
1288 struct cfil_entry *entry;
1289
1290 CFIL_LOG(LOG_INFO, "");
1291
1292 if (content_filters == NULL) {
1293 CFIL_LOG(LOG_ERR, "no content filter");
1294 error = EINVAL;
1295 goto done;
1296 }
1297 if (kcunit > MAX_CONTENT_FILTER) {
1298 CFIL_LOG(LOG_ERR, "kcunit %u > MAX_CONTENT_FILTER (%d)",
1299 kcunit, MAX_CONTENT_FILTER);
1300 error = EINVAL;
1301 goto done;
1302 }
1303
1304 if (m_length(m) < sizeof(struct cfil_msg_hdr)) {
1305 CFIL_LOG(LOG_ERR, "too short %u", m_length(m));
1306 error = EINVAL;
1307 goto done;
1308 }
1309 msghdr = (struct cfil_msg_hdr *)mbuf_data(m);
1310 if (msghdr->cfm_version != CFM_VERSION_CURRENT) {
1311 CFIL_LOG(LOG_ERR, "bad version %u", msghdr->cfm_version);
1312 error = EINVAL;
1313 goto done;
1314 }
1315 if (msghdr->cfm_type != CFM_TYPE_ACTION) {
1316 CFIL_LOG(LOG_ERR, "bad type %u", msghdr->cfm_type);
1317 error = EINVAL;
1318 goto done;
1319 }
1320 /* Validate action operation */
1321 switch (msghdr->cfm_op) {
1322 case CFM_OP_DATA_UPDATE:
1323 OSIncrementAtomic(
1324 &cfil_stats.cfs_ctl_action_data_update);
1325 break;
1326 case CFM_OP_DROP:
1327 OSIncrementAtomic(&cfil_stats.cfs_ctl_action_drop);
1328 break;
1329 case CFM_OP_BLESS_CLIENT:
1330 if (msghdr->cfm_len != sizeof(struct cfil_msg_bless_client)) {
1331 OSIncrementAtomic(&cfil_stats.cfs_ctl_action_bad_len);
1332 error = EINVAL;
1333 CFIL_LOG(LOG_ERR, "bad len: %u for op %u",
1334 msghdr->cfm_len,
1335 msghdr->cfm_op);
1336 goto done;
1337 }
1338 error = cfil_action_bless_client(kcunit, msghdr);
1339 goto done;
1340 default:
1341 OSIncrementAtomic(&cfil_stats.cfs_ctl_action_bad_op);
1342 CFIL_LOG(LOG_ERR, "bad op %u", msghdr->cfm_op);
1343 error = EINVAL;
1344 goto done;
1345 }
1346 if (msghdr->cfm_len != sizeof(struct cfil_msg_action)) {
1347 OSIncrementAtomic(&cfil_stats.cfs_ctl_action_bad_len);
1348 error = EINVAL;
1349 CFIL_LOG(LOG_ERR, "bad len: %u for op %u",
1350 msghdr->cfm_len,
1351 msghdr->cfm_op);
1352 goto done;
1353 }
1354 cfil_rw_lock_shared(&cfil_lck_rw);
1355 if (cfc != (void *)content_filters[kcunit - 1]) {
1356 CFIL_LOG(LOG_ERR, "unitinfo does not match for kcunit %u",
1357 kcunit);
1358 error = EINVAL;
1359 cfil_rw_unlock_shared(&cfil_lck_rw);
1360 goto done;
1361 }
1362
1363 so = cfil_socket_from_sock_id(msghdr->cfm_sock_id);
1364 if (so == NULL) {
1365 CFIL_LOG(LOG_NOTICE, "bad sock_id %llx",
1366 msghdr->cfm_sock_id);
1367 error = EINVAL;
1368 cfil_rw_unlock_shared(&cfil_lck_rw);
1369 goto done;
1370 }
1371 cfil_rw_unlock_shared(&cfil_lck_rw);
1372
1373 socket_lock(so, 1);
1374
1375 if (so->so_cfil == NULL) {
1376 CFIL_LOG(LOG_NOTICE, "so %llx not attached",
1377 (uint64_t)VM_KERNEL_ADDRPERM(so));
1378 error = EINVAL;
1379 goto unlock;
1380 } else if (so->so_cfil->cfi_flags & CFIF_DROP) {
1381 CFIL_LOG(LOG_NOTICE, "so %llx drop set",
1382 (uint64_t)VM_KERNEL_ADDRPERM(so));
1383 error = EINVAL;
1384 goto unlock;
1385 }
1386 entry = &so->so_cfil->cfi_entries[kcunit - 1];
1387 if (entry->cfe_filter == NULL) {
1388 CFIL_LOG(LOG_NOTICE, "so %llx no filter",
1389 (uint64_t)VM_KERNEL_ADDRPERM(so));
1390 error = EINVAL;
1391 goto unlock;
1392 }
1393
1394 if (entry->cfe_flags & CFEF_SENT_SOCK_ATTACHED)
1395 entry->cfe_flags |= CFEF_DATA_START;
1396 else {
1397 CFIL_LOG(LOG_ERR,
1398 "so %llx attached not sent for %u",
1399 (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit);
1400 error = EINVAL;
1401 goto unlock;
1402 }
1403
1404 microuptime(&entry->cfe_last_action);
1405 CFI_ADD_TIME_LOG(so->so_cfil, &entry->cfe_last_action, &so->so_cfil->cfi_first_event, msghdr->cfm_op);
1406
1407 action_msg = (struct cfil_msg_action *)msghdr;
1408
1409 switch (msghdr->cfm_op) {
1410 case CFM_OP_DATA_UPDATE:
1411 if (action_msg->cfa_out_peek_offset != 0 ||
1412 action_msg->cfa_out_pass_offset != 0)
1413 error = cfil_action_data_pass(so, kcunit, 1,
1414 action_msg->cfa_out_pass_offset,
1415 action_msg->cfa_out_peek_offset);
1416 if (error == EJUSTRETURN)
1417 error = 0;
1418 if (error != 0)
1419 break;
1420 if (action_msg->cfa_in_peek_offset != 0 ||
1421 action_msg->cfa_in_pass_offset != 0)
1422 error = cfil_action_data_pass(so, kcunit, 0,
1423 action_msg->cfa_in_pass_offset,
1424 action_msg->cfa_in_peek_offset);
1425 if (error == EJUSTRETURN)
1426 error = 0;
1427 break;
1428
1429 case CFM_OP_DROP:
1430 error = cfil_action_drop(so, kcunit);
1431 break;
1432
1433 default:
1434 error = EINVAL;
1435 break;
1436 }
1437unlock:
1438 socket_unlock(so, 1);
1439done:
1440 mbuf_freem(m);
1441
1442 if (error == 0)
1443 OSIncrementAtomic(&cfil_stats.cfs_ctl_send_ok);
1444 else
1445 OSIncrementAtomic(&cfil_stats.cfs_ctl_send_bad);
1446
1447 return (error);
1448}
1449
1450static errno_t
1451cfil_ctl_getopt(kern_ctl_ref kctlref, u_int32_t kcunit, void *unitinfo,
1452 int opt, void *data, size_t *len)
1453{
1454#pragma unused(kctlref, opt)
1455 errno_t error = 0;
1456 struct content_filter *cfc = (struct content_filter *)unitinfo;
1457
1458 CFIL_LOG(LOG_NOTICE, "");
1459
1460 cfil_rw_lock_shared(&cfil_lck_rw);
1461
1462 if (content_filters == NULL) {
1463 CFIL_LOG(LOG_ERR, "no content filter");
1464 error = EINVAL;
1465 goto done;
1466 }
1467 if (kcunit > MAX_CONTENT_FILTER) {
1468 CFIL_LOG(LOG_ERR, "kcunit %u > MAX_CONTENT_FILTER (%d)",
1469 kcunit, MAX_CONTENT_FILTER);
1470 error = EINVAL;
1471 goto done;
1472 }
1473 if (cfc != (void *)content_filters[kcunit - 1]) {
1474 CFIL_LOG(LOG_ERR, "unitinfo does not match for kcunit %u",
1475 kcunit);
1476 error = EINVAL;
1477 goto done;
1478 }
1479 switch (opt) {
1480 case CFIL_OPT_NECP_CONTROL_UNIT:
1481 if (*len < sizeof(uint32_t)) {
1482 CFIL_LOG(LOG_ERR, "len too small %lu", *len);
1483 error = EINVAL;
1484 goto done;
1485 }
1486 if (data != NULL) {
1487 *(uint32_t *)data = cfc->cf_necp_control_unit;
1488 }
1489 break;
1490 case CFIL_OPT_GET_SOCKET_INFO:
1491 if (*len != sizeof(struct cfil_opt_sock_info)) {
1492 CFIL_LOG(LOG_ERR, "len does not match %lu", *len);
1493 error = EINVAL;
1494 goto done;
1495 }
1496 if (data == NULL) {
1497 CFIL_LOG(LOG_ERR, "data not passed");
1498 error = EINVAL;
1499 goto done;
1500 }
1501
1502 struct cfil_opt_sock_info *sock_info =
1503 (struct cfil_opt_sock_info *) data;
1504 struct socket *sock =
1505 cfil_socket_from_sock_id(sock_info->cfs_sock_id);
1506 if (sock == NULL) {
1507 CFIL_LOG(LOG_NOTICE, "bad sock_id %llx",
1508 sock_info->cfs_sock_id);
1509 error = ENOENT;
1510 goto done;
1511 }
1512
1513 // Unlock here so that we never hold both cfil_lck_rw and the
1514 // socket_lock at the same time. Otherwise, this can deadlock
1515 // because soclose() takes the socket_lock and then exclusive
1516 // cfil_lck_rw and we require the opposite order.
1517
1518 // WARNING: Be sure to never use anything protected
1519 // by cfil_lck_rw beyond this point.
1520 // WARNING: Be sure to avoid fallthrough and
1521 // goto return_already_unlocked from this branch.
1522 cfil_rw_unlock_shared(&cfil_lck_rw);
1523
1524 socket_lock(sock, 1);
1525
1526 if (sock->so_cfil == NULL) {
1527 CFIL_LOG(LOG_NOTICE, "so %llx not attached, cannot fetch info",
1528 (uint64_t)VM_KERNEL_ADDRPERM(sock));
1529 error = EINVAL;
1530 socket_unlock(sock, 1);
1531 goto return_already_unlocked;
1532 }
1533
1534 // Fill out family, type, and protocol
1535 sock_info->cfs_sock_family = sock->so_proto->pr_domain->dom_family;
1536 sock_info->cfs_sock_type = sock->so_proto->pr_type;
1537 sock_info->cfs_sock_protocol = sock->so_proto->pr_protocol;
1538
1539 // Source and destination addresses
1540 struct inpcb *inp = sotoinpcb(sock);
1541 if (inp->inp_vflag & INP_IPV6) {
1542 fill_ip6_sockaddr_4_6(&sock_info->cfs_local,
1543 &inp->in6p_laddr, inp->inp_lport);
1544 fill_ip6_sockaddr_4_6(&sock_info->cfs_remote,
1545 &inp->in6p_faddr, inp->inp_fport);
1546 } else if (inp->inp_vflag & INP_IPV4) {
1547 fill_ip_sockaddr_4_6(&sock_info->cfs_local,
1548 inp->inp_laddr, inp->inp_lport);
1549 fill_ip_sockaddr_4_6(&sock_info->cfs_remote,
1550 inp->inp_faddr, inp->inp_fport);
1551 }
1552
1553 // Set the pid info
1554 sock_info->cfs_pid = sock->last_pid;
1555 memcpy(sock_info->cfs_uuid, sock->last_uuid, sizeof(uuid_t));
1556
1557 if (sock->so_flags & SOF_DELEGATED) {
1558 sock_info->cfs_e_pid = sock->e_pid;
1559 memcpy(sock_info->cfs_e_uuid, sock->e_uuid, sizeof(uuid_t));
1560 } else {
1561 sock_info->cfs_e_pid = sock->last_pid;
1562 memcpy(sock_info->cfs_e_uuid, sock->last_uuid, sizeof(uuid_t));
1563 }
1564
1565 socket_unlock(sock, 1);
1566
1567 goto return_already_unlocked;
1568 default:
1569 error = ENOPROTOOPT;
1570 break;
1571 }
1572done:
1573 cfil_rw_unlock_shared(&cfil_lck_rw);
1574
1575 return (error);
1576
1577return_already_unlocked:
1578
1579 return (error);
1580}
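
/*
 * From user space, CFIL_OPT_GET_SOCKET_INFO is reached with a regular
 * getsockopt() on the content filter kernel control socket. A minimal
 * sketch (error handling omitted; the struct layout is in
 * <net/content_filter.h>):
 *
 *      struct cfil_opt_sock_info info;
 *      socklen_t len = sizeof(info);
 *
 *      bzero(&info, sizeof(info));
 *      info.cfs_sock_id = sock_id;     // from a CFM_OP_SOCKET_ATTACHED event
 *      if (getsockopt(fd, SYSPROTO_CONTROL, CFIL_OPT_GET_SOCKET_INFO,
 *          &info, &len) == 0) {
 *              // info.cfs_local, cfs_remote, cfs_pid, cfs_e_pid, ... are filled in
 *      }
 */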
1581
1582static errno_t
1583cfil_ctl_setopt(kern_ctl_ref kctlref, u_int32_t kcunit, void *unitinfo,
1584 int opt, void *data, size_t len)
1585{
1586#pragma unused(kctlref, opt)
1587 errno_t error = 0;
1588 struct content_filter *cfc = (struct content_filter *)unitinfo;
1589
1590 CFIL_LOG(LOG_NOTICE, "");
1591
1592 cfil_rw_lock_exclusive(&cfil_lck_rw);
1593
1594 if (content_filters == NULL) {
1595 CFIL_LOG(LOG_ERR, "no content filter");
1596 error = EINVAL;
1597 goto done;
1598 }
1599 if (kcunit > MAX_CONTENT_FILTER) {
1600 CFIL_LOG(LOG_ERR, "kcunit %u > MAX_CONTENT_FILTER (%d)",
1601 kcunit, MAX_CONTENT_FILTER);
1602 error = EINVAL;
1603 goto done;
1604 }
1605 if (cfc != (void *)content_filters[kcunit - 1]) {
1606 CFIL_LOG(LOG_ERR, "unitinfo does not match for kcunit %u",
1607 kcunit);
1608 error = EINVAL;
1609 goto done;
1610 }
1611 switch (opt) {
1612 case CFIL_OPT_NECP_CONTROL_UNIT:
1613 if (len < sizeof(uint32_t)) {
1614 CFIL_LOG(LOG_ERR, "CFIL_OPT_NECP_CONTROL_UNIT "
1615 "len too small %lu", len);
1616 error = EINVAL;
1617 goto done;
1618 }
1619 if (cfc->cf_necp_control_unit != 0) {
1620 CFIL_LOG(LOG_ERR, "CFIL_OPT_NECP_CONTROL_UNIT "
1621 "already set %u",
1622 cfc->cf_necp_control_unit);
1623 error = EINVAL;
1624 goto done;
1625 }
1626 cfc->cf_necp_control_unit = *(uint32_t *)data;
1627 break;
1628 default:
1629 error = ENOPROTOOPT;
1630 break;
1631 }
1632done:
1633 cfil_rw_unlock_exclusive(&cfil_lck_rw);
1634
1635 return (error);
1636}
1637
1638
1639static void
1640cfil_ctl_rcvd(kern_ctl_ref kctlref, u_int32_t kcunit, void *unitinfo, int flags)
1641{
1642#pragma unused(kctlref, flags)
1643 struct content_filter *cfc = (struct content_filter *)unitinfo;
1644 struct socket *so = NULL;
1645 int error;
1646 struct cfil_entry *entry;
1647
1648 CFIL_LOG(LOG_INFO, "");
1649
1650 if (content_filters == NULL) {
1651 CFIL_LOG(LOG_ERR, "no content filter");
1652 OSIncrementAtomic(&cfil_stats.cfs_ctl_rcvd_bad);
1653 return;
1654 }
1655 if (kcunit > MAX_CONTENT_FILTER) {
1656 CFIL_LOG(LOG_ERR, "kcunit %u > MAX_CONTENT_FILTER (%d)",
1657 kcunit, MAX_CONTENT_FILTER);
1658 OSIncrementAtomic(&cfil_stats.cfs_ctl_rcvd_bad);
1659 return;
1660 }
1661 cfil_rw_lock_shared(&cfil_lck_rw);
1662 if (cfc != (void *)content_filters[kcunit - 1]) {
1663 CFIL_LOG(LOG_ERR, "unitinfo does not match for kcunit %u",
1664 kcunit);
1665 OSIncrementAtomic(&cfil_stats.cfs_ctl_rcvd_bad);
1666 goto done;
1667 }
1668 /* Let's assume the flow control is lifted */
1669 if (cfc->cf_flags & CFF_FLOW_CONTROLLED) {
1670 if (!cfil_rw_lock_shared_to_exclusive(&cfil_lck_rw))
1671 cfil_rw_lock_exclusive(&cfil_lck_rw);
1672
1673 cfc->cf_flags &= ~CFF_FLOW_CONTROLLED;
1674
1675 cfil_rw_lock_exclusive_to_shared(&cfil_lck_rw);
1676 LCK_RW_ASSERT(&cfil_lck_rw, LCK_RW_ASSERT_SHARED);
1677 }
1678 /*
1679 * Flow control will be raised again as soon as an entry cannot enqueue
1680 * to the kernel control socket
1681 */
1682 while ((cfc->cf_flags & CFF_FLOW_CONTROLLED) == 0) {
1683 verify_content_filter(cfc);
1684
1685 cfil_rw_lock_assert_held(&cfil_lck_rw, 0);
1686
1687 /* Find an entry that is flow controlled */
1688 TAILQ_FOREACH(entry, &cfc->cf_sock_entries, cfe_link) {
1689 if (entry->cfe_cfil_info == NULL ||
1690 entry->cfe_cfil_info->cfi_so == NULL)
1691 continue;
1692 if ((entry->cfe_flags & CFEF_FLOW_CONTROLLED) == 0)
1693 continue;
1694 }
1695 if (entry == NULL)
1696 break;
1697
1698 OSIncrementAtomic(&cfil_stats.cfs_ctl_rcvd_flow_lift);
1699
1700 so = entry->cfe_cfil_info->cfi_so;
1701
1702 cfil_rw_unlock_shared(&cfil_lck_rw);
1703 socket_lock(so, 1);
1704
1705 do {
1706 error = cfil_acquire_sockbuf(so, 1);
1707 if (error == 0)
1708 error = cfil_data_service_ctl_q(so, kcunit, 1);
1709 cfil_release_sockbuf(so, 1);
1710 if (error != 0)
1711 break;
1712
1713 error = cfil_acquire_sockbuf(so, 0);
1714 if (error == 0)
1715 error = cfil_data_service_ctl_q(so, kcunit, 0);
1716 cfil_release_sockbuf(so, 0);
1717 } while (0);
1718
1719 socket_lock_assert_owned(so);
1720 socket_unlock(so, 1);
1721
1722 cfil_rw_lock_shared(&cfil_lck_rw);
1723 }
1724done:
1725 cfil_rw_unlock_shared(&cfil_lck_rw);
1726}
1727
1728void
1729cfil_init(void)
1730{
1731 struct kern_ctl_reg kern_ctl;
1732 errno_t error = 0;
1733 vm_size_t content_filter_size = 0; /* size of content_filter */
1734 vm_size_t cfil_info_size = 0; /* size of cfil_info */
1735
1736 CFIL_LOG(LOG_NOTICE, "");
1737
1738 /*
1739 * Compile time verifications
1740 */
1741 _CASSERT(CFIL_MAX_FILTER_COUNT == MAX_CONTENT_FILTER);
1742 _CASSERT(sizeof(struct cfil_filter_stat) % sizeof(uint32_t) == 0);
1743 _CASSERT(sizeof(struct cfil_entry_stat) % sizeof(uint32_t) == 0);
1744 _CASSERT(sizeof(struct cfil_sock_stat) % sizeof(uint32_t) == 0);
1745
1746 /*
1747 * Run time verifications
1748 */
1749 VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_ctl_q_in_enqueued,
1750 sizeof(uint32_t)));
1751 VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_ctl_q_out_enqueued,
1752 sizeof(uint32_t)));
1753 VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_ctl_q_in_peeked,
1754 sizeof(uint32_t)));
1755 VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_ctl_q_out_peeked,
1756 sizeof(uint32_t)));
1757
1758 VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_pending_q_in_enqueued,
1759 sizeof(uint32_t)));
1760 VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_pending_q_out_enqueued,
1761 sizeof(uint32_t)));
1762
1763 VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_inject_q_in_enqueued,
1764 sizeof(uint32_t)));
1765 VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_inject_q_out_enqueued,
1766 sizeof(uint32_t)));
1767 VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_inject_q_in_passed,
1768 sizeof(uint32_t)));
1769 VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_inject_q_out_passed,
1770 sizeof(uint32_t)));
1771
1772 /*
1773 * Zone for content filters kernel control sockets
1774 */
1775 content_filter_size = sizeof(struct content_filter);
1776 content_filter_zone = zinit(content_filter_size,
1777 CONTENT_FILTER_ZONE_MAX * content_filter_size,
1778 0,
1779 CONTENT_FILTER_ZONE_NAME);
1780 if (content_filter_zone == NULL) {
1781 panic("%s: zinit(%s) failed", __func__,
1782 CONTENT_FILTER_ZONE_NAME);
1783 /* NOTREACHED */
1784 }
1785 zone_change(content_filter_zone, Z_CALLERACCT, FALSE);
1786 zone_change(content_filter_zone, Z_EXPAND, TRUE);
1787
1788 /*
1789 * Zone for per socket content filters
1790 */
1791 cfil_info_size = sizeof(struct cfil_info);
1792 cfil_info_zone = zinit(cfil_info_size,
1793 CFIL_INFO_ZONE_MAX * cfil_info_size,
1794 0,
1795 CFIL_INFO_ZONE_NAME);
1796 if (cfil_info_zone == NULL) {
1797 panic("%s: zinit(%s) failed", __func__, CFIL_INFO_ZONE_NAME);
1798 /* NOTREACHED */
1799 }
1800 zone_change(cfil_info_zone, Z_CALLERACCT, FALSE);
1801 zone_change(cfil_info_zone, Z_EXPAND, TRUE);
1802
1803 /*
1804 * Allocate locks
1805 */
1806 cfil_lck_grp_attr = lck_grp_attr_alloc_init();
1807 if (cfil_lck_grp_attr == NULL) {
1808 panic("%s: lck_grp_attr_alloc_init failed", __func__);
1809 /* NOTREACHED */
1810 }
1811 cfil_lck_grp = lck_grp_alloc_init("content filter",
1812 cfil_lck_grp_attr);
1813 if (cfil_lck_grp == NULL) {
1814 panic("%s: lck_grp_alloc_init failed", __func__);
1815 /* NOTREACHED */
1816 }
1817 cfil_lck_attr = lck_attr_alloc_init();
1818 if (cfil_lck_attr == NULL) {
1819 panic("%s: lck_attr_alloc_init failed", __func__);
1820 /* NOTREACHED */
1821 }
1822 lck_rw_init(&cfil_lck_rw, cfil_lck_grp, cfil_lck_attr);
1823
1824 TAILQ_INIT(&cfil_sock_head);
1825
1826 /*
1827 * Register kernel control
1828 */
1829 bzero(&kern_ctl, sizeof(kern_ctl));
1830 strlcpy(kern_ctl.ctl_name, CONTENT_FILTER_CONTROL_NAME,
1831 sizeof(kern_ctl.ctl_name));
1832 kern_ctl.ctl_flags = CTL_FLAG_PRIVILEGED | CTL_FLAG_REG_EXTENDED;
1833 kern_ctl.ctl_sendsize = 512 * 1024; /* enough? */
1834 kern_ctl.ctl_recvsize = 512 * 1024; /* enough? */
1835 kern_ctl.ctl_connect = cfil_ctl_connect;
1836 kern_ctl.ctl_disconnect = cfil_ctl_disconnect;
1837 kern_ctl.ctl_send = cfil_ctl_send;
1838 kern_ctl.ctl_getopt = cfil_ctl_getopt;
1839 kern_ctl.ctl_setopt = cfil_ctl_setopt;
1840 kern_ctl.ctl_rcvd = cfil_ctl_rcvd;
1841 error = ctl_register(&kern_ctl, &cfil_kctlref);
1842 if (error != 0) {
1843 CFIL_LOG(LOG_ERR, "ctl_register failed: %d", error);
1844 return;
1845 }
1846}
1847
1848struct cfil_info *
1849cfil_info_alloc(struct socket *so)
1850{
1851 int kcunit;
1852 struct cfil_info *cfil_info = NULL;
1853 struct inpcb *inp = sotoinpcb(so);
1854
1855 CFIL_LOG(LOG_INFO, "");
1856
1857 socket_lock_assert_owned(so);
1858
1859 cfil_info = zalloc(cfil_info_zone);
1860 if (cfil_info == NULL)
1861 goto done;
1862 bzero(cfil_info, sizeof(struct cfil_info));
1863
1864 cfil_queue_init(&cfil_info->cfi_snd.cfi_inject_q);
1865 cfil_queue_init(&cfil_info->cfi_rcv.cfi_inject_q);
1866
1867 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
1868 struct cfil_entry *entry;
1869
1870 entry = &cfil_info->cfi_entries[kcunit - 1];
1871 entry->cfe_cfil_info = cfil_info;
1872
1873 /* Initialize the filter entry */
1874 entry->cfe_filter = NULL;
1875 entry->cfe_flags = 0;
1876 entry->cfe_necp_control_unit = 0;
1877 entry->cfe_snd.cfe_pass_offset = 0;
1878 entry->cfe_snd.cfe_peek_offset = 0;
1879 entry->cfe_snd.cfe_peeked = 0;
1880 entry->cfe_rcv.cfe_pass_offset = 0;
1881 entry->cfe_rcv.cfe_peek_offset = 0;
1882 entry->cfe_rcv.cfe_peeked = 0;
1883
1884 cfil_queue_init(&entry->cfe_snd.cfe_pending_q);
1885 cfil_queue_init(&entry->cfe_rcv.cfe_pending_q);
1886 cfil_queue_init(&entry->cfe_snd.cfe_ctl_q);
1887 cfil_queue_init(&entry->cfe_rcv.cfe_ctl_q);
1888 }
1889
1890 cfil_rw_lock_exclusive(&cfil_lck_rw);
1891
1892 so->so_cfil = cfil_info;
1893 cfil_info->cfi_so = so;
1894 /*
1895 * Create a cfi_sock_id that's not the socket pointer!
1896 */
1897 if (inp->inp_flowhash == 0)
1898 inp->inp_flowhash = inp_calc_flowhash(inp);
1899 cfil_info->cfi_sock_id =
1900 ((so->so_gencnt << 32) | inp->inp_flowhash);
1901
1902 TAILQ_INSERT_TAIL(&cfil_sock_head, cfil_info, cfi_link);
1903
1904 cfil_sock_attached_count++;
1905
1906 cfil_rw_unlock_exclusive(&cfil_lck_rw);
1907
1908done:
1909 if (cfil_info != NULL)
1910 OSIncrementAtomic(&cfil_stats.cfs_cfi_alloc_ok);
1911 else
1912 OSIncrementAtomic(&cfil_stats.cfs_cfi_alloc_fail);
1913
1914 return (cfil_info);
1915}
1916
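/*
 * cfil_info_attach_unit()
 *
 * Attach the socket's cfil_info to the content filter whose NECP
 * control unit matches filter_control_unit and account for the
 * attachment on that filter.
 *
 * Returns 1 when an entry was attached, 0 otherwise.
 */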
1917int
1918cfil_info_attach_unit(struct socket *so, uint32_t filter_control_unit)
1919{
1920 int kcunit;
1921 struct cfil_info *cfil_info = so->so_cfil;
1922 int attached = 0;
1923
1924 CFIL_LOG(LOG_INFO, "");
1925
1926 socket_lock_assert_owned(so);
1927
1928 cfil_rw_lock_exclusive(&cfil_lck_rw);
1929
1930 for (kcunit = 1;
1931 content_filters != NULL && kcunit <= MAX_CONTENT_FILTER;
1932 kcunit++) {
1933 struct content_filter *cfc = content_filters[kcunit - 1];
1934 struct cfil_entry *entry;
1935
1936 if (cfc == NULL)
1937 continue;
1938 if (cfc->cf_necp_control_unit != filter_control_unit)
1939 continue;
1940
1941 entry = &cfil_info->cfi_entries[kcunit - 1];
1942
1943 entry->cfe_filter = cfc;
1944 entry->cfe_necp_control_unit = filter_control_unit;
1945 TAILQ_INSERT_TAIL(&cfc->cf_sock_entries, entry, cfe_link);
1946 cfc->cf_sock_count++;
1947 verify_content_filter(cfc);
1948 attached = 1;
1949 entry->cfe_flags |= CFEF_CFIL_ATTACHED;
1950 break;
1951 }
1952
1953 cfil_rw_unlock_exclusive(&cfil_lck_rw);
1954
1955 return (attached);
1956}
1957
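/*
 * cfil_info_free()
 *
 * Detach the socket from every content filter, drain the per-entry
 * control and pending queues as well as the inject queues, release
 * the socket use count held for content filtering and free the
 * cfil_info structure.
 */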
1958static void
1959cfil_info_free(struct socket *so, struct cfil_info *cfil_info)
1960{
1961 int kcunit;
1962 uint64_t in_drain = 0;
1963 uint64_t out_drained = 0;
1964
1965 so->so_cfil = NULL;
1966
1967 if (so->so_flags & SOF_CONTENT_FILTER) {
1968 so->so_flags &= ~SOF_CONTENT_FILTER;
d190cdc3 1969 VERIFY(so->so_usecount > 0);
1970 so->so_usecount--;
1971 }
1972 if (cfil_info == NULL)
1973 return;
1974
1975 CFIL_LOG(LOG_INFO, "");
1976
1977 cfil_rw_lock_exclusive(&cfil_lck_rw);
1978
1979 for (kcunit = 1;
1980 content_filters != NULL && kcunit <= MAX_CONTENT_FILTER;
1981 kcunit++) {
1982 struct cfil_entry *entry;
1983 struct content_filter *cfc;
1984
1985 entry = &cfil_info->cfi_entries[kcunit - 1];
1986
1987 /* Don't be silly and try to detach twice */
1988 if (entry->cfe_filter == NULL)
1989 continue;
1990
1991 cfc = content_filters[kcunit - 1];
1992
1993 VERIFY(cfc == entry->cfe_filter);
1994
1995 entry->cfe_filter = NULL;
1996 entry->cfe_necp_control_unit = 0;
1997 TAILQ_REMOVE(&cfc->cf_sock_entries, entry, cfe_link);
1998 cfc->cf_sock_count--;
1999
2000 verify_content_filter(cfc);
2001 }
2002 cfil_sock_attached_count--;
2003 TAILQ_REMOVE(&cfil_sock_head, cfil_info, cfi_link);
2004
2005 out_drained += cfil_queue_drain(&cfil_info->cfi_snd.cfi_inject_q);
2006 in_drain += cfil_queue_drain(&cfil_info->cfi_rcv.cfi_inject_q);
2007
2008 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
2009 struct cfil_entry *entry;
2010
2011 entry = &cfil_info->cfi_entries[kcunit - 1];
2012 out_drained += cfil_queue_drain(&entry->cfe_snd.cfe_pending_q);
2013 in_drain += cfil_queue_drain(&entry->cfe_rcv.cfe_pending_q);
2014 out_drained += cfil_queue_drain(&entry->cfe_snd.cfe_ctl_q);
2015 in_drain += cfil_queue_drain(&entry->cfe_rcv.cfe_ctl_q);
2016 }
2017 cfil_rw_unlock_exclusive(&cfil_lck_rw);
2018
2019 if (out_drained)
2020 OSIncrementAtomic(&cfil_stats.cfs_flush_out_free);
2021 if (in_drain)
2022 OSIncrementAtomic(&cfil_stats.cfs_flush_in_free);
2023
2024 zfree(cfil_info_zone, cfil_info);
2025}
2026
2027/*
2028 * Entry point from Sockets layer
2029 * The socket is locked.
2030 */
2031errno_t
2032cfil_sock_attach(struct socket *so)
2033{
2034 errno_t error = 0;
2035 uint32_t filter_control_unit;
2036
2037 socket_lock_assert_owned(so);
2038
813fb2f6 2039 /* Limit ourselves to TCP sockets that are not MPTCP subflows */
2040 if ((so->so_proto->pr_domain->dom_family != PF_INET &&
2041 so->so_proto->pr_domain->dom_family != PF_INET6) ||
2042 so->so_proto->pr_type != SOCK_STREAM ||
813fb2f6 2043 so->so_proto->pr_protocol != IPPROTO_TCP ||
2044 (so->so_flags & SOF_MP_SUBFLOW) != 0 ||
2045 (so->so_flags1 & SOF1_CONTENT_FILTER_SKIP) != 0)
2046 goto done;
2047
2048 filter_control_unit = necp_socket_get_content_filter_control_unit(so);
2049 if (filter_control_unit == 0)
2050 goto done;
2051
2052 if ((filter_control_unit & NECP_MASK_USERSPACE_ONLY) != 0) {
2053 OSIncrementAtomic(&cfil_stats.cfs_sock_userspace_only);
2054 goto done;
2055 }
2056 if (cfil_active_count == 0) {
2057 OSIncrementAtomic(&cfil_stats.cfs_sock_attach_in_vain);
2058 goto done;
2059 }
2060 if (so->so_cfil != NULL) {
2061 OSIncrementAtomic(&cfil_stats.cfs_sock_attach_already);
2062 CFIL_LOG(LOG_ERR, "already attached");
2063 } else {
2064 cfil_info_alloc(so);
2065 if (so->so_cfil == NULL) {
2066 error = ENOMEM;
2067 OSIncrementAtomic(&cfil_stats.cfs_sock_attach_no_mem);
2068 goto done;
2069 }
2070 }
2071 if (cfil_info_attach_unit(so, filter_control_unit) == 0) {
2072 CFIL_LOG(LOG_ERR, "cfil_info_attach_unit(%u) failed",
2073 filter_control_unit);
2074 OSIncrementAtomic(&cfil_stats.cfs_sock_attach_failed);
2075 goto done;
2076 }
2077 CFIL_LOG(LOG_INFO, "so %llx filter_control_unit %u sockid %llx",
2078 (uint64_t)VM_KERNEL_ADDRPERM(so),
2079 filter_control_unit, so->so_cfil->cfi_sock_id);
2080
2081 so->so_flags |= SOF_CONTENT_FILTER;
2082 OSIncrementAtomic(&cfil_stats.cfs_sock_attached);
2083
2084 /* Hold a reference on the socket */
2085 so->so_usecount++;
2086
2087 error = cfil_dispatch_attach_event(so, filter_control_unit);
2088 /* We can recover from flow control or out of memory errors */
2089 if (error == ENOBUFS || error == ENOMEM)
2090 error = 0;
2091 else if (error != 0)
2092 goto done;
2093
2094 CFIL_INFO_VERIFY(so->so_cfil);
2095done:
2096 return (error);
2097}
2098
2099/*
2100 * Entry point from Sockets layer
2101 * The socket is locked.
2102 */
2103errno_t
2104cfil_sock_detach(struct socket *so)
2105{
2106 if (so->so_cfil) {
2107 cfil_info_free(so, so->so_cfil);
2108 OSIncrementAtomic(&cfil_stats.cfs_sock_detached);
2109 }
2110 return (0);
2111}
2112
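/*
 * cfil_dispatch_attach_event()
 *
 * Send a CFM_OP_SOCKET_ATTACHED event to the user space filter agent
 * that matches filter_control_unit. The event carries the socket
 * family, type, protocol, pid and UUID of the process.
 * On ENOBUFS the filter is marked flow controlled so the event can
 * be retried later.
 */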
2113static int
2114cfil_dispatch_attach_event(struct socket *so, uint32_t filter_control_unit)
2115{
2116 errno_t error = 0;
2117 struct cfil_entry *entry = NULL;
2118 struct cfil_msg_sock_attached msg_attached;
2119 uint32_t kcunit;
5ba3f43e 2120 struct content_filter *cfc = NULL;
2121
2122 socket_lock_assert_owned(so);
2123
2124 cfil_rw_lock_shared(&cfil_lck_rw);
2125
2126 if (so->so_proto == NULL || so->so_proto->pr_domain == NULL) {
2127 error = EINVAL;
2128 goto done;
2129 }
2130 /*
2131 * Find the matching filter unit
2132 */
2133 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
2134 cfc = content_filters[kcunit - 1];
2135
2136 if (cfc == NULL)
2137 continue;
2138 if (cfc->cf_necp_control_unit != filter_control_unit)
2139 continue;
2140 entry = &so->so_cfil->cfi_entries[kcunit - 1];
2141 if (entry->cfe_filter == NULL)
2142 continue;
2143
2144 VERIFY(cfc == entry->cfe_filter);
2145
2146 break;
2147 }
2148
2149 if (entry == NULL || entry->cfe_filter == NULL)
2150 goto done;
2151
2152 if ((entry->cfe_flags & CFEF_SENT_SOCK_ATTACHED))
2153 goto done;
2154
2155 CFIL_LOG(LOG_INFO, "so %llx filter_control_unit %u kcunit %u",
2156 (uint64_t)VM_KERNEL_ADDRPERM(so), filter_control_unit, kcunit);
2157
2158 /* Would be wasteful to try when flow controlled */
2159 if (cfc->cf_flags & CFF_FLOW_CONTROLLED) {
2160 error = ENOBUFS;
2161 goto done;
2162 }
2163
2164 bzero(&msg_attached, sizeof(struct cfil_msg_sock_attached));
2165 msg_attached.cfs_msghdr.cfm_len = sizeof(struct cfil_msg_sock_attached);
2166 msg_attached.cfs_msghdr.cfm_version = CFM_VERSION_CURRENT;
2167 msg_attached.cfs_msghdr.cfm_type = CFM_TYPE_EVENT;
2168 msg_attached.cfs_msghdr.cfm_op = CFM_OP_SOCKET_ATTACHED;
2169 msg_attached.cfs_msghdr.cfm_sock_id = entry->cfe_cfil_info->cfi_sock_id;
2170
2171 msg_attached.cfs_sock_family = so->so_proto->pr_domain->dom_family;
2172 msg_attached.cfs_sock_type = so->so_proto->pr_type;
2173 msg_attached.cfs_sock_protocol = so->so_proto->pr_protocol;
2174 msg_attached.cfs_pid = so->last_pid;
2175 memcpy(msg_attached.cfs_uuid, so->last_uuid, sizeof(uuid_t));
2176 if (so->so_flags & SOF_DELEGATED) {
2177 msg_attached.cfs_e_pid = so->e_pid;
2178 memcpy(msg_attached.cfs_e_uuid, so->e_uuid, sizeof(uuid_t));
2179 } else {
2180 msg_attached.cfs_e_pid = so->last_pid;
2181 memcpy(msg_attached.cfs_e_uuid, so->last_uuid, sizeof(uuid_t));
2182 }
2183 error = ctl_enqueuedata(entry->cfe_filter->cf_kcref,
2184 entry->cfe_filter->cf_kcunit,
2185 &msg_attached,
2186 sizeof(struct cfil_msg_sock_attached),
2187 CTL_DATA_EOR);
2188 if (error != 0) {
2189 CFIL_LOG(LOG_ERR, "ctl_enqueuedata() failed: %d", error);
2190 goto done;
2191 }
2192 microuptime(&entry->cfe_last_event);
2193 so->so_cfil->cfi_first_event.tv_sec = entry->cfe_last_event.tv_sec;
2194 so->so_cfil->cfi_first_event.tv_usec = entry->cfe_last_event.tv_usec;
2195
2196 entry->cfe_flags |= CFEF_SENT_SOCK_ATTACHED;
2197 OSIncrementAtomic(&cfil_stats.cfs_attach_event_ok);
2198done:
2199
2200 /* We can recover from flow control */
2201 if (error == ENOBUFS) {
2202 entry->cfe_flags |= CFEF_FLOW_CONTROLLED;
2203 OSIncrementAtomic(&cfil_stats.cfs_attach_event_flow_control);
2204
2205 if (!cfil_rw_lock_shared_to_exclusive(&cfil_lck_rw))
2206 cfil_rw_lock_exclusive(&cfil_lck_rw);
2207
2208 cfc->cf_flags |= CFF_FLOW_CONTROLLED;
2209
2210 cfil_rw_unlock_exclusive(&cfil_lck_rw);
2211 } else {
2212 if (error != 0)
2213 OSIncrementAtomic(&cfil_stats.cfs_attach_event_fail);
2214
2215 cfil_rw_unlock_shared(&cfil_lck_rw);
2216 }
2217 return (error);
2218}
2219
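/*
 * cfil_dispatch_disconnect_event()
 *
 * Send a CFM_OP_DISCONNECT_OUT or CFM_OP_DISCONNECT_IN event for the
 * given direction, at most once per filter entry. For the outgoing
 * direction the event is held back while data is still waiting in
 * the control queue.
 */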
2220static int
2221cfil_dispatch_disconnect_event(struct socket *so, uint32_t kcunit, int outgoing)
2222{
2223 errno_t error = 0;
2224 struct mbuf *msg = NULL;
2225 struct cfil_entry *entry;
2226 struct cfe_buf *entrybuf;
2227 struct cfil_msg_hdr msg_disconnected;
2228 struct content_filter *cfc;
2229
2230 socket_lock_assert_owned(so);
2231
2232 cfil_rw_lock_shared(&cfil_lck_rw);
2233
2234 entry = &so->so_cfil->cfi_entries[kcunit - 1];
2235 if (outgoing)
2236 entrybuf = &entry->cfe_snd;
2237 else
2238 entrybuf = &entry->cfe_rcv;
2239
2240 cfc = entry->cfe_filter;
2241 if (cfc == NULL)
2242 goto done;
2243
2244 CFIL_LOG(LOG_INFO, "so %llx kcunit %u outgoing %d",
2245 (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit, outgoing);
2246
2247 /*
2248 * Send the disconnection event once
2249 */
2250 if ((outgoing && (entry->cfe_flags & CFEF_SENT_DISCONNECT_OUT)) ||
2251 (!outgoing && (entry->cfe_flags & CFEF_SENT_DISCONNECT_IN))) {
2252 CFIL_LOG(LOG_INFO, "so %llx disconnect already sent",
2253 (uint64_t)VM_KERNEL_ADDRPERM(so));
2254 goto done;
2255 }
2256
2257 /*
2258 * We're not disconnected as long as some data is waiting
2259 * to be delivered to the filter
2260 */
2261 if (outgoing && cfil_queue_empty(&entrybuf->cfe_ctl_q) == 0) {
2262 CFIL_LOG(LOG_INFO, "so %llx control queue not empty",
2263 (uint64_t)VM_KERNEL_ADDRPERM(so));
2264 error = EBUSY;
2265 goto done;
2266 }
2267 /* Would be wasteful to try when flow controlled */
2268 if (cfc->cf_flags & CFF_FLOW_CONTROLLED) {
2269 error = ENOBUFS;
2270 goto done;
2271 }
2272
2273 bzero(&msg_disconnected, sizeof(struct cfil_msg_hdr));
2274 msg_disconnected.cfm_len = sizeof(struct cfil_msg_hdr);
2275 msg_disconnected.cfm_version = CFM_VERSION_CURRENT;
2276 msg_disconnected.cfm_type = CFM_TYPE_EVENT;
2277 msg_disconnected.cfm_op = outgoing ? CFM_OP_DISCONNECT_OUT :
2278 CFM_OP_DISCONNECT_IN;
2279 msg_disconnected.cfm_sock_id = entry->cfe_cfil_info->cfi_sock_id;
2280 error = ctl_enqueuedata(entry->cfe_filter->cf_kcref,
2281 entry->cfe_filter->cf_kcunit,
2282 &msg_disconnected,
2283 sizeof(struct cfil_msg_hdr),
2284 CTL_DATA_EOR);
2285 if (error != 0) {
2286 CFIL_LOG(LOG_ERR, "ctl_enqueuedata() failed: %d", error);
2287 mbuf_freem(msg);
2288 goto done;
2289 }
2290 microuptime(&entry->cfe_last_event);
5ba3f43e 2291 CFI_ADD_TIME_LOG(so->so_cfil, &entry->cfe_last_event, &so->so_cfil->cfi_first_event, msg_disconnected.cfm_op);
2292
2293 /* Remember we have sent the disconnection message */
2294 if (outgoing) {
2295 entry->cfe_flags |= CFEF_SENT_DISCONNECT_OUT;
2296 OSIncrementAtomic(&cfil_stats.cfs_disconnect_out_event_ok);
2297 } else {
2298 entry->cfe_flags |= CFEF_SENT_DISCONNECT_IN;
2299 OSIncrementAtomic(&cfil_stats.cfs_disconnect_in_event_ok);
2300 }
2301done:
2302 if (error == ENOBUFS) {
2303 entry->cfe_flags |= CFEF_FLOW_CONTROLLED;
2304 OSIncrementAtomic(
2305 &cfil_stats.cfs_disconnect_event_flow_control);
2306
2307 if (!cfil_rw_lock_shared_to_exclusive(&cfil_lck_rw))
2308 cfil_rw_lock_exclusive(&cfil_lck_rw);
2309
2310 cfc->cf_flags |= CFF_FLOW_CONTROLLED;
2311
2312 cfil_rw_unlock_exclusive(&cfil_lck_rw);
2313 } else {
2314 if (error != 0)
2315 OSIncrementAtomic(
2316 &cfil_stats.cfs_disconnect_event_fail);
2317
2318 cfil_rw_unlock_shared(&cfil_lck_rw);
2319 }
2320 return (error);
2321}
2322
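/*
 * cfil_dispatch_closed_event()
 *
 * Send a single CFM_OP_SOCKET_CLOSED event per filter once the
 * attached event has been sent, including the log of event times
 * recorded for this socket.
 */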
2323int
2324cfil_dispatch_closed_event(struct socket *so, int kcunit)
2325{
2326 struct cfil_entry *entry;
5ba3f43e 2327 struct cfil_msg_sock_closed msg_closed;
2328 errno_t error = 0;
2329 struct content_filter *cfc;
2330
2331 socket_lock_assert_owned(so);
2332
2333 cfil_rw_lock_shared(&cfil_lck_rw);
2334
2335 entry = &so->so_cfil->cfi_entries[kcunit - 1];
2336 cfc = entry->cfe_filter;
2337 if (cfc == NULL)
2338 goto done;
2339
2340 CFIL_LOG(LOG_INFO, "so %llx kcunit %d",
2341 (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit);
2342
2343 /* Would be wasteful to try when flow controlled */
2344 if (cfc->cf_flags & CFF_FLOW_CONTROLLED) {
2345 error = ENOBUFS;
2346 goto done;
2347 }
2348 /*
2349 * Send a single closed message per filter
2350 */
2351 if ((entry->cfe_flags & CFEF_SENT_SOCK_CLOSED) != 0)
2352 goto done;
2353 if ((entry->cfe_flags & CFEF_SENT_SOCK_ATTACHED) == 0)
2354 goto done;
2355
2356 microuptime(&entry->cfe_last_event);
2357 CFI_ADD_TIME_LOG(so->so_cfil, &entry->cfe_last_event, &so->so_cfil->cfi_first_event, CFM_OP_SOCKET_CLOSED);
2358
2359 bzero(&msg_closed, sizeof(struct cfil_msg_sock_closed));
2360 msg_closed.cfc_msghdr.cfm_len = sizeof(struct cfil_msg_sock_closed);
2361 msg_closed.cfc_msghdr.cfm_version = CFM_VERSION_CURRENT;
2362 msg_closed.cfc_msghdr.cfm_type = CFM_TYPE_EVENT;
2363 msg_closed.cfc_msghdr.cfm_op = CFM_OP_SOCKET_CLOSED;
2364 msg_closed.cfc_msghdr.cfm_sock_id = entry->cfe_cfil_info->cfi_sock_id;
2365 msg_closed.cfc_first_event.tv_sec = so->so_cfil->cfi_first_event.tv_sec;
2366 msg_closed.cfc_first_event.tv_usec = so->so_cfil->cfi_first_event.tv_usec;
2367 memcpy(msg_closed.cfc_op_time, so->so_cfil->cfi_op_time, sizeof(uint32_t)*CFI_MAX_TIME_LOG_ENTRY);
2368 memcpy(msg_closed.cfc_op_list, so->so_cfil->cfi_op_list, sizeof(unsigned char)*CFI_MAX_TIME_LOG_ENTRY);
2369 msg_closed.cfc_op_list_ctr = so->so_cfil->cfi_op_list_ctr;
2370
2371 CFIL_LOG(LOG_INFO, "sock id %llu, op ctr %d, start time %llu.%llu", msg_closed.cfc_msghdr.cfm_sock_id, so->so_cfil->cfi_op_list_ctr, so->so_cfil->cfi_first_event.tv_sec, so->so_cfil->cfi_first_event.tv_usec);
2372 /* for debugging
2373 if (msg_closed.cfc_op_list_ctr > CFI_MAX_TIME_LOG_ENTRY) {
2374 msg_closed.cfc_op_list_ctr = CFI_MAX_TIME_LOG_ENTRY; // just in case
2375 }
2376 for (unsigned int i = 0; i < msg_closed.cfc_op_list_ctr ; i++) {
2377 CFIL_LOG(LOG_ERR, "MD: socket %llu event %2u, time + %u msec", msg_closed.cfc_msghdr.cfm_sock_id, (unsigned short)msg_closed.cfc_op_list[i], msg_closed.cfc_op_time[i]);
2378 }
2379 */
2380
2381 error = ctl_enqueuedata(entry->cfe_filter->cf_kcref,
2382 entry->cfe_filter->cf_kcunit,
2383 &msg_closed,
5ba3f43e 2384 sizeof(struct cfil_msg_sock_closed),
2385 CTL_DATA_EOR);
2386 if (error != 0) {
2387 CFIL_LOG(LOG_ERR, "ctl_enqueuedata() failed: %d",
2388 error);
2389 goto done;
2390 }
5ba3f43e 2391
2392 entry->cfe_flags |= CFEF_SENT_SOCK_CLOSED;
2393 OSIncrementAtomic(&cfil_stats.cfs_closed_event_ok);
2394done:
2395 /* We can recover from flow control */
2396 if (error == ENOBUFS) {
2397 entry->cfe_flags |= CFEF_FLOW_CONTROLLED;
2398 OSIncrementAtomic(&cfil_stats.cfs_closed_event_flow_control);
2399
2400 if (!cfil_rw_lock_shared_to_exclusive(&cfil_lck_rw))
2401 cfil_rw_lock_exclusive(&cfil_lck_rw);
2402
2403 cfc->cf_flags |= CFF_FLOW_CONTROLLED;
2404
2405 cfil_rw_unlock_exclusive(&cfil_lck_rw);
2406 } else {
2407 if (error != 0)
2408 OSIncrementAtomic(&cfil_stats.cfs_closed_event_fail);
2409
2410 cfil_rw_unlock_shared(&cfil_lck_rw);
2411 }
2412
2413 return (error);
2414}
2415
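/*
 * fill_ip6_sockaddr_4_6() and fill_ip_sockaddr_4_6()
 *
 * Fill the union sockaddr_in_4_6 of a data event with the IPv6 or
 * IPv4 address and port of the connection.
 */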
2416static void
2417fill_ip6_sockaddr_4_6(union sockaddr_in_4_6 *sin46,
2418 struct in6_addr *ip6, u_int16_t port)
2419{
2420 struct sockaddr_in6 *sin6 = &sin46->sin6;
2421
2422 sin6->sin6_family = AF_INET6;
2423 sin6->sin6_len = sizeof(*sin6);
2424 sin6->sin6_port = port;
2425 sin6->sin6_addr = *ip6;
2426 if (IN6_IS_SCOPE_EMBED(&sin6->sin6_addr)) {
2427 sin6->sin6_scope_id = ntohs(sin6->sin6_addr.s6_addr16[1]);
2428 sin6->sin6_addr.s6_addr16[1] = 0;
2429 }
2430}
2431
2432static void
2433fill_ip_sockaddr_4_6(union sockaddr_in_4_6 *sin46,
2434 struct in_addr ip, u_int16_t port)
2435{
2436 struct sockaddr_in *sin = &sin46->sin;
2437
2438 sin->sin_family = AF_INET;
2439 sin->sin_len = sizeof(*sin);
2440 sin->sin_port = port;
2441 sin->sin_addr.s_addr = ip.s_addr;
2442}
2443
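/*
 * cfil_dispatch_data_event()
 *
 * Deliver a span of data to the user space filter agent as a
 * CFM_OP_DATA_OUT or CFM_OP_DATA_IN message: a copy of the mbufs is
 * chained to a message header carrying the start and end offsets and
 * the local and remote addresses of the connection.
 */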
2444static int
2445cfil_dispatch_data_event(struct socket *so, uint32_t kcunit, int outgoing,
2446 struct mbuf *data, unsigned int copyoffset, unsigned int copylen)
2447{
2448 errno_t error = 0;
2449 struct mbuf *copy = NULL;
2450 struct mbuf *msg = NULL;
2451 unsigned int one = 1;
2452 struct cfil_msg_data_event *data_req;
2453 size_t hdrsize;
2454 struct inpcb *inp = (struct inpcb *)so->so_pcb;
2455 struct cfil_entry *entry;
2456 struct cfe_buf *entrybuf;
2457 struct content_filter *cfc;
5ba3f43e 2458 struct timeval tv;
2459
2460 cfil_rw_lock_shared(&cfil_lck_rw);
2461
2462 entry = &so->so_cfil->cfi_entries[kcunit - 1];
2463 if (outgoing)
2464 entrybuf = &entry->cfe_snd;
2465 else
2466 entrybuf = &entry->cfe_rcv;
2467
2468 cfc = entry->cfe_filter;
2469 if (cfc == NULL)
2470 goto done;
2471
2472 CFIL_LOG(LOG_INFO, "so %llx kcunit %u outgoing %d",
2473 (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit, outgoing);
2474
2475 socket_lock_assert_owned(so);
2476
2477 /* Would be wasteful to try */
2478 if (cfc->cf_flags & CFF_FLOW_CONTROLLED) {
2479 error = ENOBUFS;
2480 goto done;
2481 }
2482
2483 /* Make a copy of the data to pass to kernel control socket */
2484 copy = m_copym_mode(data, copyoffset, copylen, M_DONTWAIT,
2485 M_COPYM_NOOP_HDR);
2486 if (copy == NULL) {
2487 CFIL_LOG(LOG_ERR, "m_copym_mode() failed");
2488 error = ENOMEM;
2489 goto done;
2490 }
2491
2492 /* We need an mbuf packet for the message header */
2493 hdrsize = sizeof(struct cfil_msg_data_event);
2494 error = mbuf_allocpacket(MBUF_DONTWAIT, hdrsize, &one, &msg);
2495 if (error != 0) {
2496 CFIL_LOG(LOG_ERR, "mbuf_allocpacket() failed");
2497 m_freem(copy);
2498 /*
2499 * ENOBUFS is reserved to indicate flow control, hence ENOMEM here
2500 */
2501 error = ENOMEM;
2502 goto done;
2503 }
2504 mbuf_setlen(msg, hdrsize);
2505 mbuf_pkthdr_setlen(msg, hdrsize + copylen);
2506 msg->m_next = copy;
2507 data_req = (struct cfil_msg_data_event *)mbuf_data(msg);
2508 bzero(data_req, hdrsize);
2509 data_req->cfd_msghdr.cfm_len = hdrsize + copylen;
2510 data_req->cfd_msghdr.cfm_version = 1;
2511 data_req->cfd_msghdr.cfm_type = CFM_TYPE_EVENT;
2512 data_req->cfd_msghdr.cfm_op =
2513 outgoing ? CFM_OP_DATA_OUT : CFM_OP_DATA_IN;
2514 data_req->cfd_msghdr.cfm_sock_id =
2515 entry->cfe_cfil_info->cfi_sock_id;
2516 data_req->cfd_start_offset = entrybuf->cfe_peeked;
2517 data_req->cfd_end_offset = entrybuf->cfe_peeked + copylen;
2518
2519 /*
2520 * TBD:
2521 * For non-connected sockets the addresses need to be copied
2522 * from the passed parameters
2523 */
2524 if (inp->inp_vflag & INP_IPV6) {
2525 if (outgoing) {
2526 fill_ip6_sockaddr_4_6(&data_req->cfc_src,
2527 &inp->in6p_laddr, inp->inp_lport);
2528 fill_ip6_sockaddr_4_6(&data_req->cfc_dst,
2529 &inp->in6p_faddr, inp->inp_fport);
2530 } else {
2531 fill_ip6_sockaddr_4_6(&data_req->cfc_src,
2532 &inp->in6p_faddr, inp->inp_fport);
2533 fill_ip6_sockaddr_4_6(&data_req->cfc_dst,
2534 &inp->in6p_laddr, inp->inp_lport);
2535 }
2536 } else if (inp->inp_vflag & INP_IPV4) {
2537 if (outgoing) {
2538 fill_ip_sockaddr_4_6(&data_req->cfc_src,
2539 inp->inp_laddr, inp->inp_lport);
2540 fill_ip_sockaddr_4_6(&data_req->cfc_dst,
2541 inp->inp_faddr, inp->inp_fport);
2542 } else {
2543 fill_ip_sockaddr_4_6(&data_req->cfc_src,
2544 inp->inp_faddr, inp->inp_fport);
2545 fill_ip_sockaddr_4_6(&data_req->cfc_dst,
2546 inp->inp_laddr, inp->inp_lport);
2547 }
2548 }
2549
2550 microuptime(&tv);
2551 CFI_ADD_TIME_LOG(so->so_cfil, &tv, &so->so_cfil->cfi_first_event, data_req->cfd_msghdr.cfm_op);
2552
2553 /* Pass the message to the content filter */
2554 error = ctl_enqueuembuf(entry->cfe_filter->cf_kcref,
2555 entry->cfe_filter->cf_kcunit,
2556 msg, CTL_DATA_EOR);
2557 if (error != 0) {
2558 CFIL_LOG(LOG_ERR, "ctl_enqueuembuf() failed: %d", error);
2559 mbuf_freem(msg);
2560 goto done;
2561 }
2562 entry->cfe_flags &= ~CFEF_FLOW_CONTROLLED;
2563 OSIncrementAtomic(&cfil_stats.cfs_data_event_ok);
2564done:
2565 if (error == ENOBUFS) {
2566 entry->cfe_flags |= CFEF_FLOW_CONTROLLED;
2567 OSIncrementAtomic(
2568 &cfil_stats.cfs_data_event_flow_control);
2569
2570 if (!cfil_rw_lock_shared_to_exclusive(&cfil_lck_rw))
2571 cfil_rw_lock_exclusive(&cfil_lck_rw);
2572
2573 cfc->cf_flags |= CFF_FLOW_CONTROLLED;
2574
2575 cfil_rw_unlock_exclusive(&cfil_lck_rw);
2576 } else {
2577 if (error != 0)
2578 OSIncrementAtomic(&cfil_stats.cfs_data_event_fail);
2579
2580 cfil_rw_unlock_shared(&cfil_lck_rw);
2581 }
2582 return (error);
2583}
2584
2585/*
2586 * Process the queue of data waiting to be delivered to content filter
2587 */
2588static int
2589cfil_data_service_ctl_q(struct socket *so, uint32_t kcunit, int outgoing)
2590{
2591 errno_t error = 0;
2592 struct mbuf *data, *tmp = NULL;
2593 unsigned int datalen = 0, copylen = 0, copyoffset = 0;
2594 struct cfil_entry *entry;
2595 struct cfe_buf *entrybuf;
2596 uint64_t currentoffset = 0;
2597
2598 if (so->so_cfil == NULL)
2599 return (0);
2600
2601 CFIL_LOG(LOG_INFO, "so %llx kcunit %u outgoing %d",
2602 (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit, outgoing);
2603
2604 socket_lock_assert_owned(so);
2605
2606 entry = &so->so_cfil->cfi_entries[kcunit - 1];
2607 if (outgoing)
2608 entrybuf = &entry->cfe_snd;
2609 else
2610 entrybuf = &entry->cfe_rcv;
2611
2612 /* Send attached message if not yet done */
2613 if ((entry->cfe_flags & CFEF_SENT_SOCK_ATTACHED) == 0) {
2614 error = cfil_dispatch_attach_event(so, kcunit);
2615 if (error != 0) {
2616 /* We can recover from flow control */
2617 if (error == ENOBUFS || error == ENOMEM)
2618 error = 0;
2619 goto done;
2620 }
2621 } else if ((entry->cfe_flags & CFEF_DATA_START) == 0) {
2622 OSIncrementAtomic(&cfil_stats.cfs_ctl_q_not_started);
2623 goto done;
2624 }
2625 CFIL_LOG(LOG_DEBUG, "pass_offset %llu peeked %llu peek_offset %llu",
2626 entrybuf->cfe_pass_offset,
2627 entrybuf->cfe_peeked,
2628 entrybuf->cfe_peek_offset);
2629
2630 /* Move all data that can pass */
2631 while ((data = cfil_queue_first(&entrybuf->cfe_ctl_q)) != NULL &&
2632 entrybuf->cfe_ctl_q.q_start < entrybuf->cfe_pass_offset) {
2633 datalen = cfil_data_length(data, NULL);
2634 tmp = data;
2635
2636 if (entrybuf->cfe_ctl_q.q_start + datalen <=
2637 entrybuf->cfe_pass_offset) {
2638 /*
2639 * The first mbuf can fully pass
2640 */
2641 copylen = datalen;
2642 } else {
2643 /*
2644 * The first mbuf can partially pass
2645 */
2646 copylen = entrybuf->cfe_pass_offset -
2647 entrybuf->cfe_ctl_q.q_start;
2648 }
2649 VERIFY(copylen <= datalen);
2650
2651 CFIL_LOG(LOG_DEBUG,
2652 "%llx first %llu peeked %llu pass %llu peek %llu"
2653 "datalen %u copylen %u",
2654 (uint64_t)VM_KERNEL_ADDRPERM(tmp),
2655 entrybuf->cfe_ctl_q.q_start,
2656 entrybuf->cfe_peeked,
2657 entrybuf->cfe_pass_offset,
2658 entrybuf->cfe_peek_offset,
2659 datalen, copylen);
2660
2661 /*
2662 * Data that passes has been peeked at explicitly or
2663 * implicitly
2664 */
2665 if (entrybuf->cfe_ctl_q.q_start + copylen >
2666 entrybuf->cfe_peeked)
2667 entrybuf->cfe_peeked =
2668 entrybuf->cfe_ctl_q.q_start + copylen;
2669 /*
2670 * Stop on partial pass
2671 */
2672 if (copylen < datalen)
2673 break;
2674
2675 /* All good, move full data from ctl queue to pending queue */
2676 cfil_queue_remove(&entrybuf->cfe_ctl_q, data, datalen);
2677
2678 cfil_queue_enqueue(&entrybuf->cfe_pending_q, data, datalen);
2679 if (outgoing)
2680 OSAddAtomic64(datalen,
2681 &cfil_stats.cfs_pending_q_out_enqueued);
2682 else
2683 OSAddAtomic64(datalen,
2684 &cfil_stats.cfs_pending_q_in_enqueued);
2685 }
2686 CFIL_INFO_VERIFY(so->so_cfil);
2687 if (tmp != NULL)
2688 CFIL_LOG(LOG_DEBUG,
2689 "%llx first %llu peeked %llu pass %llu peek %llu"
2690 "datalen %u copylen %u",
2691 (uint64_t)VM_KERNEL_ADDRPERM(tmp),
2692 entrybuf->cfe_ctl_q.q_start,
2693 entrybuf->cfe_peeked,
2694 entrybuf->cfe_pass_offset,
2695 entrybuf->cfe_peek_offset,
2696 datalen, copylen);
2697 tmp = NULL;
2698
2699 /* Now deal with remaining data the filter wants to peek at */
2700 for (data = cfil_queue_first(&entrybuf->cfe_ctl_q),
2701 currentoffset = entrybuf->cfe_ctl_q.q_start;
2702 data != NULL && currentoffset < entrybuf->cfe_peek_offset;
2703 data = cfil_queue_next(&entrybuf->cfe_ctl_q, data),
2704 currentoffset += datalen) {
2705 datalen = cfil_data_length(data, NULL);
2706 tmp = data;
2707
2708 /* We've already peeked at this mbuf */
2709 if (currentoffset + datalen <= entrybuf->cfe_peeked)
2710 continue;
2711 /*
2712 * The data in the first mbuf may have been
2713 * partially peeked at
2714 */
2715 copyoffset = entrybuf->cfe_peeked - currentoffset;
2716 VERIFY(copyoffset < datalen);
2717 copylen = datalen - copyoffset;
2718 VERIFY(copylen <= datalen);
2719 /*
2720 * Do not copy more than needed
2721 */
2722 if (currentoffset + copyoffset + copylen >
2723 entrybuf->cfe_peek_offset) {
2724 copylen = entrybuf->cfe_peek_offset -
2725 (currentoffset + copyoffset);
2726 }
2727
2728 CFIL_LOG(LOG_DEBUG,
2729 "%llx current %llu peeked %llu pass %llu peek %llu"
2730 "datalen %u copylen %u copyoffset %u",
2731 (uint64_t)VM_KERNEL_ADDRPERM(tmp),
2732 currentoffset,
2733 entrybuf->cfe_peeked,
2734 entrybuf->cfe_pass_offset,
2735 entrybuf->cfe_peek_offset,
2736 datalen, copylen, copyoffset);
2737
2738 /*
2739 * Stop if there is nothing more to peek at
2740 */
2741 if (copylen == 0)
2742 break;
2743 /*
2744 * Let the filter get a peek at this span of data
2745 */
2746 error = cfil_dispatch_data_event(so, kcunit,
2747 outgoing, data, copyoffset, copylen);
2748 if (error != 0) {
2749 /* On error, leave data in ctl_q */
2750 break;
2751 }
2752 entrybuf->cfe_peeked += copylen;
2753 if (outgoing)
2754 OSAddAtomic64(copylen,
2755 &cfil_stats.cfs_ctl_q_out_peeked);
2756 else
2757 OSAddAtomic64(copylen,
2758 &cfil_stats.cfs_ctl_q_in_peeked);
2759
2760 /* Stop when data could not be fully peeked at */
2761 if (copylen + copyoffset < datalen)
2762 break;
2763 }
2764 CFIL_INFO_VERIFY(so->so_cfil);
2765 if (tmp != NULL)
2766 CFIL_LOG(LOG_DEBUG,
2767 "%llx first %llu peeked %llu pass %llu peek %llu"
2768 "datalen %u copylen %u copyoffset %u",
2769 (uint64_t)VM_KERNEL_ADDRPERM(tmp),
2770 currentoffset,
2771 entrybuf->cfe_peeked,
2772 entrybuf->cfe_pass_offset,
2773 entrybuf->cfe_peek_offset,
2774 datalen, copylen, copyoffset);
2775
2776 /*
2777 * Process data that has passed the filter
2778 */
2779 error = cfil_service_pending_queue(so, kcunit, outgoing);
2780 if (error != 0) {
2781 CFIL_LOG(LOG_ERR, "cfil_service_pending_queue() error %d",
2782 error);
2783 goto done;
2784 }
2785
2786 /*
2787 * Dispatch disconnect events that could not be sent
2788 */
2789 if (so->so_cfil == NULL)
2790 goto done;
2791 else if (outgoing) {
2792 if ((so->so_cfil->cfi_flags & CFIF_SHUT_WR) &&
2793 !(entry->cfe_flags & CFEF_SENT_DISCONNECT_OUT))
2794 cfil_dispatch_disconnect_event(so, kcunit, 1);
2795 } else {
2796 if ((so->so_cfil->cfi_flags & CFIF_SHUT_RD) &&
2797 !(entry->cfe_flags & CFEF_SENT_DISCONNECT_IN))
2798 cfil_dispatch_disconnect_event(so, kcunit, 0);
2799 }
2800
2801done:
2802 CFIL_LOG(LOG_DEBUG,
2803 "first %llu peeked %llu pass %llu peek %llu",
2804 entrybuf->cfe_ctl_q.q_start,
2805 entrybuf->cfe_peeked,
2806 entrybuf->cfe_pass_offset,
2807 entrybuf->cfe_peek_offset);
2808
2809 CFIL_INFO_VERIFY(so->so_cfil);
2810 return (error);
2811}
2812
2813/*
2814 * cfil_data_filter()
2815 *
2816 * Process data for a content filter installed on a socket
2817 */
2818int
2819cfil_data_filter(struct socket *so, uint32_t kcunit, int outgoing,
2820 struct mbuf *data, uint64_t datalen)
2821{
2822 errno_t error = 0;
2823 struct cfil_entry *entry;
2824 struct cfe_buf *entrybuf;
2825
2826 CFIL_LOG(LOG_INFO, "so %llx kcunit %u outgoing %d",
2827 (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit, outgoing);
2828
2829 socket_lock_assert_owned(so);
2830
2831 entry = &so->so_cfil->cfi_entries[kcunit - 1];
2832 if (outgoing)
2833 entrybuf = &entry->cfe_snd;
2834 else
2835 entrybuf = &entry->cfe_rcv;
2836
2837 /* Are we attached to the filter? */
2838 if (entry->cfe_filter == NULL) {
2839 error = 0;
2840 goto done;
2841 }
2842
2843 /* Dispatch to filters */
2844 cfil_queue_enqueue(&entrybuf->cfe_ctl_q, data, datalen);
2845 if (outgoing)
2846 OSAddAtomic64(datalen,
2847 &cfil_stats.cfs_ctl_q_out_enqueued);
2848 else
2849 OSAddAtomic64(datalen,
2850 &cfil_stats.cfs_ctl_q_in_enqueued);
2851
2852 error = cfil_data_service_ctl_q(so, kcunit, outgoing);
2853 if (error != 0) {
2854 CFIL_LOG(LOG_ERR, "cfil_data_service_ctl_q() error %d",
2855 error);
2856 }
2857 /*
2858 * We have to return EJUSTRETURN in all cases to avoid double free
2859 * by socket layer
2860 */
2861 error = EJUSTRETURN;
2862done:
2863 CFIL_INFO_VERIFY(so->so_cfil);
2864
2865 CFIL_LOG(LOG_INFO, "return %d", error);
2866 return (error);
2867}
2868
2869/*
2870 * cfil_service_inject_queue() re-injects data that passed the
2871 * content filters
2872 */
2873static int
2874cfil_service_inject_queue(struct socket *so, int outgoing)
2875{
2876 mbuf_t data;
2877 unsigned int datalen;
2878 int mbcnt;
2879 unsigned int copylen;
2880 errno_t error = 0;
2881 struct mbuf *copy = NULL;
2882 struct cfi_buf *cfi_buf;
2883 struct cfil_queue *inject_q;
2884 int need_rwakeup = 0;
2885
2886 if (so->so_cfil == NULL)
2887 return (0);
2888
2889 CFIL_LOG(LOG_INFO, "so %llx outgoing %d",
2890 (uint64_t)VM_KERNEL_ADDRPERM(so), outgoing);
2891
2892 socket_lock_assert_owned(so);
2893
2894 if (outgoing) {
2895 cfi_buf = &so->so_cfil->cfi_snd;
2896 so->so_cfil->cfi_flags &= ~CFIF_RETRY_INJECT_OUT;
2897 } else {
2898 cfi_buf = &so->so_cfil->cfi_rcv;
2899 so->so_cfil->cfi_flags &= ~CFIF_RETRY_INJECT_IN;
2900 }
2901 inject_q = &cfi_buf->cfi_inject_q;
2902
2903 while ((data = cfil_queue_first(inject_q)) != NULL) {
2904 datalen = cfil_data_length(data, &mbcnt);
2905
2906 CFIL_LOG(LOG_INFO, "data %llx datalen %u",
2907 (uint64_t)VM_KERNEL_ADDRPERM(data), datalen);
2908
2909 /* Make a copy in case of injection error */
2910 copy = m_copym_mode(data, 0, M_COPYALL, M_DONTWAIT,
2911 M_COPYM_COPY_HDR);
2912 if (copy == NULL) {
2913 CFIL_LOG(LOG_ERR, "m_copym_mode() failed");
2914 error = ENOMEM;
2915 break;
2916 }
2917
2918 if ((copylen = m_length(copy)) != datalen)
2919 panic("%s so %p copylen %d != datalen %d",
2920 __func__, so, copylen, datalen);
2921
2922 if (outgoing) {
2923 socket_unlock(so, 0);
2924
2925 /*
2926 * Set both DONTWAIT and NBIO flags as we really
2927 * do not want to block
2928 */
2929 error = sosend(so, NULL, NULL,
2930 copy, NULL,
2931 MSG_SKIPCFIL | MSG_DONTWAIT | MSG_NBIO);
2932
2933 socket_lock(so, 0);
2934
2935 if (error != 0) {
2936 CFIL_LOG(LOG_ERR, "sosend() failed %d",
2937 error);
2938 }
2939 } else {
2940 copy->m_flags |= M_SKIPCFIL;
2941
2942 /*
2943 * NOTE:
2944 * This works only because we support plain TCP
2945 * For UDP, RAWIP, MPTCP and message TCP we'll
2946 * need to call the appropriate sbappendxxx()
2947 * or fix sock_inject_data_in()
2948 */
2949 if (sbappendstream(&so->so_rcv, copy))
2950 need_rwakeup = 1;
2951 }
2952
2953 /* Need to reassess if filter is still attached after unlock */
2954 if (so->so_cfil == NULL) {
2955 CFIL_LOG(LOG_ERR, "so %llx cfil detached",
2956 (uint64_t)VM_KERNEL_ADDRPERM(so));
2957 OSIncrementAtomic(&cfil_stats.cfs_inject_q_detached);
2958 error = 0;
2959 break;
2960 }
2961 if (error != 0)
2962 break;
2963
2964 /* Injection successful */
2965 cfil_queue_remove(inject_q, data, datalen);
2966 mbuf_freem(data);
2967
2968 cfi_buf->cfi_pending_first += datalen;
2969 cfi_buf->cfi_pending_mbcnt -= mbcnt;
2970 cfil_info_buf_verify(cfi_buf);
2971
2972 if (outgoing)
2973 OSAddAtomic64(datalen,
2974 &cfil_stats.cfs_inject_q_out_passed);
2975 else
2976 OSAddAtomic64(datalen,
2977 &cfil_stats.cfs_inject_q_in_passed);
2978 }
2979
2980 /* A single wakeup for several packets is more efficient */
2981 if (need_rwakeup)
2982 sorwakeup(so);
2983
2984 if (error != 0 && so->so_cfil) {
2985 if (error == ENOBUFS)
2986 OSIncrementAtomic(&cfil_stats.cfs_inject_q_nobufs);
2987 if (error == ENOMEM)
2988 OSIncrementAtomic(&cfil_stats.cfs_inject_q_nomem);
2989
2990 if (outgoing) {
2991 so->so_cfil->cfi_flags |= CFIF_RETRY_INJECT_OUT;
2992 OSIncrementAtomic(&cfil_stats.cfs_inject_q_out_fail);
2993 } else {
2994 so->so_cfil->cfi_flags |= CFIF_RETRY_INJECT_IN;
2995 OSIncrementAtomic(&cfil_stats.cfs_inject_q_in_fail);
2996 }
2997 }
2998
2999 /*
3000 * Notify
3001 */
3002 if (so->so_cfil && (so->so_cfil->cfi_flags & CFIF_SHUT_WR)) {
3003 cfil_sock_notify_shutdown(so, SHUT_WR);
3004 if (cfil_sock_data_pending(&so->so_snd) == 0)
3005 soshutdownlock_final(so, SHUT_WR);
3006 }
3007 if (so->so_cfil && (so->so_cfil->cfi_flags & CFIF_CLOSE_WAIT)) {
3008 if (cfil_filters_attached(so) == 0) {
3009 CFIL_LOG(LOG_INFO, "so %llx waking",
3010 (uint64_t)VM_KERNEL_ADDRPERM(so));
3011 wakeup((caddr_t)&so->so_cfil);
3012 }
3013 }
3014
3015 CFIL_INFO_VERIFY(so->so_cfil);
3016
3017 return (error);
3018}
3019
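/*
 * cfil_service_pending_queue()
 *
 * Dequeue the mbufs of the pending queue that are below the pass
 * offset and hand them to the remaining content filters; when every
 * filter has passed the data, move it to the inject queue for
 * re-injection into the socket.
 */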
3020static int
3021cfil_service_pending_queue(struct socket *so, uint32_t kcunit, int outgoing)
3022{
3023 uint64_t passlen, curlen;
3024 mbuf_t data;
3025 unsigned int datalen;
3026 errno_t error = 0;
3027 struct cfil_entry *entry;
3028 struct cfe_buf *entrybuf;
3029 struct cfil_queue *pending_q;
3030
3031 CFIL_LOG(LOG_INFO, "so %llx kcunit %u outgoing %d",
3032 (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit, outgoing);
3033
3034 socket_lock_assert_owned(so);
3035
3036 entry = &so->so_cfil->cfi_entries[kcunit - 1];
3037 if (outgoing)
3038 entrybuf = &entry->cfe_snd;
3039 else
3040 entrybuf = &entry->cfe_rcv;
3041
3042 pending_q = &entrybuf->cfe_pending_q;
3043
3044 passlen = entrybuf->cfe_pass_offset - pending_q->q_start;
3045
3046 /*
3047 * Locate the chunks of data that we can pass to the next filter
3048 * A data chunk must be on mbuf boundaries
3049 */
3050 curlen = 0;
3051 while ((data = cfil_queue_first(pending_q)) != NULL) {
3052 datalen = cfil_data_length(data, NULL);
3053
3054 CFIL_LOG(LOG_INFO,
3055 "data %llx datalen %u passlen %llu curlen %llu",
3056 (uint64_t)VM_KERNEL_ADDRPERM(data), datalen,
3057 passlen, curlen);
3058
3059 if (curlen + datalen > passlen)
3060 break;
3061
3062 cfil_queue_remove(pending_q, data, datalen);
3063
3064 curlen += datalen;
3065
3066 for (kcunit += 1;
3067 kcunit <= MAX_CONTENT_FILTER;
3068 kcunit++) {
3069 error = cfil_data_filter(so, kcunit, outgoing,
3070 data, datalen);
3071 /* 0 means passed so we can continue */
3072 if (error != 0)
3073 break;
3074 }
3075 /* When data has passed all filters, re-inject */
3076 if (error == 0) {
3077 if (outgoing) {
3078 cfil_queue_enqueue(
3079 &so->so_cfil->cfi_snd.cfi_inject_q,
3080 data, datalen);
3081 OSAddAtomic64(datalen,
3082 &cfil_stats.cfs_inject_q_out_enqueued);
3083 } else {
3084 cfil_queue_enqueue(
3085 &so->so_cfil->cfi_rcv.cfi_inject_q,
3086 data, datalen);
3087 OSAddAtomic64(datalen,
3088 &cfil_stats.cfs_inject_q_in_enqueued);
3089 }
3090 }
3091 }
3092
3093 CFIL_INFO_VERIFY(so->so_cfil);
3094
3095 return (error);
3096}
3097
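/*
 * cfil_update_data_offsets()
 *
 * Record the pass and peek offsets received from the filter agent
 * for one direction and service the control queue accordingly.
 * The filter entry is marked detached once both directions pass
 * everything (CFM_MAX_OFFSET) or the socket is closing with no more
 * data queued for the filter.
 */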
3098int
3099cfil_update_data_offsets(struct socket *so, uint32_t kcunit, int outgoing,
3100 uint64_t pass_offset, uint64_t peek_offset)
3101{
3102 errno_t error = 0;
3e170ce0 3103 struct cfil_entry *entry = NULL;
3104 struct cfe_buf *entrybuf;
3105 int updated = 0;
3106
3107 CFIL_LOG(LOG_INFO, "pass %llu peek %llu", pass_offset, peek_offset);
3108
3109 socket_lock_assert_owned(so);
3110
3111 if (so->so_cfil == NULL) {
3112 CFIL_LOG(LOG_ERR, "so %llx cfil detached",
3113 (uint64_t)VM_KERNEL_ADDRPERM(so));
3114 error = 0;
3115 goto done;
3116 } else if (so->so_cfil->cfi_flags & CFIF_DROP) {
3117 CFIL_LOG(LOG_ERR, "so %llx drop set",
3118 (uint64_t)VM_KERNEL_ADDRPERM(so));
3119 error = EPIPE;
3120 goto done;
3121 }
3122
3123 entry = &so->so_cfil->cfi_entries[kcunit - 1];
3124 if (outgoing)
3125 entrybuf = &entry->cfe_snd;
3126 else
3127 entrybuf = &entry->cfe_rcv;
3128
3129 /* Record updated offsets for this content filter */
3130 if (pass_offset > entrybuf->cfe_pass_offset) {
3131 entrybuf->cfe_pass_offset = pass_offset;
3132
3133 if (entrybuf->cfe_peek_offset < entrybuf->cfe_pass_offset)
3134 entrybuf->cfe_peek_offset = entrybuf->cfe_pass_offset;
3135 updated = 1;
3136 } else {
3137 CFIL_LOG(LOG_INFO, "pass_offset %llu <= cfe_pass_offset %llu",
3138 pass_offset, entrybuf->cfe_pass_offset);
3139 }
3140 /* Filter does not want or need to see data that's allowed to pass */
3141 if (peek_offset > entrybuf->cfe_pass_offset &&
3142 peek_offset > entrybuf->cfe_peek_offset) {
3143 entrybuf->cfe_peek_offset = peek_offset;
3144 updated = 1;
3145 }
3146 /* Nothing to do */
3147 if (updated == 0)
3148 goto done;
3149
3150 /* Move data held in control queue to pending queue if needed */
3151 error = cfil_data_service_ctl_q(so, kcunit, outgoing);
3152 if (error != 0) {
3153 CFIL_LOG(LOG_ERR, "cfil_data_service_ctl_q() error %d",
3154 error);
3155 goto done;
3156 }
3157 error = EJUSTRETURN;
3158
3159done:
3160 /*
3161 * The filter is effectively detached when it passes everything from both sides
3162 * or when the socket is closed and no more data is waiting
3163 * to be delivered to the filter
3164 */
3e170ce0 3165 if (entry != NULL &&
3166 ((entry->cfe_snd.cfe_pass_offset == CFM_MAX_OFFSET &&
3167 entry->cfe_rcv.cfe_pass_offset == CFM_MAX_OFFSET) ||
3168 ((so->so_cfil->cfi_flags & CFIF_CLOSE_WAIT) &&
3169 cfil_queue_empty(&entry->cfe_snd.cfe_ctl_q) &&
3170 cfil_queue_empty(&entry->cfe_rcv.cfe_ctl_q)))) {
3171 entry->cfe_flags |= CFEF_CFIL_DETACHED;
3172 CFIL_LOG(LOG_INFO, "so %llx detached %u",
3173 (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit);
3174 if ((so->so_cfil->cfi_flags & CFIF_CLOSE_WAIT) &&
3175 cfil_filters_attached(so) == 0) {
3176 CFIL_LOG(LOG_INFO, "so %llx waking",
3177 (uint64_t)VM_KERNEL_ADDRPERM(so));
3178 wakeup((caddr_t)&so->so_cfil);
3179 }
3180 }
3181 CFIL_INFO_VERIFY(so->so_cfil);
3182 CFIL_LOG(LOG_INFO, "return %d", error);
3183 return (error);
3184}
3185
3186/*
3187 * Update pass offset for socket when no data is pending
3188 */
3189static int
3190cfil_set_socket_pass_offset(struct socket *so, int outgoing)
3191{
3192 struct cfi_buf *cfi_buf;
3193 struct cfil_entry *entry;
3194 struct cfe_buf *entrybuf;
3195 uint32_t kcunit;
3196 uint64_t pass_offset = 0;
3197
3198 if (so->so_cfil == NULL)
3199 return (0);
3200
3201 CFIL_LOG(LOG_INFO, "so %llx outgoing %d",
3202 (uint64_t)VM_KERNEL_ADDRPERM(so), outgoing);
3203
3204 socket_lock_assert_owned(so);
3205
3206 if (outgoing)
3207 cfi_buf = &so->so_cfil->cfi_snd;
3208 else
3209 cfi_buf = &so->so_cfil->cfi_rcv;
3210
3211 if (cfi_buf->cfi_pending_last - cfi_buf->cfi_pending_first == 0) {
3212 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
3213 entry = &so->so_cfil->cfi_entries[kcunit - 1];
3214
3215 /* Are we attached to a filter? */
3216 if (entry->cfe_filter == NULL)
3217 continue;
3218
3219 if (outgoing)
3220 entrybuf = &entry->cfe_snd;
3221 else
3222 entrybuf = &entry->cfe_rcv;
3223
3224 if (pass_offset == 0 ||
3225 entrybuf->cfe_pass_offset < pass_offset)
3226 pass_offset = entrybuf->cfe_pass_offset;
3227 }
3228 cfi_buf->cfi_pass_offset = pass_offset;
3229 }
3230
3231 return (0);
3232}
3233
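/*
 * cfil_action_data_pass()
 *
 * Handle a pass action from the filter agent: update the pass and
 * peek offsets, re-inject the data that may now pass and recompute
 * the per-socket pass offset.
 */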
3234int
3235cfil_action_data_pass(struct socket *so, uint32_t kcunit, int outgoing,
3236 uint64_t pass_offset, uint64_t peek_offset)
3237{
3238 errno_t error = 0;
3239
3240 CFIL_LOG(LOG_INFO, "");
3241
3242 socket_lock_assert_owned(so);
3243
3244 error = cfil_acquire_sockbuf(so, outgoing);
3245 if (error != 0) {
3246 CFIL_LOG(LOG_INFO, "so %llx %s dropped",
3247 (uint64_t)VM_KERNEL_ADDRPERM(so),
3248 outgoing ? "out" : "in");
3249 goto release;
3250 }
3251
3252 error = cfil_update_data_offsets(so, kcunit, outgoing,
3253 pass_offset, peek_offset);
3254
3255 cfil_service_inject_queue(so, outgoing);
3256
3257 cfil_set_socket_pass_offset(so, outgoing);
3258release:
3259 CFIL_INFO_VERIFY(so->so_cfil);
3260 cfil_release_sockbuf(so, outgoing);
3261
3262 return (error);
3263}
3264
3265
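/*
 * cfil_flush_queues()
 *
 * Drain the control, pending and inject queues in both directions
 * when the socket is being dropped or closed.
 */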
3266static void
3267cfil_flush_queues(struct socket *so)
3268{
3269 struct cfil_entry *entry;
3270 int kcunit;
3271 uint64_t drained;
3272
3273 if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL)
3274 goto done;
3275
3276 socket_lock_assert_owned(so);
3277
3278 /*
3279 * Flush the output queues and ignore errors as long as
3280 * we are attached
3281 */
3282 (void) cfil_acquire_sockbuf(so, 1);
3283 if (so->so_cfil != NULL) {
3284 drained = 0;
3285 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
3286 entry = &so->so_cfil->cfi_entries[kcunit - 1];
3287
3288 drained += cfil_queue_drain(&entry->cfe_snd.cfe_ctl_q);
3289 drained += cfil_queue_drain(
3290 &entry->cfe_snd.cfe_pending_q);
3291 }
3292 drained += cfil_queue_drain(&so->so_cfil->cfi_snd.cfi_inject_q);
3293 if (drained) {
3294 if (so->so_cfil->cfi_flags & CFIF_DROP)
3295 OSIncrementAtomic(
3296 &cfil_stats.cfs_flush_out_drop);
3297 else
3298 OSIncrementAtomic(
3299 &cfil_stats.cfs_flush_out_close);
3300 }
3301 }
3302 cfil_release_sockbuf(so, 1);
3303
3304 /*
3305 * Flush the input queues
3306 */
3307 (void) cfil_acquire_sockbuf(so, 0);
3308 if (so->so_cfil != NULL) {
3309 drained = 0;
3310 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
3311 entry = &so->so_cfil->cfi_entries[kcunit - 1];
3312
3313 drained += cfil_queue_drain(
3314 &entry->cfe_rcv.cfe_ctl_q);
3315 drained += cfil_queue_drain(
3316 &entry->cfe_rcv.cfe_pending_q);
3317 }
3318 drained += cfil_queue_drain(&so->so_cfil->cfi_rcv.cfi_inject_q);
3319 if (drained) {
3320 if (so->so_cfil->cfi_flags & CFIF_DROP)
3321 OSIncrementAtomic(
3322 &cfil_stats.cfs_flush_in_drop);
3323 else
3324 OSIncrementAtomic(
3325 &cfil_stats.cfs_flush_in_close);
3326 }
3327 }
3328 cfil_release_sockbuf(so, 0);
3329done:
3330 CFIL_INFO_VERIFY(so->so_cfil);
3331}
3332
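/*
 * cfil_action_drop()
 *
 * Handle a drop action from the filter agent: mark the socket
 * defunct, disconnect it, mark the filter entry as detached and
 * flush any pending data.
 */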
3333int
3334cfil_action_drop(struct socket *so, uint32_t kcunit)
3335{
3336 errno_t error = 0;
3337 struct cfil_entry *entry;
3338 struct proc *p;
3339
3340 if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL)
3341 goto done;
3342
3343 socket_lock_assert_owned(so);
3344
3345 entry = &so->so_cfil->cfi_entries[kcunit - 1];
3346
3347 /* Are we attached to the filter? */
3348 if (entry->cfe_filter == NULL)
3349 goto done;
3350
3351 so->so_cfil->cfi_flags |= CFIF_DROP;
3352
3353 p = current_proc();
3354
3355 /*
3356 * Force the socket to be marked defunct
3357 * (forcing fixed along with rdar://19391339)
3358 */
fe8ab488 3359 error = sosetdefunct(p, so,
3360 SHUTDOWN_SOCKET_LEVEL_CONTENT_FILTER | SHUTDOWN_SOCKET_LEVEL_DISCONNECT_ALL,
3361 FALSE);
3362
3363 /* Flush the socket buffer and disconnect */
3364 if (error == 0)
3365 error = sodefunct(p, so,
3366 SHUTDOWN_SOCKET_LEVEL_CONTENT_FILTER | SHUTDOWN_SOCKET_LEVEL_DISCONNECT_ALL);
3367
3368 /* The filter is done, mark as detached */
3369 entry->cfe_flags |= CFEF_CFIL_DETACHED;
3370 CFIL_LOG(LOG_INFO, "so %llx detached %u",
3371 (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit);
3372
3373 /* Pending data needs to go */
3374 cfil_flush_queues(so);
3375
3376 if (so->so_cfil && (so->so_cfil->cfi_flags & CFIF_CLOSE_WAIT)) {
3377 if (cfil_filters_attached(so) == 0) {
3378 CFIL_LOG(LOG_INFO, "so %llx waking",
3379 (uint64_t)VM_KERNEL_ADDRPERM(so));
3380 wakeup((caddr_t)&so->so_cfil);
3381 }
3382 }
3383done:
3384 return (error);
3385}
3386
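/*
 * cfil_action_bless_client()
 *
 * Give the filter agent's own connection, identified by its client
 * UUID, an automatic pass in both directions, or mark it to skip
 * content filtering when it is not attached yet.
 */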
3387int
3388cfil_action_bless_client(uint32_t kcunit, struct cfil_msg_hdr *msghdr)
3389{
3390 errno_t error = 0;
3391
3392 cfil_rw_lock_exclusive(&cfil_lck_rw);
3393
3394 bool cfil_attached = false;
3395 struct cfil_msg_bless_client *blessmsg = (struct cfil_msg_bless_client *)msghdr;
3396 struct socket *so = cfil_socket_from_client_uuid(blessmsg->cfb_client_uuid, &cfil_attached);
3397 if (so == NULL) {
3398 error = ENOENT;
3399 } else {
3400 // The client gets a pass automatically
3401 socket_lock(so, 1);
3402 if (cfil_attached) {
3403 (void)cfil_action_data_pass(so, kcunit, 1, CFM_MAX_OFFSET, CFM_MAX_OFFSET);
3404 (void)cfil_action_data_pass(so, kcunit, 0, CFM_MAX_OFFSET, CFM_MAX_OFFSET);
3405 } else {
3406 so->so_flags1 |= SOF1_CONTENT_FILTER_SKIP;
3407 }
3408 socket_unlock(so, 1);
3409 }
3410
3411 cfil_rw_unlock_exclusive(&cfil_lck_rw);
3412
3413 return (error);
3414}
3415
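/*
 * cfil_update_entry_offsets()
 *
 * Fast path used when data is already below the pass offset: advance
 * the queue offsets of every attached filter entry without queueing
 * the data.
 */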
3416static int
3417cfil_update_entry_offsets(struct socket *so, int outgoing, unsigned int datalen)
3418{
3419 struct cfil_entry *entry;
3420 struct cfe_buf *entrybuf;
3421 uint32_t kcunit;
3422
3423 CFIL_LOG(LOG_INFO, "so %llx outgoing %d datalen %u",
3424 (uint64_t)VM_KERNEL_ADDRPERM(so), outgoing, datalen);
3425
3426 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
3427 entry = &so->so_cfil->cfi_entries[kcunit - 1];
3428
3429 /* Are we attached to the filter? */
3430 if (entry->cfe_filter == NULL)
3431 continue;
3432
3433 if (outgoing)
3434 entrybuf = &entry->cfe_snd;
3435 else
3436 entrybuf = &entry->cfe_rcv;
3437
3438 entrybuf->cfe_ctl_q.q_start += datalen;
3439 entrybuf->cfe_pass_offset = entrybuf->cfe_ctl_q.q_start;
3440 entrybuf->cfe_peeked = entrybuf->cfe_ctl_q.q_start;
3441 if (entrybuf->cfe_peek_offset < entrybuf->cfe_pass_offset)
3442 entrybuf->cfe_peek_offset = entrybuf->cfe_pass_offset;
3443
3444 entrybuf->cfe_ctl_q.q_end += datalen;
3445
3446 entrybuf->cfe_pending_q.q_start += datalen;
3447 entrybuf->cfe_pending_q.q_end += datalen;
3448 }
3449 CFIL_INFO_VERIFY(so->so_cfil);
3450 return (0);
3451}
3452
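/*
 * cfil_data_common()
 *
 * Common handling for data sent or received on a filtered socket:
 * account for the data, take the fast path when it is already below
 * the pass offset, otherwise run it through each attached content
 * filter in sequence.
 */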
3453int
3454cfil_data_common(struct socket *so, int outgoing, struct sockaddr *to,
3455 struct mbuf *data, struct mbuf *control, uint32_t flags)
3456{
3457#pragma unused(to, control, flags)
3458 errno_t error = 0;
3459 unsigned int datalen;
3460 int mbcnt;
3461 int kcunit;
3462 struct cfi_buf *cfi_buf;
3463
3464 if (so->so_cfil == NULL) {
3465 CFIL_LOG(LOG_ERR, "so %llx cfil detached",
3466 (uint64_t)VM_KERNEL_ADDRPERM(so));
3467 error = 0;
3468 goto done;
3469 } else if (so->so_cfil->cfi_flags & CFIF_DROP) {
3470 CFIL_LOG(LOG_ERR, "so %llx drop set",
3471 (uint64_t)VM_KERNEL_ADDRPERM(so));
3472 error = EPIPE;
3473 goto done;
3474 }
3475
3476 datalen = cfil_data_length(data, &mbcnt);
3477
3478 CFIL_LOG(LOG_INFO, "so %llx %s m %llx len %u flags 0x%x nextpkt %llx",
3479 (uint64_t)VM_KERNEL_ADDRPERM(so),
3480 outgoing ? "out" : "in",
3481 (uint64_t)VM_KERNEL_ADDRPERM(data), datalen, data->m_flags,
3482 (uint64_t)VM_KERNEL_ADDRPERM(data->m_nextpkt));
3483
3484 if (outgoing)
3485 cfi_buf = &so->so_cfil->cfi_snd;
3486 else
3487 cfi_buf = &so->so_cfil->cfi_rcv;
3488
3489 cfi_buf->cfi_pending_last += datalen;
3490 cfi_buf->cfi_pending_mbcnt += mbcnt;
3491 cfil_info_buf_verify(cfi_buf);
3492
3493 CFIL_LOG(LOG_INFO, "so %llx cfi_pending_last %llu cfi_pass_offset %llu",
3494 (uint64_t)VM_KERNEL_ADDRPERM(so),
3495 cfi_buf->cfi_pending_last,
3496 cfi_buf->cfi_pass_offset);
3497
3498 /* Fast path when below pass offset */
3499 if (cfi_buf->cfi_pending_last <= cfi_buf->cfi_pass_offset) {
3500 cfil_update_entry_offsets(so, outgoing, datalen);
3501 } else {
3502 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
3503 error = cfil_data_filter(so, kcunit, outgoing, data,
3504 datalen);
3505 /* 0 means passed so continue with next filter */
3506 if (error != 0)
3507 break;
3508 }
3509 }
3510
3511 /* Move cursor if no filter claimed the data */
3512 if (error == 0) {
3513 cfi_buf->cfi_pending_first += datalen;
3514 cfi_buf->cfi_pending_mbcnt -= mbcnt;
3515 cfil_info_buf_verify(cfi_buf);
3516 }
3517done:
3518 CFIL_INFO_VERIFY(so->so_cfil);
3519
3520 return (error);
3521}
3522
3523/*
3524 * Callback from socket layer sosendxxx()
3525 */
3526int
3527cfil_sock_data_out(struct socket *so, struct sockaddr *to,
3528 struct mbuf *data, struct mbuf *control, uint32_t flags)
3529{
3530 int error = 0;
3531
3532 if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL)
3533 return (0);
3534
3535 socket_lock_assert_owned(so);
3536
3537 if (so->so_cfil->cfi_flags & CFIF_DROP) {
3538 CFIL_LOG(LOG_ERR, "so %llx drop set",
3539 (uint64_t)VM_KERNEL_ADDRPERM(so));
3540 return (EPIPE);
3541 }
3542 if (control != NULL) {
3543 CFIL_LOG(LOG_ERR, "so %llx control",
3544 (uint64_t)VM_KERNEL_ADDRPERM(so));
3545 OSIncrementAtomic(&cfil_stats.cfs_data_out_control);
3546 }
3547 if ((flags & MSG_OOB)) {
3548 CFIL_LOG(LOG_ERR, "so %llx MSG_OOB",
3549 (uint64_t)VM_KERNEL_ADDRPERM(so));
3550 OSIncrementAtomic(&cfil_stats.cfs_data_out_oob);
3551 }
3552 if ((so->so_snd.sb_flags & SB_LOCK) == 0)
3553 panic("so %p SB_LOCK not set", so);
3554
3555 if (so->so_snd.sb_cfil_thread != NULL)
3556 panic("%s sb_cfil_thread %p not NULL", __func__,
3557 so->so_snd.sb_cfil_thread);
3558
3559 error = cfil_data_common(so, 1, to, data, control, flags);
3560
3561 return (error);
3562}
3563
3564/*
3565 * Callback from socket layer sbappendxxx()
3566 */
3567int
3568cfil_sock_data_in(struct socket *so, struct sockaddr *from,
3569 struct mbuf *data, struct mbuf *control, uint32_t flags)
3570{
3571 int error = 0;
3572
3573 if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL)
3574 return (0);
3575
3576 socket_lock_assert_owned(so);
3577
3578 if (so->so_cfil->cfi_flags & CFIF_DROP) {
3579 CFIL_LOG(LOG_ERR, "so %llx drop set",
3580 (uint64_t)VM_KERNEL_ADDRPERM(so));
3581 return (EPIPE);
3582 }
3583 if (control != NULL) {
3584 CFIL_LOG(LOG_ERR, "so %llx control",
3585 (uint64_t)VM_KERNEL_ADDRPERM(so));
3586 OSIncrementAtomic(&cfil_stats.cfs_data_in_control);
3587 }
3588 if (data->m_type == MT_OOBDATA) {
3589 CFIL_LOG(LOG_ERR, "so %llx MSG_OOB",
3590 (uint64_t)VM_KERNEL_ADDRPERM(so));
3591 OSIncrementAtomic(&cfil_stats.cfs_data_in_oob);
3592 }
3593 error = cfil_data_common(so, 0, from, data, control, flags);
3594
3595 return (error);
3596}
3597
3598/*
3599 * Callback from socket layer soshutdownxxx()
3600 *
3601 * We may delay the shutdown write if there's outgoing data in process.
3602 *
3603 * There is no point in delaying the shutdown read because the process
3604 * indicated that it does not want to read any more data.
3605 */
3606int
3607cfil_sock_shutdown(struct socket *so, int *how)
3608{
3609 int error = 0;
3610
3611 if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL)
3612 goto done;
3613
3614 socket_lock_assert_owned(so);
3615
3616 CFIL_LOG(LOG_INFO, "so %llx how %d",
3617 (uint64_t)VM_KERNEL_ADDRPERM(so), *how);
3618
3619 /*
3620 * Check the state of the socket before the content filter
3621 */
3622 if (*how != SHUT_WR && (so->so_state & SS_CANTRCVMORE) != 0) {
3623 /* read already shut down */
3624 error = ENOTCONN;
3625 goto done;
3626 }
3627 if (*how != SHUT_RD && (so->so_state & SS_CANTSENDMORE) != 0) {
3628 /* write already shut down */
3629 error = ENOTCONN;
3630 goto done;
3631 }
3632
3633 if ((so->so_cfil->cfi_flags & CFIF_DROP) != 0) {
3634 CFIL_LOG(LOG_ERR, "so %llx drop set",
3635 (uint64_t)VM_KERNEL_ADDRPERM(so));
3636 goto done;
3637 }
3638
3639 /*
3640 * shutdown read: SHUT_RD or SHUT_RDWR
3641 */
3642 if (*how != SHUT_WR) {
3643 if (so->so_cfil->cfi_flags & CFIF_SHUT_RD) {
3644 error = ENOTCONN;
3645 goto done;
3646 }
3647 so->so_cfil->cfi_flags |= CFIF_SHUT_RD;
3648 cfil_sock_notify_shutdown(so, SHUT_RD);
3649 }
3650 /*
3651 * shutdown write: SHUT_WR or SHUT_RDWR
3652 */
3653 if (*how != SHUT_RD) {
3654 if (so->so_cfil->cfi_flags & CFIF_SHUT_WR) {
3655 error = ENOTCONN;
3656 goto done;
3657 }
3658 so->so_cfil->cfi_flags |= CFIF_SHUT_WR;
3659 cfil_sock_notify_shutdown(so, SHUT_WR);
3660 /*
3661 * When outgoing data is pending, we delay the shutdown at the
3662 * protocol level until the content filters give the final
3663 * verdict on the pending data.
3664 */
3665 if (cfil_sock_data_pending(&so->so_snd) != 0) {
3666 /*
3667 * When shutting down the read and write sides at once
3668 * we can proceed to the final shutdown of the read
3669 * side. Otherwise, we just return.
3670 */
3671 if (*how == SHUT_WR) {
3672 error = EJUSTRETURN;
3673 } else if (*how == SHUT_RDWR) {
3674 *how = SHUT_RD;
3675 }
3676 }
3677 }
3678done:
3679 return (error);
3680}
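/*
 * A minimal sketch of how a caller in the soshutdown() path might honor
 * the conventions above; the call site and helper name are assumptions
 * for illustration, not a quote of the socket layer:
 *
 *	int how = SHUT_RDWR;
 *	int error = cfil_sock_shutdown(so, &how);
 *	if (error == EJUSTRETURN)
 *		error = 0;	// write side deferred, skip the protocol shutdown for now
 *	else if (error == 0)
 *		error = soshutdownlock_final(so, how);	// helper name assumed
 *
 * With pending outgoing data, SHUT_RDWR is downgraded to SHUT_RD so the
 * read side can still be shut down immediately, while a plain SHUT_WR
 * returns EJUSTRETURN and the protocol-level shutdown is completed later
 * once the content filters deliver their verdict on the held data.
 */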
3681
3682/*
3683 * This is called when the socket is closed and there is no more
3684 * opportunity for filtering
3685 */
3686void
3687cfil_sock_is_closed(struct socket *so)
3688{
3689 errno_t error = 0;
3690 int kcunit;
3691
3692 if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL)
3693 return;
3694
3695 CFIL_LOG(LOG_INFO, "so %llx", (uint64_t)VM_KERNEL_ADDRPERM(so));
3696
3697 socket_lock_assert_owned(so);
3698
3699 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
3700 /* Let the filters know of the closing */
3701 error = cfil_dispatch_closed_event(so, kcunit);
3702 }
3703
3704 /* Last chance to push passed data out */
3705 error = cfil_acquire_sockbuf(so, 1);
3706 if (error == 0)
3707 cfil_service_inject_queue(so, 1);
3708 cfil_release_sockbuf(so, 1);
3709
3710 so->so_cfil->cfi_flags |= CFIF_SOCK_CLOSED;
3711
3712 /* Pending data needs to go */
3713 cfil_flush_queues(so);
3714
3715 CFIL_INFO_VERIFY(so->so_cfil);
3716}
3717
3718/*
 3719 * This is called when the socket is disconnected to let the filters
3720 * know about the disconnection and that no more data will come
3721 *
 3722 * The how parameter has the same values as soshutdown()
3723 */
3724void
3725cfil_sock_notify_shutdown(struct socket *so, int how)
3726{
3727 errno_t error = 0;
3728 int kcunit;
3729
3730 if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL)
3731 return;
3732
3733 CFIL_LOG(LOG_INFO, "so %llx how %d",
3734 (uint64_t)VM_KERNEL_ADDRPERM(so), how);
3735
3736 socket_lock_assert_owned(so);
3737
3738 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
3739 /* Disconnect incoming side */
3740 if (how != SHUT_WR)
3741 error = cfil_dispatch_disconnect_event(so, kcunit, 0);
3742 /* Disconnect outgoing side */
3743 if (how != SHUT_RD)
3744 error = cfil_dispatch_disconnect_event(so, kcunit, 1);
3745 }
3746}
3747
3748static int
3749cfil_filters_attached(struct socket *so)
3750{
3751 struct cfil_entry *entry;
3752 uint32_t kcunit;
3753 int attached = 0;
3754
3755 if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL)
3756 return (0);
3757
3758 socket_lock_assert_owned(so);
3759
3760 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
3761 entry = &so->so_cfil->cfi_entries[kcunit - 1];
3762
3763 /* Are we attached to the filter? */
3764 if (entry->cfe_filter == NULL)
3765 continue;
3766 if ((entry->cfe_flags & CFEF_SENT_SOCK_ATTACHED) == 0)
3767 continue;
3768 if ((entry->cfe_flags & CFEF_CFIL_DETACHED) != 0)
3769 continue;
3770 attached = 1;
3771 break;
3772 }
3773
3774 return (attached);
3775}
3776
3777/*
3778 * This is called when the socket is closed and we are waiting for
 3779 * the filters to give the final pass or drop
3780 */
3781void
3782cfil_sock_close_wait(struct socket *so)
3783{
3784 lck_mtx_t *mutex_held;
3785 struct timespec ts;
3786 int error;
3787
3788 if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL)
3789 return;
3790
3791 CFIL_LOG(LOG_INFO, "so %llx", (uint64_t)VM_KERNEL_ADDRPERM(so));
3792
 3793	if (so->so_proto->pr_getlock != NULL)
5ba3f43e 3794		mutex_held = (*so->so_proto->pr_getlock)(so, PR_F_WILLUNLOCK);
fe8ab488 3795	else
 3796		mutex_held = so->so_proto->pr_domain->dom_mtx;
5ba3f43e 3797	LCK_MTX_ASSERT(mutex_held, LCK_MTX_ASSERT_OWNED);
fe8ab488 3798
3799 while (cfil_filters_attached(so)) {
3800 /*
3801 * Notify the filters we are going away so they can detach
3802 */
3803 cfil_sock_notify_shutdown(so, SHUT_RDWR);
3804
3805 /*
 3806		 * Make sure we still need to wait after the filters are notified
3807 * of the disconnection
3808 */
3809 if (cfil_filters_attached(so) == 0)
3810 break;
3811
3812 CFIL_LOG(LOG_INFO, "so %llx waiting",
3813 (uint64_t)VM_KERNEL_ADDRPERM(so));
3814
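		/*
		 * cfil_close_wait_timeout is expressed in milliseconds;
		 * convert it to a timespec for msleep(), e.g. 1500 ms
		 * becomes { .tv_sec = 1, .tv_nsec = 500000000 }.
		 */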
3815 ts.tv_sec = cfil_close_wait_timeout / 1000;
3816 ts.tv_nsec = (cfil_close_wait_timeout % 1000) *
3817 NSEC_PER_USEC * 1000;
3818
3819 OSIncrementAtomic(&cfil_stats.cfs_close_wait);
3820 so->so_cfil->cfi_flags |= CFIF_CLOSE_WAIT;
3821 error = msleep((caddr_t)&so->so_cfil, mutex_held,
3822 PSOCK | PCATCH, "cfil_sock_close_wait", &ts);
3823 so->so_cfil->cfi_flags &= ~CFIF_CLOSE_WAIT;
3824
3825 CFIL_LOG(LOG_NOTICE, "so %llx timed out %d",
3826 (uint64_t)VM_KERNEL_ADDRPERM(so), (error != 0));
3827
3828 /*
3829 * Force close in case of timeout
3830 */
3831 if (error != 0) {
3832 OSIncrementAtomic(&cfil_stats.cfs_close_wait_timeout);
3833 break;
3834 }
3835 }
3836
3837}
3838
3839/*
 3840 * Returns the size of the data held by the content filter for this socket buffer
3841 */
3842int32_t
3843cfil_sock_data_pending(struct sockbuf *sb)
3844{
3845 struct socket *so = sb->sb_so;
3846 uint64_t pending = 0;
3847
3848 if ((so->so_flags & SOF_CONTENT_FILTER) != 0 && so->so_cfil != NULL) {
3849 struct cfi_buf *cfi_buf;
3850
3851 socket_lock_assert_owned(so);
3852
3853 if ((sb->sb_flags & SB_RECV) == 0)
3854 cfi_buf = &so->so_cfil->cfi_snd;
3855 else
3856 cfi_buf = &so->so_cfil->cfi_rcv;
3857
3858 pending = cfi_buf->cfi_pending_last -
3859 cfi_buf->cfi_pending_first;
3860
3861 /*
3862 * If we are limited by the "chars of mbufs used" roughly
3863 * adjust so we won't overcommit
3864 */
3865 if (pending > (uint64_t)cfi_buf->cfi_pending_mbcnt)
3866 pending = cfi_buf->cfi_pending_mbcnt;
3867 }
3868
3869 VERIFY(pending < INT32_MAX);
3870
3871 return (int32_t)(pending);
3872}
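/*
 * Worked example for the accounting above: with cfi_pending_first at
 * 4096 and cfi_pending_last at 8192, 4096 bytes are reported as pending,
 * unless the mbuf character count cfi_pending_mbcnt is smaller, in which
 * case that smaller figure is reported instead.
 */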
3873
3874/*
3875 * Return the socket buffer space used by data being held by content filters
3876 * so processes won't clog the socket buffer
3877 */
3878int32_t
3879cfil_sock_data_space(struct sockbuf *sb)
3880{
3881 struct socket *so = sb->sb_so;
3882 uint64_t pending = 0;
3883
3884 if ((so->so_flags & SOF_CONTENT_FILTER) != 0 && so->so_cfil != NULL &&
3885 so->so_snd.sb_cfil_thread != current_thread()) {
3886 struct cfi_buf *cfi_buf;
3887
3888 socket_lock_assert_owned(so);
3889
3890 if ((sb->sb_flags & SB_RECV) == 0)
3891 cfi_buf = &so->so_cfil->cfi_snd;
3892 else
3893 cfi_buf = &so->so_cfil->cfi_rcv;
3894
3895 pending = cfi_buf->cfi_pending_last -
3896 cfi_buf->cfi_pending_first;
3897
3898 /*
3899 * If we are limited by the "chars of mbufs used" roughly
3900 * adjust so we won't overcommit
3901 */
3902 if ((uint64_t)cfi_buf->cfi_pending_mbcnt > pending)
3903 pending = cfi_buf->cfi_pending_mbcnt;
3904 }
3905
3906 VERIFY(pending < INT32_MAX);
3907
3908 return (int32_t)(pending);
3909}
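/*
 * The sb_cfil_thread test above presumably keeps the content filter
 * worker thread from charging the held data against the very buffer
 * space it needs while re-injecting; other threads still see the held
 * bytes as consuming socket buffer space.
 */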
3910
3911/*
3912 * A callback from the socket and protocol layer when data becomes
3913 * available in the socket buffer to give a chance for the content filter
3914 * to re-inject data that was held back
3915 */
3916void
3917cfil_sock_buf_update(struct sockbuf *sb)
3918{
3919 int outgoing;
3920 int error;
3921 struct socket *so = sb->sb_so;
3922
3923 if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL)
3924 return;
3925
3926 if (!cfil_sbtrim)
3927 return;
3928
3929 socket_lock_assert_owned(so);
3930
3931 if ((sb->sb_flags & SB_RECV) == 0) {
3932 if ((so->so_cfil->cfi_flags & CFIF_RETRY_INJECT_OUT) == 0)
3933 return;
3934 outgoing = 1;
3935 OSIncrementAtomic(&cfil_stats.cfs_inject_q_out_retry);
3936 } else {
3937 if ((so->so_cfil->cfi_flags & CFIF_RETRY_INJECT_IN) == 0)
3938 return;
3939 outgoing = 0;
3940 OSIncrementAtomic(&cfil_stats.cfs_inject_q_in_retry);
3941 }
3942
3943 CFIL_LOG(LOG_NOTICE, "so %llx outgoing %d",
3944 (uint64_t)VM_KERNEL_ADDRPERM(so), outgoing);
3945
3946 error = cfil_acquire_sockbuf(so, outgoing);
3947 if (error == 0)
3948 cfil_service_inject_queue(so, outgoing);
3949 cfil_release_sockbuf(so, outgoing);
3950}
3951
3952int
3953sysctl_cfil_filter_list(struct sysctl_oid *oidp, void *arg1, int arg2,
3954 struct sysctl_req *req)
3955{
3956#pragma unused(oidp, arg1, arg2)
3957 int error = 0;
3958 size_t len = 0;
3959 u_int32_t i;
3960
3961 /* Read only */
3962 if (req->newptr != USER_ADDR_NULL)
3963 return (EPERM);
3964
3965 cfil_rw_lock_shared(&cfil_lck_rw);
3966
3967 for (i = 0; content_filters != NULL && i < MAX_CONTENT_FILTER; i++) {
3968 struct cfil_filter_stat filter_stat;
3969 struct content_filter *cfc = content_filters[i];
3970
3971 if (cfc == NULL)
3972 continue;
3973
3974 /* If just asking for the size */
3975 if (req->oldptr == USER_ADDR_NULL) {
3976 len += sizeof(struct cfil_filter_stat);
3977 continue;
3978 }
3979
3980 bzero(&filter_stat, sizeof(struct cfil_filter_stat));
3981 filter_stat.cfs_len = sizeof(struct cfil_filter_stat);
3982 filter_stat.cfs_filter_id = cfc->cf_kcunit;
3983 filter_stat.cfs_flags = cfc->cf_flags;
3984 filter_stat.cfs_sock_count = cfc->cf_sock_count;
3985 filter_stat.cfs_necp_control_unit = cfc->cf_necp_control_unit;
3986
3987 error = SYSCTL_OUT(req, &filter_stat,
3988 sizeof (struct cfil_filter_stat));
3989 if (error != 0)
3990 break;
3991 }
3992 /* If just asking for the size */
3993 if (req->oldptr == USER_ADDR_NULL)
3994 req->oldidx = len;
3995
3996 cfil_rw_unlock_shared(&cfil_lck_rw);
3997
3998 return (error);
3999}
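/*
 * The handler above follows the usual two-step sysctl convention: a
 * first call with a NULL old pointer only reports the required size.
 * A user space reader could therefore look roughly like the sketch
 * below; the sysctl name "net.cfil.filter_list" is assumed from the
 * SYSCTL_PROC declaration elsewhere in this file.
 *
 *	size_t len = 0;
 *	if (sysctlbyname("net.cfil.filter_list", NULL, &len, NULL, 0) == 0 &&
 *	    len != 0) {
 *		struct cfil_filter_stat *stats = malloc(len);
 *		if (stats != NULL &&
 *		    sysctlbyname("net.cfil.filter_list", stats, &len, NULL, 0) == 0) {
 *			// len / sizeof(*stats) entries are now available
 *		}
 *		free(stats);
 *	}
 */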
4000
4001static int sysctl_cfil_sock_list(struct sysctl_oid *oidp, void *arg1, int arg2,
4002 struct sysctl_req *req)
4003{
4004#pragma unused(oidp, arg1, arg2)
4005 int error = 0;
4006 u_int32_t i;
4007 struct cfil_info *cfi;
4008
4009 /* Read only */
4010 if (req->newptr != USER_ADDR_NULL)
4011 return (EPERM);
4012
4013 cfil_rw_lock_shared(&cfil_lck_rw);
4014
4015 /*
 4016	 * If just asking for the size
4017 */
4018 if (req->oldptr == USER_ADDR_NULL) {
4019 req->oldidx = cfil_sock_attached_count *
4020 sizeof(struct cfil_sock_stat);
 4021		/* Bump the length in case new sockets get attached */
4022 req->oldidx += req->oldidx >> 3;
4023 goto done;
4024 }
4025
4026 TAILQ_FOREACH(cfi, &cfil_sock_head, cfi_link) {
4027 struct cfil_entry *entry;
4028 struct cfil_sock_stat stat;
4029 struct socket *so = cfi->cfi_so;
4030
4031 bzero(&stat, sizeof(struct cfil_sock_stat));
4032 stat.cfs_len = sizeof(struct cfil_sock_stat);
4033 stat.cfs_sock_id = cfi->cfi_sock_id;
4034 stat.cfs_flags = cfi->cfi_flags;
4035
4036 if (so != NULL) {
4037 stat.cfs_pid = so->last_pid;
4038 memcpy(stat.cfs_uuid, so->last_uuid,
4039 sizeof(uuid_t));
4040 if (so->so_flags & SOF_DELEGATED) {
4041 stat.cfs_e_pid = so->e_pid;
4042 memcpy(stat.cfs_e_uuid, so->e_uuid,
4043 sizeof(uuid_t));
4044 } else {
4045 stat.cfs_e_pid = so->last_pid;
4046 memcpy(stat.cfs_e_uuid, so->last_uuid,
4047 sizeof(uuid_t));
4048 }
4049 }
4050
4051 stat.cfs_snd.cbs_pending_first =
4052 cfi->cfi_snd.cfi_pending_first;
4053 stat.cfs_snd.cbs_pending_last =
4054 cfi->cfi_snd.cfi_pending_last;
4055 stat.cfs_snd.cbs_inject_q_len =
4056 cfil_queue_len(&cfi->cfi_snd.cfi_inject_q);
4057 stat.cfs_snd.cbs_pass_offset =
4058 cfi->cfi_snd.cfi_pass_offset;
4059
4060 stat.cfs_rcv.cbs_pending_first =
4061 cfi->cfi_rcv.cfi_pending_first;
4062 stat.cfs_rcv.cbs_pending_last =
4063 cfi->cfi_rcv.cfi_pending_last;
4064 stat.cfs_rcv.cbs_inject_q_len =
4065 cfil_queue_len(&cfi->cfi_rcv.cfi_inject_q);
4066 stat.cfs_rcv.cbs_pass_offset =
4067 cfi->cfi_rcv.cfi_pass_offset;
4068
4069 for (i = 0; i < MAX_CONTENT_FILTER; i++) {
4070 struct cfil_entry_stat *estat;
4071 struct cfe_buf *ebuf;
4072 struct cfe_buf_stat *sbuf;
4073
4074 entry = &cfi->cfi_entries[i];
4075
4076 estat = &stat.ces_entries[i];
4077
4078 estat->ces_len = sizeof(struct cfil_entry_stat);
4079 estat->ces_filter_id = entry->cfe_filter ?
4080 entry->cfe_filter->cf_kcunit : 0;
4081 estat->ces_flags = entry->cfe_flags;
4082 estat->ces_necp_control_unit =
4083 entry->cfe_necp_control_unit;
4084
4085 estat->ces_last_event.tv_sec =
4086 (int64_t)entry->cfe_last_event.tv_sec;
4087 estat->ces_last_event.tv_usec =
4088 (int64_t)entry->cfe_last_event.tv_usec;
4089
4090 estat->ces_last_action.tv_sec =
4091 (int64_t)entry->cfe_last_action.tv_sec;
4092 estat->ces_last_action.tv_usec =
4093 (int64_t)entry->cfe_last_action.tv_usec;
4094
4095 ebuf = &entry->cfe_snd;
4096 sbuf = &estat->ces_snd;
4097 sbuf->cbs_pending_first =
4098 cfil_queue_offset_first(&ebuf->cfe_pending_q);
4099 sbuf->cbs_pending_last =
4100 cfil_queue_offset_last(&ebuf->cfe_pending_q);
4101 sbuf->cbs_ctl_first =
4102 cfil_queue_offset_first(&ebuf->cfe_ctl_q);
4103 sbuf->cbs_ctl_last =
4104 cfil_queue_offset_last(&ebuf->cfe_ctl_q);
4105 sbuf->cbs_pass_offset = ebuf->cfe_pass_offset;
4106 sbuf->cbs_peek_offset = ebuf->cfe_peek_offset;
4107 sbuf->cbs_peeked = ebuf->cfe_peeked;
4108
4109 ebuf = &entry->cfe_rcv;
4110 sbuf = &estat->ces_rcv;
4111 sbuf->cbs_pending_first =
4112 cfil_queue_offset_first(&ebuf->cfe_pending_q);
4113 sbuf->cbs_pending_last =
4114 cfil_queue_offset_last(&ebuf->cfe_pending_q);
4115 sbuf->cbs_ctl_first =
4116 cfil_queue_offset_first(&ebuf->cfe_ctl_q);
4117 sbuf->cbs_ctl_last =
4118 cfil_queue_offset_last(&ebuf->cfe_ctl_q);
4119 sbuf->cbs_pass_offset = ebuf->cfe_pass_offset;
4120 sbuf->cbs_peek_offset = ebuf->cfe_peek_offset;
4121 sbuf->cbs_peeked = ebuf->cfe_peeked;
4122 }
4123 error = SYSCTL_OUT(req, &stat,
4124 sizeof (struct cfil_sock_stat));
4125 if (error != 0)
4126 break;
4127 }
4128done:
4129 cfil_rw_unlock_shared(&cfil_lck_rw);
4130
4131 return (error);
4132}