/* bsd/net/content_filter.c */
1 /*
2 * Copyright (c) 2013-2014 Apple Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. Please obtain a copy of the License at
10 * http://www.opensource.apple.com/apsl/ and read it before using this
11 * file.
12 *
13 * The Original Code and all software distributed under the License are
14 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
18 * Please see the License for the specific language governing rights and
19 * limitations under the License.
20 *
21 * @APPLE_LICENSE_HEADER_END@
22 */
23
24 /*
25 * THEORY OF OPERATION
26 *
27 * The socket content filter subsystem provides a way for user space agents to
28 * make filtering decisions based on the content of the data being sent and
29 * received by TCP/IP sockets.
30 *
 * A content filter user space agent gets a copy of the data and the data is
 * also kept in a kernel buffer until the user space agent makes a pass or drop
 * decision. This unidirectional flow of content avoids unnecessary data copies
 * back to the kernel.
 *
36 * A user space filter agent opens a kernel control socket with the name
37 * CONTENT_FILTER_CONTROL_NAME to attach to the socket content filter subsystem.
38 * When connected, a "struct content_filter" is created and set as the
39 * "unitinfo" of the corresponding kernel control socket instance.
40 *
41 * The socket content filter subsystem exchanges messages with the user space
42 * filter agent until an ultimate pass or drop decision is made by the
43 * user space filter agent.
44 *
45 * It should be noted that messages about many TCP/IP sockets can be multiplexed
46 * over a single kernel control socket.
47 *
48 * Notes:
49 * - The current implementation is limited to TCP sockets.
50 * - The current implementation supports up to two simultaneous content filters
51 * for the sake of simplicity of the implementation.
52 *
53 *
54 * NECP FILTER CONTROL UNIT
55 *
 * A user space filter agent uses the Network Extension Control Policy (NECP)
 * database to specify which TCP/IP sockets need to be filtered. The NECP
 * criteria may be based on a variety of properties like user ID or proc UUID.
59 *
60 * The NECP "filter control unit" is used by the socket content filter subsystem
61 * to deliver the relevant TCP/IP content information to the appropriate
62 * user space filter agent via its kernel control socket instance.
63 * This works as follows:
64 *
 * 1) The user space filter agent specifies an NECP filter control unit when
 *    it adds its filtering rules to the NECP database.
67 *
68 * 2) The user space filter agent also sets its NECP filter control unit on the
69 * content filter kernel control socket via the socket option
70 * CFIL_OPT_NECP_CONTROL_UNIT.
71 *
72 * 3) The NECP database is consulted to find out if a given TCP/IP socket
73 * needs to be subjected to content filtering and returns the corresponding
74 * NECP filter control unit -- the NECP filter control unit is actually
75 * stored in the TCP/IP socket structure so the NECP lookup is really simple.
76 *
77 * 4) The NECP filter control unit is then used to find the corresponding
78 * kernel control socket instance.
79 *
 * Note: NECP currently supports a single filter control unit per TCP/IP socket
 * but this restriction may soon be lifted.
82 *
83 *
84 * THE MESSAGING PROTOCOL
85 *
86 * The socket content filter subsystem and a user space filter agent
87 * communicate over the kernel control socket via an asynchronous
88 * messaging protocol (this is not a request-response protocol).
89 * The socket content filter subsystem sends event messages to the user
90 * space filter agent about the TCP/IP sockets it is interested to filter.
91 * The user space filter agent sends action messages to either allow
92 * data to pass or to disallow the data flow (and drop the connection).
93 *
94 * All messages over a content filter kernel control socket share the same
95 * common header of type "struct cfil_msg_hdr". The message type tells if
96 * it's a event message "CFM_TYPE_EVENT" or a action message "CFM_TYPE_ACTION".
97 * The message header field "cfm_sock_id" identifies a given TCP/IP socket.
98 * Note the message header length field may be padded for alignment and can
99 * be larger than the actual content of the message.
100 * The field "cfm_op" describe the kind of event or action.
101 *
102 * Here are the kinds of content filter events:
103 * - CFM_OP_SOCKET_ATTACHED: a new TCP/IP socket is being filtered
104 * - CFM_OP_SOCKET_CLOSED: A TCP/IP socket is closed
105 * - CFM_OP_DATA_OUT: A span of data is being sent on a TCP/IP socket
 * - CFM_OP_DATA_IN: A span of data is being received on a TCP/IP socket
107 *
108 *
109 * EVENT MESSAGES
110 *
111 * The CFM_OP_DATA_OUT and CFM_OP_DATA_IN event messages contains a span of
112 * data that is being sent or received. The position of this span of data
113 * in the data flow is described by a set of start and end offsets. These
114 * are absolute 64 bits offsets. The first byte sent (or received) starts
115 * at offset 0 and ends at offset 1. The length of the content data
116 * is given by the difference between the end offset and the start offset.
117 *
 * After a CFM_OP_SOCKET_ATTACHED is delivered, CFM_OP_DATA_OUT and
 * CFM_OP_DATA_IN events are not delivered until a CFM_OP_DATA_UPDATE
 * action message is sent by the user space filter agent.
121 *
 * Note: absolute 64 bits offsets should be large enough for the foreseeable
 * future. A 64-bit counter will wrap after 468 years at 10 Gbit/sec:
 * 2^64 / ((10E9 / 8) * 60 * 60 * 24 * 365.25) = 467.63
 *
 * There are two kinds of content filter actions:
127 * - CFM_OP_DATA_UPDATE: to update pass or peek offsets for each direction.
128 * - CFM_OP_DROP: to shutdown socket and disallow further data flow
129 *
130 *
131 * ACTION MESSAGES
132 *
133 * The CFM_OP_DATA_UPDATE action messages let the user space filter
134 * agent allow data to flow up to the specified pass offset -- there
135 * is a pass offset for outgoing data and a pass offset for incoming data.
136 * When a new TCP/IP socket is attached to the content filter, each pass offset
 * is initially set to 0 so no data is allowed to pass by default.
138 * When the pass offset is set to CFM_MAX_OFFSET via a CFM_OP_DATA_UPDATE
139 * then the data flow becomes unrestricted.
140 *
141 * Note that pass offsets can only be incremented. A CFM_OP_DATA_UPDATE message
142 * with a pass offset smaller than the pass offset of a previous
143 * CFM_OP_DATA_UPDATE message is silently ignored.
144 *
145 * A user space filter agent also uses CFM_OP_DATA_UPDATE action messages
146 * to tell the kernel how much data it wants to see by using the peek offsets.
147 * Just like pass offsets, there is a peek offset for each direction.
148 * When a new TCP/IP socket is attached to the content filter, each peek offset
149 * is initially set to 0 so no CFM_OP_DATA_OUT and CFM_OP_DATA_IN event
150 * messages are dispatched by default until a CFM_OP_DATA_UPDATE action message
151 * with a greater than 0 peek offset is sent by the user space filter agent.
152 * When the peek offset is set to CFM_MAX_OFFSET via a CFM_OP_DATA_UPDATE
153 * then the flow of update data events becomes unrestricted.
154 *
 * Note that peek offsets cannot be smaller than the corresponding pass offset.
 * Also a peek offset cannot be smaller than the corresponding end offset
 * of the last CFM_OP_DATA_OUT/CFM_OP_DATA_IN message dispatched. Trying
 * to set a too small peek value is silently ignored.
159 *
160 *
161 * PER SOCKET "struct cfil_info"
162 *
163 * As soon as a TCP/IP socket gets attached to a content filter, a
164 * "struct cfil_info" is created to hold the content filtering state for this
165 * socket.
166 *
167 * The content filtering state is made of the following information
168 * for each direction:
169 * - The current pass offset;
170 * - The first and last offsets of the data pending, waiting for a filtering
171 * decision;
172 * - The inject queue for data that passed the filters and that needs
173 * to be re-injected;
174 * - A content filter specific state in a set of "struct cfil_entry"
175 *
176 *
177 * CONTENT FILTER STATE "struct cfil_entry"
178 *
179 * The "struct cfil_entry" maintains the information most relevant to the
180 * message handling over a kernel control socket with a user space filter agent.
181 *
182 * The "struct cfil_entry" holds the NECP filter control unit that corresponds
183 * to the kernel control socket unit it corresponds to and also has a pointer
184 * to the corresponding "struct content_filter".
185 *
186 * For each direction, "struct cfil_entry" maintains the following information:
187 * - The pass offset
188 * - The peek offset
189 * - The offset of the last data peeked at by the filter
190 * - A queue of data that's waiting to be delivered to the user space filter
191 * agent on the kernel control socket
192 * - A queue of data for which event messages have been sent on the kernel
193 * control socket and are pending for a filtering decision.
194 *
195 *
196 * CONTENT FILTER QUEUES
197 *
198 * Data that is being filtered is steered away from the TCP/IP socket buffer
199 * and instead will sit in one of three content filter queue until the data
200 * can be re-injected into the TCP/IP socket buffer.
201 *
202 * A content filter queue is represented by "struct cfil_queue" that contains
203 * a list of mbufs and the start and end offset of the data span of
204 * the list of mbufs.
205 *
206 * The data moves into the three content filter queues according to this
207 * sequence:
208 * a) The "cfe_ctl_q" of "struct cfil_entry"
209 * b) The "cfe_pending_q" of "struct cfil_entry"
210 * c) The "cfi_inject_q" of "struct cfil_info"
211 *
 * Note: The sequence (a),(b) may be repeated several times if there is more
 * than one content filter attached to the TCP/IP socket.
214 *
 * The "cfe_ctl_q" queue holds data that cannot be delivered to the
 * kernel control socket for two reasons:
 * - The peek offset is less than the end offset of the mbuf data
218 * - The kernel control socket is flow controlled
219 *
 * The "cfe_pending_q" queue holds data for which CFM_OP_DATA_OUT or
 * CFM_OP_DATA_IN have been successfully dispatched to the kernel control
 * socket and are waiting for a pass action message from the user space
 * filter agent. An mbuf length must be fully allowed to pass to be removed
 * from the cfe_pending_q.
225 *
226 * The "cfi_inject_q" queue holds data that has been fully allowed to pass
227 * by the user space filter agent and that needs to be re-injected into the
228 * TCP/IP socket.
229 *
230 *
231 * IMPACT ON FLOW CONTROL
232 *
 * An essential aspect of the content filter subsystem is to minimize the
 * impact on flow control of the TCP/IP sockets being filtered.
235 *
236 * The processing overhead of the content filtering may have an effect on
237 * flow control by adding noticeable delays and cannot be eliminated --
238 * care must be taken by the user space filter agent to minimize the
239 * processing delays.
240 *
241 * The amount of data being filtered is kept in buffers while waiting for
242 * a decision by the user space filter agent. This amount of data pending
243 * needs to be subtracted from the amount of data available in the
244 * corresponding TCP/IP socket buffer. This is done by modifying
245 * sbspace() and tcp_sbspace() to account for amount of data pending
246 * in the content filter.
247 *
248 *
249 * LOCKING STRATEGY
250 *
251 * The global state of content filter subsystem is protected by a single
252 * read-write lock "cfil_lck_rw". The data flow can be done with the
253 * cfil read-write lock held as shared so it can be re-entered from multiple
254 * threads.
255 *
 * The per TCP/IP socket content filter state -- "struct cfil_info" -- is
 * protected by the socket lock.
258 *
259 * A TCP/IP socket lock cannot be taken while the cfil read-write lock
260 * is held. That's why we have some sequences where we drop the cfil read-write
261 * lock before taking the TCP/IP lock.
262 *
263 * It is also important to lock the TCP/IP socket buffer while the content
264 * filter is modifying the amount of pending data. Otherwise the calculations
265 * in sbspace() and tcp_sbspace() could be wrong.
266 *
267 * The "cfil_lck_rw" protects "struct content_filter" and also the fields
268 * "cfe_link" and "cfe_filter" of "struct cfil_entry".
269 *
270 * Actually "cfe_link" and "cfe_filter" are protected by both by
271 * "cfil_lck_rw" and the socket lock: they may be modified only when
272 * "cfil_lck_rw" is exclusive and the socket is locked.
273 *
274 * To read the other fields of "struct content_filter" we have to take
275 * "cfil_lck_rw" in shared mode.
276 *
277 *
278 * LIMITATIONS
279 *
280 * - For TCP sockets only
281 *
282 * - Does not support TCP unordered messages
283 */
284
285 /*
286 * TO DO LIST
287 *
288 * SOONER:
289 *
290 * Deal with OOB
291 *
292 * LATER:
293 *
294 * If support datagram, enqueue control and address mbufs as well
295 */
296
297 #include <sys/types.h>
298 #include <sys/kern_control.h>
299 #include <sys/queue.h>
300 #include <sys/domain.h>
301 #include <sys/protosw.h>
302 #include <sys/syslog.h>
303
304 #include <kern/locks.h>
305 #include <kern/zalloc.h>
306 #include <kern/debug.h>
307
308 #include <net/content_filter.h>
309
310 #include <netinet/in_pcb.h>
311 #include <netinet/tcp.h>
312 #include <netinet/tcp_var.h>
313
314 #include <string.h>
315 #include <libkern/libkern.h>
316
317
318 #define MAX_CONTENT_FILTER 2
319
320 struct cfil_entry;
321
/*
 * The structure content_filter represents a user space content filter.
 * It's created and associated with a kernel control socket instance.
 */
struct content_filter {
	kern_ctl_ref		cf_kcref;	/* kernel control socket reference */
	u_int32_t		cf_kcunit;	/* kernel control socket unit (1-based) */
	u_int32_t		cf_flags;	/* CFF_* flags below */

	uint32_t		cf_necp_control_unit;	/* NECP filter control unit of this agent */

	uint32_t		cf_sock_count;	/* number of entries on cf_sock_entries */
	TAILQ_HEAD(, cfil_entry) cf_sock_entries;	/* one cfil_entry per attached socket */
};
336
337 #define CFF_ACTIVE 0x01
338 #define CFF_DETACHING 0x02
339 #define CFF_FLOW_CONTROLLED 0x04
340
341 struct content_filter **content_filters = NULL;
342 uint32_t cfil_active_count = 0; /* Number of active content filters */
343 uint32_t cfil_sock_attached_count = 0; /* Number of sockets attachements */
344 uint32_t cfil_close_wait_timeout = 1000; /* in milliseconds */
345
346 static kern_ctl_ref cfil_kctlref = NULL;
347
348 static lck_grp_attr_t *cfil_lck_grp_attr = NULL;
349 static lck_attr_t *cfil_lck_attr = NULL;
350 static lck_grp_t *cfil_lck_grp = NULL;
351 decl_lck_rw_data(static, cfil_lck_rw);
352
353 #define CFIL_RW_LCK_MAX 8
354
355 int cfil_rw_nxt_lck = 0;
356 void* cfil_rw_lock_history[CFIL_RW_LCK_MAX];
357
358 int cfil_rw_nxt_unlck = 0;
359 void* cfil_rw_unlock_history[CFIL_RW_LCK_MAX];
360
361 #define CONTENT_FILTER_ZONE_NAME "content_filter"
362 #define CONTENT_FILTER_ZONE_MAX 10
363 static struct zone *content_filter_zone = NULL; /* zone for content_filter */
364
365
366 #define CFIL_INFO_ZONE_NAME "cfil_info"
367 #define CFIL_INFO_ZONE_MAX 1024
368 static struct zone *cfil_info_zone = NULL; /* zone for cfil_info */
369
370 MBUFQ_HEAD(cfil_mqhead);
371
/*
 * struct cfil_queue
 *
 * A span of buffered data: q_start/q_end are absolute byte-stream
 * offsets bounding the data currently linked on the q_mq mbuf queue.
 */
struct cfil_queue {
	uint64_t		q_start;	/* offset of first byte in queue */
	uint64_t		q_end;		/* offset of last byte in queue */
	struct cfil_mqhead	q_mq;		/* mbuf chains holding the data span */
};
377
/*
 * struct cfil_entry
 *
 * There is one entry per content filter attached to a socket.
 * It tracks the message exchange with one user space filter agent
 * for that socket, in each direction (cfe_snd / cfe_rcv).
 */
struct cfil_entry {
	TAILQ_ENTRY(cfil_entry) cfe_link;	/* on the filter's cf_sock_entries list */
	struct content_filter	*cfe_filter;	/* owning filter; see LOCKING STRATEGY above */

	struct cfil_info	*cfe_cfil_info;	/* back pointer to the per-socket state */
	uint32_t		cfe_flags;	/* CFEF_* flags */
	uint32_t		cfe_necp_control_unit;	/* NECP filter control unit of the agent */
	struct timeval		cfe_last_event;	/* To user space */
	struct timeval		cfe_last_action; /* From user space */

	struct cfe_buf {
		/*
		 * cfe_pending_q holds data that has been delivered to
		 * the filter and for which we are waiting for an action
		 */
		struct cfil_queue	cfe_pending_q;
		/*
		 * This queue is for data that has not been delivered to
		 * the content filter (new data, pass peek or flow control)
		 */
		struct cfil_queue	cfe_ctl_q;

		uint64_t	cfe_pass_offset;	/* data below this offset may flow */
		uint64_t	cfe_peek_offset;	/* filter wants to see data up to here */
		uint64_t	cfe_peeked;		/* offset already shown to the filter */
	} cfe_snd, cfe_rcv;
};
410
411 #define CFEF_CFIL_ATTACHED 0x0001 /* was attached to filter */
412 #define CFEF_SENT_SOCK_ATTACHED 0x0002 /* sock attach event was sent */
413 #define CFEF_DATA_START 0x0004 /* can send data event */
414 #define CFEF_FLOW_CONTROLLED 0x0008 /* wait for flow control lift */
415 #define CFEF_SENT_DISCONNECT_IN 0x0010 /* event was sent */
416 #define CFEF_SENT_DISCONNECT_OUT 0x0020 /* event was sent */
417 #define CFEF_SENT_SOCK_CLOSED 0x0040 /* closed event was sent */
418 #define CFEF_CFIL_DETACHED 0x0080 /* filter was detached */
419
/*
 * struct cfil_info
 *
 * There is a struct cfil_info per socket attached to the content
 * filter subsystem; it holds the filtering state for that socket.
 */
struct cfil_info {
	TAILQ_ENTRY(cfil_info)	cfi_link;	/* on the global cfil_sock_head list */
	struct socket		*cfi_so;	/* the filtered socket */
	uint64_t		cfi_flags;	/* CFIF_* flags */
	uint64_t		cfi_sock_id;	/* gencnt in upper 32 bits, flowhash in lower (CFI_MASK_*) */

	struct cfi_buf {
		/*
		 * cfi_pending_first and cfi_pending_last describe the total
		 * amount of data outstanding for all the filters on
		 * this socket and data in the flow queue.
		 * cfi_pending_mbcnt counts in sballoc() "chars of mbufs used"
		 */
		uint64_t	cfi_pending_first;
		uint64_t	cfi_pending_last;
		int		cfi_pending_mbcnt;
		/*
		 * cfi_pass_offset is the minimum of all the filters
		 */
		uint64_t	cfi_pass_offset;
		/*
		 * cfi_inject_q holds data that needs to be re-injected
		 * into the socket after filtering and that can
		 * be queued because of flow control
		 */
		struct cfil_queue cfi_inject_q;
	} cfi_snd, cfi_rcv;

	struct cfil_entry cfi_entries[MAX_CONTENT_FILTER];	/* one per content filter */
};
455
456 #define CFIF_DROP 0x0001 /* drop action applied */
457 #define CFIF_CLOSE_WAIT 0x0002 /* waiting for filter to close */
458 #define CFIF_SOCK_CLOSED 0x0004 /* socket is closed */
459 #define CFIF_RETRY_INJECT_IN 0x0010 /* inject in failed */
460 #define CFIF_RETRY_INJECT_OUT 0x0020 /* inject out failed */
461 #define CFIF_SHUT_WR 0x0040 /* shutdown write */
462 #define CFIF_SHUT_RD 0x0080 /* shutdown read */
463
464 #define CFI_MASK_GENCNT 0xFFFFFFFF00000000 /* upper 32 bits */
465 #define CFI_SHIFT_GENCNT 32
466 #define CFI_MASK_FLOWHASH 0x00000000FFFFFFFF /* lower 32 bits */
467 #define CFI_SHIFT_FLOWHASH 0
468
469 TAILQ_HEAD(cfil_sock_head, cfil_info) cfil_sock_head;
470
471 #define CFIL_QUEUE_VERIFY(x) if (cfil_debug) cfil_queue_verify(x)
472 #define CFIL_INFO_VERIFY(x) if (cfil_debug) cfil_info_verify(x)
473
474 /*
475 * Statistics
476 */
477
478 struct cfil_stats cfil_stats;
479
480 /*
481 * For troubleshooting
482 */
483 int cfil_log_level = LOG_ERR;
484 int cfil_debug = 1;
485
486 /*
487 * Sysctls for logs and statistics
488 */
489 static int sysctl_cfil_filter_list(struct sysctl_oid *, void *, int,
490 struct sysctl_req *);
491 static int sysctl_cfil_sock_list(struct sysctl_oid *, void *, int,
492 struct sysctl_req *);
493
494 SYSCTL_NODE(_net, OID_AUTO, cfil, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "cfil");
495
496 SYSCTL_INT(_net_cfil, OID_AUTO, log, CTLFLAG_RW|CTLFLAG_LOCKED,
497 &cfil_log_level, 0, "");
498
499 SYSCTL_INT(_net_cfil, OID_AUTO, debug, CTLFLAG_RW|CTLFLAG_LOCKED,
500 &cfil_debug, 0, "");
501
502 SYSCTL_UINT(_net_cfil, OID_AUTO, sock_attached_count, CTLFLAG_RD|CTLFLAG_LOCKED,
503 &cfil_sock_attached_count, 0, "");
504
505 SYSCTL_UINT(_net_cfil, OID_AUTO, active_count, CTLFLAG_RD|CTLFLAG_LOCKED,
506 &cfil_active_count, 0, "");
507
508 SYSCTL_UINT(_net_cfil, OID_AUTO, close_wait_timeout, CTLFLAG_RW|CTLFLAG_LOCKED,
509 &cfil_close_wait_timeout, 0, "");
510
511 static int cfil_sbtrim = 1;
512 SYSCTL_UINT(_net_cfil, OID_AUTO, sbtrim, CTLFLAG_RW|CTLFLAG_LOCKED,
513 &cfil_sbtrim, 0, "");
514
515 SYSCTL_PROC(_net_cfil, OID_AUTO, filter_list, CTLFLAG_RD|CTLFLAG_LOCKED,
516 0, 0, sysctl_cfil_filter_list, "S,cfil_filter_stat", "");
517
518 SYSCTL_PROC(_net_cfil, OID_AUTO, sock_list, CTLFLAG_RD|CTLFLAG_LOCKED,
519 0, 0, sysctl_cfil_sock_list, "S,cfil_sock_stat", "");
520
521 SYSCTL_STRUCT(_net_cfil, OID_AUTO, stats, CTLFLAG_RD|CTLFLAG_LOCKED,
522 &cfil_stats, cfil_stats, "");
523
524 /*
525 * Forward declaration to appease the compiler
526 */
527 static int cfil_action_data_pass(struct socket *, uint32_t, int,
528 uint64_t, uint64_t);
529 static int cfil_action_drop(struct socket *, uint32_t);
530 static int cfil_dispatch_closed_event(struct socket *, int);
531 static int cfil_data_common(struct socket *, int, struct sockaddr *,
532 struct mbuf *, struct mbuf *, uint32_t);
533 static int cfil_data_filter(struct socket *, uint32_t, int,
534 struct mbuf *, uint64_t);
535 static void fill_ip_sockaddr_4_6(union sockaddr_in_4_6 *,
536 struct in_addr, u_int16_t);
537 static void fill_ip6_sockaddr_4_6(union sockaddr_in_4_6 *,
538 struct in6_addr *, u_int16_t);
539 static int cfil_dispatch_attach_event(struct socket *, uint32_t);
540 static void cfil_info_free(struct socket *, struct cfil_info *);
541 static struct cfil_info * cfil_info_alloc(struct socket *);
542 static int cfil_info_attach_unit(struct socket *, uint32_t);
543 static struct socket * cfil_socket_from_sock_id(cfil_sock_id_t);
544 static int cfil_service_pending_queue(struct socket *, uint32_t, int);
545 static int cfil_data_service_ctl_q(struct socket *, uint32_t, int);
546 static void cfil_info_verify(struct cfil_info *);
547 static int cfil_update_data_offsets(struct socket *, uint32_t, int,
548 uint64_t, uint64_t);
549 static int cfil_acquire_sockbuf(struct socket *, int);
550 static void cfil_release_sockbuf(struct socket *, int);
551 static int cfil_filters_attached(struct socket *);
552
553 static void cfil_rw_lock_exclusive(lck_rw_t *);
554 static void cfil_rw_unlock_exclusive(lck_rw_t *);
555 static void cfil_rw_lock_shared(lck_rw_t *);
556 static void cfil_rw_unlock_shared(lck_rw_t *);
557 static boolean_t cfil_rw_lock_shared_to_exclusive(lck_rw_t *);
558 static void cfil_rw_lock_exclusive_to_shared(lck_rw_t *);
559
560 static unsigned int cfil_data_length(struct mbuf *, int *);
561
562 /*
563 * Content filter global read write lock
564 */
565
566 static void
567 cfil_rw_lock_exclusive(lck_rw_t *lck)
568 {
569 void *lr_saved;
570
571 lr_saved = __builtin_return_address(0);
572
573 lck_rw_lock_exclusive(lck);
574
575 cfil_rw_lock_history[cfil_rw_nxt_lck] = lr_saved;
576 cfil_rw_nxt_lck = (cfil_rw_nxt_lck + 1) % CFIL_RW_LCK_MAX;
577 }
578
579 static void
580 cfil_rw_unlock_exclusive(lck_rw_t *lck)
581 {
582 void *lr_saved;
583
584 lr_saved = __builtin_return_address(0);
585
586 lck_rw_unlock_exclusive(lck);
587
588 cfil_rw_unlock_history[cfil_rw_nxt_unlck] = lr_saved;
589 cfil_rw_nxt_unlck = (cfil_rw_nxt_unlck + 1) % CFIL_RW_LCK_MAX;
590 }
591
592 static void
593 cfil_rw_lock_shared(lck_rw_t *lck)
594 {
595 void *lr_saved;
596
597 lr_saved = __builtin_return_address(0);
598
599 lck_rw_lock_shared(lck);
600
601 cfil_rw_lock_history[cfil_rw_nxt_lck] = lr_saved;
602 cfil_rw_nxt_lck = (cfil_rw_nxt_lck + 1) % CFIL_RW_LCK_MAX;
603 }
604
605 static void
606 cfil_rw_unlock_shared(lck_rw_t *lck)
607 {
608 void *lr_saved;
609
610 lr_saved = __builtin_return_address(0);
611
612 lck_rw_unlock_shared(lck);
613
614 cfil_rw_unlock_history[cfil_rw_nxt_unlck] = lr_saved;
615 cfil_rw_nxt_unlck = (cfil_rw_nxt_unlck + 1) % CFIL_RW_LCK_MAX;
616 }
617
/*
 * Try to upgrade the cfil read-write lock from shared to exclusive.
 * Returns TRUE when the upgrade succeeded.
 * NOTE(review): per lck_rw semantics the shared hold is presumably
 * dropped when the upgrade fails, so the caller must re-acquire the
 * lock in that case -- confirm against locks_*.c.
 */
static boolean_t
cfil_rw_lock_shared_to_exclusive(lck_rw_t *lck)
{
	void *lr_saved;
	boolean_t upgraded;

	lr_saved = __builtin_return_address(0);

	upgraded = lck_rw_lock_shared_to_exclusive(lck);
	if (upgraded) {
		/* The shared hold was given up: record it as an unlock */
		cfil_rw_unlock_history[cfil_rw_nxt_unlck] = lr_saved;
		cfil_rw_nxt_unlck = (cfil_rw_nxt_unlck + 1) % CFIL_RW_LCK_MAX;
	}
	return (upgraded);
}
633
/*
 * Downgrade the cfil read-write lock from exclusive to shared and
 * record the caller in the lock history (the new shared hold).
 */
static void
cfil_rw_lock_exclusive_to_shared(lck_rw_t *lck)
{
	void *lr_saved;

	lr_saved = __builtin_return_address(0);

	lck_rw_lock_exclusive_to_shared(lck);

	cfil_rw_lock_history[cfil_rw_nxt_lck] = lr_saved;
	cfil_rw_nxt_lck = (cfil_rw_nxt_lck + 1) % CFIL_RW_LCK_MAX;
}
646
647 static void
648 cfil_rw_lock_assert_held(lck_rw_t *lck, int exclusive)
649 {
650 lck_rw_assert(lck,
651 exclusive ? LCK_RW_ASSERT_EXCLUSIVE : LCK_RW_ASSERT_HELD);
652 }
653
/*
 * Assert that the socket's protocol lock is owned by the current
 * thread: use the protocol's pr_getlock when available, otherwise
 * fall back to the domain-wide mutex.
 */
static void
socket_lock_assert_owned(struct socket *so)
{
	lck_mtx_t *mutex_held;

	if (so->so_proto->pr_getlock != NULL)
		mutex_held = (*so->so_proto->pr_getlock)(so, 0);
	else
		mutex_held = so->so_proto->pr_domain->dom_mtx;

	lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);
}
666
667 /*
668 * Return the number of bytes in the mbuf chain using the same
669 * method as m_length() or sballoc()
670 */
671 static unsigned int
672 cfil_data_length(struct mbuf *m, int *retmbcnt)
673 {
674 struct mbuf *m0;
675 unsigned int pktlen;
676 int mbcnt;
677
678 if (retmbcnt == NULL)
679 return (m_length(m));
680
681 pktlen = 0;
682 mbcnt = 0;
683 for (m0 = m; m0 != NULL; m0 = m0->m_next) {
684 pktlen += m0->m_len;
685 mbcnt += MSIZE;
686 if (m0->m_flags & M_EXT)
687 mbcnt += m0->m_ext.ext_size;
688 }
689 *retmbcnt = mbcnt;
690 return (pktlen);
691 }
692
693 /*
694 * Common mbuf queue utilities
695 */
696
697 static inline void
698 cfil_queue_init(struct cfil_queue *cfq)
699 {
700 cfq->q_start = 0;
701 cfq->q_end = 0;
702 MBUFQ_INIT(&cfq->q_mq);
703 }
704
705 static inline uint64_t
706 cfil_queue_drain(struct cfil_queue *cfq)
707 {
708 uint64_t drained = cfq->q_start - cfq->q_end;
709 cfq->q_start = 0;
710 cfq->q_end = 0;
711 MBUFQ_DRAIN(&cfq->q_mq);
712
713 return (drained);
714 }
715
716 /* Return 1 when empty, 0 otherwise */
717 static inline int
718 cfil_queue_empty(struct cfil_queue *cfq)
719 {
720 return (MBUFQ_EMPTY(&cfq->q_mq));
721 }
722
/* Absolute stream offset of the first byte on the queue */
static inline uint64_t
cfil_queue_offset_first(struct cfil_queue *cfq)
{
	return (cfq->q_start);
}

/* Absolute stream offset just past the last byte on the queue */
static inline uint64_t
cfil_queue_offset_last(struct cfil_queue *cfq)
{
	return (cfq->q_end);
}

/* Number of bytes currently held on the queue */
static inline uint64_t
cfil_queue_len(struct cfil_queue *cfq)
{
	return (cfq->q_end - cfq->q_start);
}
740
741 /*
742 * Routines to verify some fundamental assumptions
743 */
744
/*
 * Check the fundamental invariants of a content filter queue:
 * - offsets are ordered (q_start <= q_end);
 * - the queue is empty if and only if the offsets are equal;
 * - no mbuf on the queue was freed and all mbufs are data types;
 * - each chain's m_length() matches the sum of its mbuf lengths;
 * - the total byte count matches q_end - q_start.
 * Panics on the first violation found.
 */
static void
cfil_queue_verify(struct cfil_queue *cfq)
{
	mbuf_t m;
	mbuf_t n;
	uint64_t queuesize = 0;

	/* Verify offsets are ordered */
	VERIFY(cfq->q_start <= cfq->q_end);

	/*
	 * When queue is empty, the offsets are equal otherwise the offsets
	 * are different
	 */
	VERIFY((MBUFQ_EMPTY(&cfq->q_mq) && cfq->q_start == cfq->q_end) ||
		(!MBUFQ_EMPTY(&cfq->q_mq) &&
		cfq->q_start != cfq->q_end));

	MBUFQ_FOREACH(m, &cfq->q_mq) {
		size_t chainsize = 0;
		unsigned int mlen = m_length(m);

		/* Catch use of an mbuf that was already freed */
		if (m == (void *)M_TAG_FREE_PATTERN ||
			m->m_next == (void *)M_TAG_FREE_PATTERN ||
			m->m_nextpkt == (void *)M_TAG_FREE_PATTERN)
			panic("%s - mq %p is free at %p", __func__,
				&cfq->q_mq, m);
		for (n = m; n != NULL; n = n->m_next) {
			if (n->m_type != MT_DATA &&
				n->m_type != MT_HEADER &&
				n->m_type != MT_OOBDATA)
				panic("%s - %p unsupported type %u", __func__,
					n, n->m_type);
			chainsize += n->m_len;
		}
		if (mlen != chainsize)
			panic("%s - %p m_length() %u != chainsize %lu",
				__func__, m, mlen, chainsize);
		queuesize += chainsize;
	}
	/* NOTE(review): m is NULL here after the loop; the panic argument is of little use */
	if (queuesize != cfq->q_end - cfq->q_start)
		panic("%s - %p queuesize %llu != offsetdiffs %llu", __func__,
			m, queuesize, cfq->q_end - cfq->q_start);
}
789
790 static void
791 cfil_queue_enqueue(struct cfil_queue *cfq, mbuf_t m, size_t len)
792 {
793 CFIL_QUEUE_VERIFY(cfq);
794
795 MBUFQ_ENQUEUE(&cfq->q_mq, m);
796 cfq->q_end += len;
797
798 CFIL_QUEUE_VERIFY(cfq);
799 }
800
/*
 * Unlink mbuf chain "m" from the queue and advance the start offset
 * by its length. The chain must be on the queue and must be exactly
 * "len" bytes long.
 */
static void
cfil_queue_remove(struct cfil_queue *cfq, mbuf_t m, size_t len)
{
	CFIL_QUEUE_VERIFY(cfq);

	VERIFY(m_length(m) == len);

	MBUFQ_REMOVE(&cfq->q_mq, m);
	MBUFQ_NEXT(m) = NULL;	/* detach the chain from its successor */
	cfq->q_start += len;

	CFIL_QUEUE_VERIFY(cfq);
}
814
815 static mbuf_t
816 cfil_queue_first(struct cfil_queue *cfq)
817 {
818 return (MBUFQ_FIRST(&cfq->q_mq));
819 }
820
/* Return the mbuf chain following "m" on the queue, or NULL at the end */
static mbuf_t
cfil_queue_next(struct cfil_queue *cfq, mbuf_t m)
{
#pragma unused(cfq)
	return (MBUFQ_NEXT(m));
}
827
/*
 * Check the invariants of one direction of a filter entry: the
 * pending queue precedes the control queue, the peek offset is at
 * least the pass offset, and cfe_peeked covers the control queue
 * start.
 */
static void
cfil_entry_buf_verify(struct cfe_buf *cfe_buf)
{
	CFIL_QUEUE_VERIFY(&cfe_buf->cfe_ctl_q);
	CFIL_QUEUE_VERIFY(&cfe_buf->cfe_pending_q);

	/* Verify the queues are ordered so that pending is before ctl */
	VERIFY(cfe_buf->cfe_ctl_q.q_start >= cfe_buf->cfe_pending_q.q_end);

	/* The peek offset cannot be less than the pass offset */
	VERIFY(cfe_buf->cfe_peek_offset >= cfe_buf->cfe_pass_offset);

	/* Make sure we've updated the offset we peeked at */
	VERIFY(cfe_buf->cfe_ctl_q.q_start <= cfe_buf->cfe_peeked);
}
843
/* Check the invariants of both directions of a filter entry */
static void
cfil_entry_verify(struct cfil_entry *entry)
{
	cfil_entry_buf_verify(&entry->cfe_snd);
	cfil_entry_buf_verify(&entry->cfe_rcv);
}
850
/*
 * Check the invariants of one direction of the per-socket state:
 * pending offsets ordered and a non-negative mbuf storage count.
 */
static void
cfil_info_buf_verify(struct cfi_buf *cfi_buf)
{
	CFIL_QUEUE_VERIFY(&cfi_buf->cfi_inject_q);

	VERIFY(cfi_buf->cfi_pending_first <= cfi_buf->cfi_pending_last);
	VERIFY(cfi_buf->cfi_pending_mbcnt >= 0);
}
859
860 static void
861 cfil_info_verify(struct cfil_info *cfil_info)
862 {
863 int i;
864
865 if (cfil_info == NULL)
866 return;
867
868 cfil_info_buf_verify(&cfil_info->cfi_snd);
869 cfil_info_buf_verify(&cfil_info->cfi_rcv);
870
871 for (i = 0; i < MAX_CONTENT_FILTER; i++)
872 cfil_entry_verify(&cfil_info->cfi_entries[i]);
873 }
874
/*
 * Walk a content filter's socket entries and check that each entry
 * points back to the filter and that the list length matches
 * cf_sock_count. Panics (via VERIFY) on inconsistency.
 */
static void
verify_content_filter(struct content_filter *cfc)
{
	struct cfil_entry *entry;
	uint32_t count = 0;

	/* NOTE(review): cf_sock_count is unsigned, so this check is vacuous */
	VERIFY(cfc->cf_sock_count >= 0);

	TAILQ_FOREACH(entry, &cfc->cf_sock_entries, cfe_link) {
		count++;
		VERIFY(cfc == entry->cfe_filter);
	}
	VERIFY(count == cfc->cf_sock_count);
}
889
890 /*
891 * Kernel control socket callbacks
892 */
/*
 * A user space filter agent connected to our kernel control socket:
 * allocate a "struct content_filter", lazily allocate the global
 * content_filters[] array if needed, and register the new filter in
 * the slot matching the kernel control unit (sac->sc_unit, 1-based).
 *
 * Returns 0 on success with *unitinfo set to the new content_filter;
 * ENOMEM, EINVAL (bad unit) or EADDRINUSE (unit already taken)
 * otherwise.
 */
static errno_t
cfil_ctl_connect(kern_ctl_ref kctlref, struct sockaddr_ctl *sac,
	void **unitinfo)
{
	errno_t error = 0;
	struct content_filter *cfc = NULL;

	CFIL_LOG(LOG_NOTICE, "");

	cfc = zalloc(content_filter_zone);
	if (cfc == NULL) {
		CFIL_LOG(LOG_ERR, "zalloc failed");
		error = ENOMEM;
		goto done;
	}
	bzero(cfc, sizeof(struct content_filter));

	cfil_rw_lock_exclusive(&cfil_lck_rw);
	if (content_filters == NULL) {
		struct content_filter **tmp;

		/*
		 * MALLOC() may block, so drop the lock while allocating;
		 * another thread may allocate the array in the meantime.
		 */
		cfil_rw_unlock_exclusive(&cfil_lck_rw);

		MALLOC(tmp,
			struct content_filter **,
			MAX_CONTENT_FILTER * sizeof(struct content_filter *),
			M_TEMP,
			M_WAITOK | M_ZERO);

		cfil_rw_lock_exclusive(&cfil_lck_rw);

		/* Fail only if neither we nor a racing thread got memory */
		if (tmp == NULL && content_filters == NULL) {
			error = ENOMEM;
			cfil_rw_unlock_exclusive(&cfil_lck_rw);
			goto done;
		}
		/* Another thread may have won the race */
		if (content_filters != NULL)
			FREE(tmp, M_TEMP);
		else
			content_filters = tmp;
	}

	if (sac->sc_unit == 0 || sac->sc_unit > MAX_CONTENT_FILTER) {
		CFIL_LOG(LOG_ERR, "bad sc_unit %u", sac->sc_unit);
		error = EINVAL;
	} else if (content_filters[sac->sc_unit - 1] != NULL) {
		CFIL_LOG(LOG_ERR, "sc_unit %u in use", sac->sc_unit);
		error = EADDRINUSE;
	} else {
		/*
		 * kernel control socket kcunit numbers start at 1
		 */
		content_filters[sac->sc_unit - 1] = cfc;

		cfc->cf_kcref = kctlref;
		cfc->cf_kcunit = sac->sc_unit;
		TAILQ_INIT(&cfc->cf_sock_entries);

		*unitinfo = cfc;
		cfil_active_count++;
	}
	cfil_rw_unlock_exclusive(&cfil_lck_rw);
done:
	if (error != 0 && cfc != NULL)
		zfree(content_filter_zone, cfc);

	if (error == 0)
		OSIncrementAtomic(&cfil_stats.cfs_ctl_connect_ok);
	else
		OSIncrementAtomic(&cfil_stats.cfs_ctl_connect_fail);

	CFIL_LOG(LOG_INFO, "return %d cfil_active_count %u kcunit %u",
		error, cfil_active_count, sac->sc_unit);

	return (error);
}
970
/*
 * cfil_ctl_disconnect()
 *
 * Kernel control callback invoked when the user space filter agent
 * disconnects from (or closes) the kernel control socket.  Marks the
 * filter as detaching, lets all pending data flow by passing
 * CFM_MAX_OFFSET in both directions for every attached socket, unlinks
 * every entry from the filter, and finally frees the content filter.
 */
static errno_t
cfil_ctl_disconnect(kern_ctl_ref kctlref, u_int32_t kcunit, void *unitinfo)
{
#pragma unused(kctlref)
	errno_t error = 0;
	struct content_filter *cfc;
	struct cfil_entry *entry;

	CFIL_LOG(LOG_NOTICE, "");

	if (content_filters == NULL) {
		CFIL_LOG(LOG_ERR, "no content filter");
		error = EINVAL;
		goto done;
	}
	if (kcunit > MAX_CONTENT_FILTER) {
		CFIL_LOG(LOG_ERR, "kcunit %u > MAX_CONTENT_FILTER (%d)",
			kcunit, MAX_CONTENT_FILTER);
		error = EINVAL;
		goto done;
	}

	cfc = (struct content_filter *)unitinfo;
	if (cfc == NULL)
		goto done;

	cfil_rw_lock_exclusive(&cfil_lck_rw);
	if (content_filters[kcunit - 1] != cfc || cfc->cf_kcunit != kcunit) {
		CFIL_LOG(LOG_ERR, "bad unit info %u)",
			kcunit);
		cfil_rw_unlock_exclusive(&cfil_lck_rw);
		goto done;
	}
	cfc->cf_flags |= CFF_DETACHING;
	/*
	 * Remove all sockets from the filter
	 */
	while ((entry = TAILQ_FIRST(&cfc->cf_sock_entries)) != NULL) {
		cfil_rw_lock_assert_held(&cfil_lck_rw, 1);

		verify_content_filter(cfc);
		/*
		 * Accept all outstanding data by pushing to next filter
		 * or back to socket
		 *
		 * TBD: Actually we should make sure all data has been pushed
		 * back to socket
		 */
		if (entry->cfe_cfil_info && entry->cfe_cfil_info->cfi_so) {
			struct cfil_info *cfil_info = entry->cfe_cfil_info;
			struct socket *so = cfil_info->cfi_so;

			/* Need to let data flow immediately */
			entry->cfe_flags |= CFEF_SENT_SOCK_ATTACHED |
				CFEF_DATA_START;

			/*
			 * Respect locking hierarchy: the socket lock is
			 * taken before cfil_lck_rw, so drop the rw lock
			 * before locking the socket.
			 */
			cfil_rw_unlock_exclusive(&cfil_lck_rw);

			socket_lock(so, 1);

			/*
			 * When cfe_filter is NULL the filter is detached
			 * and the entry has been removed from cf_sock_entries
			 */
			if (so->so_cfil == NULL || entry->cfe_filter == NULL) {
				cfil_rw_lock_exclusive(&cfil_lck_rw);
				goto release;
			}
			/* Pass everything, outgoing (1) then incoming (0) */
			(void) cfil_action_data_pass(so, kcunit, 1,
				CFM_MAX_OFFSET,
				CFM_MAX_OFFSET);

			(void) cfil_action_data_pass(so, kcunit, 0,
				CFM_MAX_OFFSET,
				CFM_MAX_OFFSET);

			cfil_rw_lock_exclusive(&cfil_lck_rw);

			/*
			 * Check again as the socket may have been unlocked
			 * when calling cfil_acquire_sockbuf()
			 */
			if (so->so_cfil == NULL || entry->cfe_filter == NULL)
				goto release;

			/* The filter is now detached */
			entry->cfe_flags |= CFEF_CFIL_DETACHED;
			CFIL_LOG(LOG_NOTICE, "so %llx detached %u",
				(uint64_t)VM_KERNEL_ADDRPERM(so), kcunit);

			/*
			 * Wake any thread sleeping on &so->so_cfil (the
			 * close-wait path, per CFIF_CLOSE_WAIT) once no
			 * filter remains attached.
			 */
			if ((so->so_cfil->cfi_flags & CFIF_CLOSE_WAIT) &&
			    cfil_filters_attached(so) == 0) {
				CFIL_LOG(LOG_NOTICE, "so %llx waking",
					(uint64_t)VM_KERNEL_ADDRPERM(so));
				wakeup((caddr_t)&so->so_cfil);
			}

			/*
			 * Remove the filter entry from the content filter
			 * but leave the rest of the state intact as the queues
			 * may not be empty yet
			 */
			entry->cfe_filter = NULL;
			entry->cfe_necp_control_unit = 0;

			TAILQ_REMOVE(&cfc->cf_sock_entries, entry, cfe_link);
			cfc->cf_sock_count--;
release:
			socket_unlock(so, 1);
		}
	}
	verify_content_filter(cfc);

	VERIFY(cfc->cf_sock_count == 0);

	/*
	 * Make filter inactive
	 */
	content_filters[kcunit - 1] = NULL;
	cfil_active_count--;
	cfil_rw_unlock_exclusive(&cfil_lck_rw);

	zfree(content_filter_zone, cfc);
done:
	if (error == 0)
		OSIncrementAtomic(&cfil_stats.cfs_ctl_disconnect_ok);
	else
		OSIncrementAtomic(&cfil_stats.cfs_ctl_disconnect_fail);

	CFIL_LOG(LOG_INFO, "return %d cfil_active_count %u kcunit %u",
		error, cfil_active_count, kcunit);

	return (error);
}
1108
/*
 * cfil_acquire_sockbuf()
 *
 * Prevent any other thread from acquiring the sockbuf
 * We use sb_cfil_thread as a semaphore to prevent other threads from
 * messing with the sockbuf -- see sblock()
 * Note: We do not set SB_LOCK here because the thread may check or modify
 * SB_LOCK several times until it calls cfil_release_sockbuf() -- currently
 * sblock(), sbunlock() or sodefunct()
 *
 * The socket lock must be held by the caller; it is the mutex used for
 * the msleep() below.  Returns 0 on success (including when so_cfil is
 * already detached) or EPIPE when the CFIF_DROP flag is set.
 */
static int
cfil_acquire_sockbuf(struct socket *so, int outgoing)
{
	thread_t tp = current_thread();
	/* outgoing selects the send buffer, incoming the receive buffer */
	struct sockbuf *sb = outgoing ? &so->so_snd : &so->so_rcv;
	lck_mtx_t *mutex_held;
	int error = 0;

	/*
	 * Wait until no thread is holding the sockbuf and other content
	 * filter threads have released the sockbuf
	 */
	while ((sb->sb_flags & SB_LOCK) ||
		(sb->sb_cfil_thread != NULL && sb->sb_cfil_thread != tp)) {
		if (so->so_proto->pr_getlock != NULL)
			mutex_held = (*so->so_proto->pr_getlock)(so, 0);
		else
			mutex_held = so->so_proto->pr_domain->dom_mtx;

		lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);

		/* Advertise a waiter so the releaser issues a wakeup() */
		sb->sb_wantlock++;
		VERIFY(sb->sb_wantlock != 0);

		msleep(&sb->sb_flags, mutex_held, PSOCK, "cfil_acquire_sockbuf",
			NULL);

		VERIFY(sb->sb_wantlock != 0);
		sb->sb_wantlock--;
	}
	/*
	 * Use reference count for repetitive calls on same thread
	 */
	if (sb->sb_cfil_refs == 0) {
		VERIFY(sb->sb_cfil_thread == NULL);
		VERIFY((sb->sb_flags & SB_LOCK) == 0);

		sb->sb_cfil_thread = tp;
		sb->sb_flags |= SB_LOCK;
	}
	sb->sb_cfil_refs++;

	/* We acquire the socket buffer when we need to cleanup */
	if (so->so_cfil == NULL) {
		CFIL_LOG(LOG_ERR, "so %llx cfil detached",
			(uint64_t)VM_KERNEL_ADDRPERM(so));
		error = 0;
	} else if (so->so_cfil->cfi_flags & CFIF_DROP) {
		CFIL_LOG(LOG_ERR, "so %llx drop set",
			(uint64_t)VM_KERNEL_ADDRPERM(so));
		error = EPIPE;
	}

	return (error);
}
1174
/*
 * cfil_release_sockbuf()
 *
 * Counterpart of cfil_acquire_sockbuf(): drops one reference and, on
 * the last reference held by this thread, clears the ownership marker
 * and SB_LOCK and wakes up any waiters.  Panics if called by a thread
 * that does not own the sockbuf.
 */
static void
cfil_release_sockbuf(struct socket *so, int outgoing)
{
	struct sockbuf *sb = outgoing ? &so->so_snd : &so->so_rcv;
	thread_t tp = current_thread();

	socket_lock_assert_owned(so);

	if (sb->sb_cfil_thread != NULL && sb->sb_cfil_thread != tp)
		panic("%s sb_cfil_thread %p not current %p", __func__,
			sb->sb_cfil_thread, tp);
	/*
	 * Don't panic if we are defunct because SB_LOCK has
	 * been cleared by sodefunct()
	 */
	if (!(so->so_flags & SOF_DEFUNCT) && !(sb->sb_flags & SB_LOCK))
		panic("%s SB_LOCK not set on %p", __func__,
			sb);
	/*
	 * We can unlock when the thread unwinds to the last reference
	 */
	sb->sb_cfil_refs--;
	if (sb->sb_cfil_refs == 0) {
		sb->sb_cfil_thread = NULL;
		sb->sb_flags &= ~SB_LOCK;

		/* Wake threads sleeping in cfil_acquire_sockbuf() */
		if (sb->sb_wantlock > 0)
			wakeup(&sb->sb_flags);
	}
}
1205
1206 cfil_sock_id_t
1207 cfil_sock_id_from_socket(struct socket *so)
1208 {
1209 if ((so->so_flags & SOF_CONTENT_FILTER) && so->so_cfil)
1210 return (so->so_cfil->cfi_sock_id);
1211 else
1212 return (CFIL_SOCK_ID_NONE);
1213 }
1214
/*
 * cfil_socket_from_sock_id()
 *
 * Look up the TCP socket matching a content filter socket id.  The id
 * encodes the socket generation count in the upper 32 bits and the
 * inpcb flow hash in the lower 32 bits (see cfil_info_alloc()).
 * Returns the matching socket or NULL.  The returned socket is not
 * locked or referenced; the ipi_lock is dropped before returning.
 */
static struct socket *
cfil_socket_from_sock_id(cfil_sock_id_t cfil_sock_id)
{
	struct socket *so = NULL;
	u_int64_t gencnt = cfil_sock_id >> 32;
	u_int32_t flowhash = (u_int32_t)(cfil_sock_id & 0x0ffffffff);
	struct inpcb *inp = NULL;
	struct inpcbinfo *pcbinfo = &tcbinfo;

	lck_rw_lock_shared(pcbinfo->ipi_lock);
	LIST_FOREACH(inp, pcbinfo->ipi_listhead, inp_list) {
		if (inp->inp_state != INPCB_STATE_DEAD &&
			inp->inp_socket != NULL &&
			inp->inp_flowhash == flowhash &&
			(inp->inp_socket->so_gencnt & 0x0ffffffff) == gencnt &&
			inp->inp_socket->so_cfil != NULL) {
			so = inp->inp_socket;
			break;
		}
	}
	lck_rw_done(pcbinfo->ipi_lock);

	if (so == NULL) {
		OSIncrementAtomic(&cfil_stats.cfs_sock_id_not_found);
		CFIL_LOG(LOG_DEBUG,
			"no socket for sock_id %llx gencnt %llx flowhash %x",
			cfil_sock_id, gencnt, flowhash);
	}

	return (so);
}
1246
/*
 * cfil_ctl_send()
 *
 * Kernel control callback invoked when the user space filter agent
 * sends an action message (CFM_OP_DATA_UPDATE or CFM_OP_DROP) on the
 * kernel control socket.  Validates the message header, locates the
 * target socket from cfm_sock_id, and dispatches the action.  The
 * mbuf is always freed before returning.
 */
static errno_t
cfil_ctl_send(kern_ctl_ref kctlref, u_int32_t kcunit, void *unitinfo, mbuf_t m,
	int flags)
{
#pragma unused(kctlref, flags)
	errno_t error = 0;
	struct cfil_msg_hdr *msghdr;
	struct content_filter *cfc = (struct content_filter *)unitinfo;
	struct socket *so;
	struct cfil_msg_action *action_msg;
	struct cfil_entry *entry;

	CFIL_LOG(LOG_INFO, "");

	if (content_filters == NULL) {
		CFIL_LOG(LOG_ERR, "no content filter");
		error = EINVAL;
		goto done;
	}
	if (kcunit > MAX_CONTENT_FILTER) {
		CFIL_LOG(LOG_ERR, "kcunit %u > MAX_CONTENT_FILTER (%d)",
			kcunit, MAX_CONTENT_FILTER);
		error = EINVAL;
		goto done;
	}

	/* The message must at least contain a complete header */
	if (m_length(m) < sizeof(struct cfil_msg_hdr)) {
		CFIL_LOG(LOG_ERR, "too short %u", m_length(m));
		error = EINVAL;
		goto done;
	}
	msghdr = (struct cfil_msg_hdr *)mbuf_data(m);
	if (msghdr->cfm_version != CFM_VERSION_CURRENT) {
		CFIL_LOG(LOG_ERR, "bad version %u", msghdr->cfm_version);
		error = EINVAL;
		goto done;
	}
	if (msghdr->cfm_type != CFM_TYPE_ACTION) {
		CFIL_LOG(LOG_ERR, "bad type %u", msghdr->cfm_type);
		error = EINVAL;
		goto done;
	}
	/* Validate action operation */
	switch (msghdr->cfm_op) {
	case CFM_OP_DATA_UPDATE:
		OSIncrementAtomic(
			&cfil_stats.cfs_ctl_action_data_update);
		break;
	case CFM_OP_DROP:
		OSIncrementAtomic(&cfil_stats.cfs_ctl_action_drop);
		break;
	default:
		OSIncrementAtomic(&cfil_stats.cfs_ctl_action_bad_op);
		CFIL_LOG(LOG_ERR, "bad op %u", msghdr->cfm_op);
		error = EINVAL;
		goto done;
	}
	if (msghdr->cfm_len != sizeof(struct cfil_msg_action)) {
		OSIncrementAtomic(&cfil_stats.cfs_ctl_action_bad_len);
		error = EINVAL;
		CFIL_LOG(LOG_ERR, "bad len: %u for op %u",
			msghdr->cfm_len,
			msghdr->cfm_op);
		goto done;
	}
	cfil_rw_lock_shared(&cfil_lck_rw);
	if (cfc != (void *)content_filters[kcunit - 1]) {
		CFIL_LOG(LOG_ERR, "unitinfo does not match for kcunit %u",
			kcunit);
		error = EINVAL;
		cfil_rw_unlock_shared(&cfil_lck_rw);
		goto done;
	}

	so = cfil_socket_from_sock_id(msghdr->cfm_sock_id);
	if (so == NULL) {
		CFIL_LOG(LOG_NOTICE, "bad sock_id %llx",
			msghdr->cfm_sock_id);
		error = EINVAL;
		cfil_rw_unlock_shared(&cfil_lck_rw);
		goto done;
	}
	cfil_rw_unlock_shared(&cfil_lck_rw);

	socket_lock(so, 1);

	/* Re-validate the cfil state now that the socket is locked */
	if (so->so_cfil == NULL) {
		CFIL_LOG(LOG_NOTICE, "so %llx not attached",
			(uint64_t)VM_KERNEL_ADDRPERM(so));
		error = EINVAL;
		goto unlock;
	} else if (so->so_cfil->cfi_flags & CFIF_DROP) {
		CFIL_LOG(LOG_NOTICE, "so %llx drop set",
			(uint64_t)VM_KERNEL_ADDRPERM(so));
		error = EINVAL;
		goto unlock;
	}
	entry = &so->so_cfil->cfi_entries[kcunit - 1];
	if (entry->cfe_filter == NULL) {
		CFIL_LOG(LOG_NOTICE, "so %llx no filter",
			(uint64_t)VM_KERNEL_ADDRPERM(so));
		error = EINVAL;
		goto unlock;
	}

	/* An action is only valid after the attached event was sent */
	if (entry->cfe_flags & CFEF_SENT_SOCK_ATTACHED)
		entry->cfe_flags |= CFEF_DATA_START;
	else {
		CFIL_LOG(LOG_ERR,
			"so %llx attached not sent for %u",
			(uint64_t)VM_KERNEL_ADDRPERM(so), kcunit);
		error = EINVAL;
		goto unlock;
	}

	microuptime(&entry->cfe_last_action);

	action_msg = (struct cfil_msg_action *)msghdr;

	switch (msghdr->cfm_op) {
	case CFM_OP_DATA_UPDATE:
		/* Outgoing direction first, then incoming */
		if (action_msg->cfa_out_peek_offset != 0 ||
			action_msg->cfa_out_pass_offset != 0)
			error = cfil_action_data_pass(so, kcunit, 1,
				action_msg->cfa_out_pass_offset,
				action_msg->cfa_out_peek_offset);
		/* EJUSTRETURN is not an error for the caller */
		if (error == EJUSTRETURN)
			error = 0;
		if (error != 0)
			break;
		if (action_msg->cfa_in_peek_offset != 0 ||
			action_msg->cfa_in_pass_offset != 0)
			error = cfil_action_data_pass(so, kcunit, 0,
				action_msg->cfa_in_pass_offset,
				action_msg->cfa_in_peek_offset);
		if (error == EJUSTRETURN)
			error = 0;
		break;

	case CFM_OP_DROP:
		error = cfil_action_drop(so, kcunit);
		break;

	default:
		error = EINVAL;
		break;
	}
unlock:
	socket_unlock(so, 1);
done:
	mbuf_freem(m);

	if (error == 0)
		OSIncrementAtomic(&cfil_stats.cfs_ctl_send_ok);
	else
		OSIncrementAtomic(&cfil_stats.cfs_ctl_send_bad);

	return (error);
}
1406
1407 static errno_t
1408 cfil_ctl_getopt(kern_ctl_ref kctlref, u_int32_t kcunit, void *unitinfo,
1409 int opt, void *data, size_t *len)
1410 {
1411 #pragma unused(kctlref, opt)
1412 errno_t error = 0;
1413 struct content_filter *cfc = (struct content_filter *)unitinfo;
1414
1415 CFIL_LOG(LOG_NOTICE, "");
1416
1417 cfil_rw_lock_shared(&cfil_lck_rw);
1418
1419 if (content_filters == NULL) {
1420 CFIL_LOG(LOG_ERR, "no content filter");
1421 error = EINVAL;
1422 goto done;
1423 }
1424 if (kcunit > MAX_CONTENT_FILTER) {
1425 CFIL_LOG(LOG_ERR, "kcunit %u > MAX_CONTENT_FILTER (%d)",
1426 kcunit, MAX_CONTENT_FILTER);
1427 error = EINVAL;
1428 goto done;
1429 }
1430 if (cfc != (void *)content_filters[kcunit - 1]) {
1431 CFIL_LOG(LOG_ERR, "unitinfo does not match for kcunit %u",
1432 kcunit);
1433 error = EINVAL;
1434 goto done;
1435 }
1436 switch (opt) {
1437 case CFIL_OPT_NECP_CONTROL_UNIT:
1438 if (*len < sizeof(uint32_t)) {
1439 CFIL_LOG(LOG_ERR, "len too small %lu", *len);
1440 error = EINVAL;
1441 goto done;
1442 }
1443 if (data != NULL)
1444 *(uint32_t *)data = cfc->cf_necp_control_unit;
1445 break;
1446 default:
1447 error = ENOPROTOOPT;
1448 break;
1449 }
1450 done:
1451 cfil_rw_unlock_shared(&cfil_lck_rw);
1452
1453 return (error);
1454 }
1455
1456 static errno_t
1457 cfil_ctl_setopt(kern_ctl_ref kctlref, u_int32_t kcunit, void *unitinfo,
1458 int opt, void *data, size_t len)
1459 {
1460 #pragma unused(kctlref, opt)
1461 errno_t error = 0;
1462 struct content_filter *cfc = (struct content_filter *)unitinfo;
1463
1464 CFIL_LOG(LOG_NOTICE, "");
1465
1466 cfil_rw_lock_exclusive(&cfil_lck_rw);
1467
1468 if (content_filters == NULL) {
1469 CFIL_LOG(LOG_ERR, "no content filter");
1470 error = EINVAL;
1471 goto done;
1472 }
1473 if (kcunit > MAX_CONTENT_FILTER) {
1474 CFIL_LOG(LOG_ERR, "kcunit %u > MAX_CONTENT_FILTER (%d)",
1475 kcunit, MAX_CONTENT_FILTER);
1476 error = EINVAL;
1477 goto done;
1478 }
1479 if (cfc != (void *)content_filters[kcunit - 1]) {
1480 CFIL_LOG(LOG_ERR, "unitinfo does not match for kcunit %u",
1481 kcunit);
1482 error = EINVAL;
1483 goto done;
1484 }
1485 switch (opt) {
1486 case CFIL_OPT_NECP_CONTROL_UNIT:
1487 if (len < sizeof(uint32_t)) {
1488 CFIL_LOG(LOG_ERR, "CFIL_OPT_NECP_CONTROL_UNIT "
1489 "len too small %lu", len);
1490 error = EINVAL;
1491 goto done;
1492 }
1493 if (cfc->cf_necp_control_unit != 0) {
1494 CFIL_LOG(LOG_ERR, "CFIL_OPT_NECP_CONTROL_UNIT "
1495 "already set %u",
1496 cfc->cf_necp_control_unit);
1497 error = EINVAL;
1498 goto done;
1499 }
1500 cfc->cf_necp_control_unit = *(uint32_t *)data;
1501 break;
1502 default:
1503 error = ENOPROTOOPT;
1504 break;
1505 }
1506 done:
1507 cfil_rw_unlock_exclusive(&cfil_lck_rw);
1508
1509 return (error);
1510 }
1511
1512
1513 static void
1514 cfil_ctl_rcvd(kern_ctl_ref kctlref, u_int32_t kcunit, void *unitinfo, int flags)
1515 {
1516 #pragma unused(kctlref, flags)
1517 struct content_filter *cfc = (struct content_filter *)unitinfo;
1518 struct socket *so = NULL;
1519 int error;
1520 struct cfil_entry *entry;
1521
1522 CFIL_LOG(LOG_INFO, "");
1523
1524 if (content_filters == NULL) {
1525 CFIL_LOG(LOG_ERR, "no content filter");
1526 OSIncrementAtomic(&cfil_stats.cfs_ctl_rcvd_bad);
1527 return;
1528 }
1529 if (kcunit > MAX_CONTENT_FILTER) {
1530 CFIL_LOG(LOG_ERR, "kcunit %u > MAX_CONTENT_FILTER (%d)",
1531 kcunit, MAX_CONTENT_FILTER);
1532 OSIncrementAtomic(&cfil_stats.cfs_ctl_rcvd_bad);
1533 return;
1534 }
1535 cfil_rw_lock_shared(&cfil_lck_rw);
1536 if (cfc != (void *)content_filters[kcunit - 1]) {
1537 CFIL_LOG(LOG_ERR, "unitinfo does not match for kcunit %u",
1538 kcunit);
1539 OSIncrementAtomic(&cfil_stats.cfs_ctl_rcvd_bad);
1540 goto done;
1541 }
1542 /* Let's assume the flow control is lifted */
1543 if (cfc->cf_flags & CFF_FLOW_CONTROLLED) {
1544 if (!cfil_rw_lock_shared_to_exclusive(&cfil_lck_rw))
1545 cfil_rw_lock_exclusive(&cfil_lck_rw);
1546
1547 cfc->cf_flags &= ~CFF_FLOW_CONTROLLED;
1548
1549 cfil_rw_lock_exclusive_to_shared(&cfil_lck_rw);
1550 lck_rw_assert(&cfil_lck_rw, LCK_RW_ASSERT_SHARED);
1551 }
1552 /*
1553 * Flow control will be raised again as soon as an entry cannot enqueue
1554 * to the kernel control socket
1555 */
1556 while ((cfc->cf_flags & CFF_FLOW_CONTROLLED) == 0) {
1557 verify_content_filter(cfc);
1558
1559 cfil_rw_lock_assert_held(&cfil_lck_rw, 0);
1560
1561 /* Find an entry that is flow controlled */
1562 TAILQ_FOREACH(entry, &cfc->cf_sock_entries, cfe_link) {
1563 if (entry->cfe_cfil_info == NULL ||
1564 entry->cfe_cfil_info->cfi_so == NULL)
1565 continue;
1566 if ((entry->cfe_flags & CFEF_FLOW_CONTROLLED) == 0)
1567 continue;
1568 }
1569 if (entry == NULL)
1570 break;
1571
1572 OSIncrementAtomic(&cfil_stats.cfs_ctl_rcvd_flow_lift);
1573
1574 so = entry->cfe_cfil_info->cfi_so;
1575
1576 cfil_rw_unlock_shared(&cfil_lck_rw);
1577 socket_lock(so, 1);
1578
1579 do {
1580 error = cfil_acquire_sockbuf(so, 1);
1581 if (error == 0)
1582 error = cfil_data_service_ctl_q(so, kcunit, 1);
1583 cfil_release_sockbuf(so, 1);
1584 if (error != 0)
1585 break;
1586
1587 error = cfil_acquire_sockbuf(so, 0);
1588 if (error == 0)
1589 error = cfil_data_service_ctl_q(so, kcunit, 0);
1590 cfil_release_sockbuf(so, 0);
1591 } while (0);
1592
1593 socket_lock_assert_owned(so);
1594 socket_unlock(so, 1);
1595
1596 cfil_rw_lock_shared(&cfil_lck_rw);
1597 }
1598 done:
1599 cfil_rw_unlock_shared(&cfil_lck_rw);
1600 }
1601
/*
 * cfil_init()
 *
 * One-time initialization of the content filter subsystem: verifies
 * compile-time and alignment invariants on the statistics structures,
 * creates the zones for "struct content_filter" and "struct cfil_info",
 * initializes the global read-write lock and socket list, and registers
 * the CONTENT_FILTER_CONTROL_NAME kernel control.
 */
void
cfil_init(void)
{
	struct kern_ctl_reg kern_ctl;
	errno_t error = 0;
	vm_size_t content_filter_size = 0;	/* size of content_filter */
	vm_size_t cfil_info_size = 0;	/* size of cfil_info */

	CFIL_LOG(LOG_NOTICE, "");

	/*
	 * Compile time verifications
	 */
	_CASSERT(CFIL_MAX_FILTER_COUNT == MAX_CONTENT_FILTER);
	_CASSERT(sizeof(struct cfil_filter_stat) % sizeof(uint32_t) == 0);
	_CASSERT(sizeof(struct cfil_entry_stat) % sizeof(uint32_t) == 0);
	_CASSERT(sizeof(struct cfil_sock_stat) % sizeof(uint32_t) == 0);

	/*
	 * Runtime time verifications
	 */
	VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_ctl_q_in_enqueued,
		sizeof(uint32_t)));
	VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_ctl_q_out_enqueued,
		sizeof(uint32_t)));
	VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_ctl_q_in_peeked,
		sizeof(uint32_t)));
	VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_ctl_q_out_peeked,
		sizeof(uint32_t)));

	VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_pending_q_in_enqueued,
		sizeof(uint32_t)));
	VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_pending_q_out_enqueued,
		sizeof(uint32_t)));

	VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_inject_q_in_enqueued,
		sizeof(uint32_t)));
	VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_inject_q_out_enqueued,
		sizeof(uint32_t)));
	VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_inject_q_in_passed,
		sizeof(uint32_t)));
	VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_inject_q_out_passed,
		sizeof(uint32_t)));

	/*
	 * Zone for content filters kernel control sockets
	 */
	content_filter_size = sizeof(struct content_filter);
	content_filter_zone = zinit(content_filter_size,
		CONTENT_FILTER_ZONE_MAX * content_filter_size,
		0,
		CONTENT_FILTER_ZONE_NAME);
	if (content_filter_zone == NULL) {
		panic("%s: zinit(%s) failed", __func__,
			CONTENT_FILTER_ZONE_NAME);
		/* NOTREACHED */
	}
	zone_change(content_filter_zone, Z_CALLERACCT, FALSE);
	zone_change(content_filter_zone, Z_EXPAND, TRUE);

	/*
	 * Zone for per socket content filters
	 */
	cfil_info_size = sizeof(struct cfil_info);
	cfil_info_zone = zinit(cfil_info_size,
		CFIL_INFO_ZONE_MAX * cfil_info_size,
		0,
		CFIL_INFO_ZONE_NAME);
	if (cfil_info_zone == NULL) {
		panic("%s: zinit(%s) failed", __func__, CFIL_INFO_ZONE_NAME);
		/* NOTREACHED */
	}
	zone_change(cfil_info_zone, Z_CALLERACCT, FALSE);
	zone_change(cfil_info_zone, Z_EXPAND, TRUE);

	/*
	 * Allocate locks
	 */
	cfil_lck_grp_attr = lck_grp_attr_alloc_init();
	if (cfil_lck_grp_attr == NULL) {
		panic("%s: lck_grp_attr_alloc_init failed", __func__);
		/* NOTREACHED */
	}
	cfil_lck_grp = lck_grp_alloc_init("content filter",
		cfil_lck_grp_attr);
	if (cfil_lck_grp == NULL) {
		panic("%s: lck_grp_alloc_init failed", __func__);
		/* NOTREACHED */
	}
	cfil_lck_attr = lck_attr_alloc_init();
	if (cfil_lck_attr == NULL) {
		panic("%s: lck_attr_alloc_init failed", __func__);
		/* NOTREACHED */
	}
	lck_rw_init(&cfil_lck_rw, cfil_lck_grp, cfil_lck_attr);

	TAILQ_INIT(&cfil_sock_head);

	/*
	 * Register kernel control
	 */
	bzero(&kern_ctl, sizeof(kern_ctl));
	strlcpy(kern_ctl.ctl_name, CONTENT_FILTER_CONTROL_NAME,
		sizeof(kern_ctl.ctl_name));
	kern_ctl.ctl_flags = CTL_FLAG_PRIVILEGED | CTL_FLAG_REG_EXTENDED;
	kern_ctl.ctl_sendsize = 512 * 1024;	/* enough? */
	kern_ctl.ctl_recvsize = 512 * 1024;	/* enough? */
	kern_ctl.ctl_connect = cfil_ctl_connect;
	kern_ctl.ctl_disconnect = cfil_ctl_disconnect;
	kern_ctl.ctl_send = cfil_ctl_send;
	kern_ctl.ctl_getopt = cfil_ctl_getopt;
	kern_ctl.ctl_setopt = cfil_ctl_setopt;
	kern_ctl.ctl_rcvd = cfil_ctl_rcvd;
	error = ctl_register(&kern_ctl, &cfil_kctlref);
	if (error != 0) {
		CFIL_LOG(LOG_ERR, "ctl_register failed: %d", error);
		return;
	}
}
1721
/*
 * cfil_info_alloc()
 *
 * Allocate and initialize the per-socket content filter state and
 * attach it to the socket (so->so_cfil).  Also computes the socket's
 * cfi_sock_id from the socket generation count (upper 32 bits) and the
 * inpcb flow hash (lower 32 bits) and links the state on the global
 * cfil_sock_head list.  The socket must be locked by the caller.
 * Returns the new cfil_info or NULL on allocation failure.
 */
struct cfil_info *
cfil_info_alloc(struct socket *so)
{
	int kcunit;
	struct cfil_info *cfil_info = NULL;
	struct inpcb *inp = sotoinpcb(so);

	CFIL_LOG(LOG_INFO, "");

	socket_lock_assert_owned(so);

	cfil_info = zalloc(cfil_info_zone);
	if (cfil_info == NULL)
		goto done;
	bzero(cfil_info, sizeof(struct cfil_info));

	cfil_queue_init(&cfil_info->cfi_snd.cfi_inject_q);
	cfil_queue_init(&cfil_info->cfi_rcv.cfi_inject_q);

	for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
		struct cfil_entry *entry;

		entry = &cfil_info->cfi_entries[kcunit - 1];
		entry->cfe_cfil_info = cfil_info;

		/* Initialize the filter entry */
		entry->cfe_filter = NULL;
		entry->cfe_flags = 0;
		entry->cfe_necp_control_unit = 0;
		entry->cfe_snd.cfe_pass_offset = 0;
		entry->cfe_snd.cfe_peek_offset = 0;
		entry->cfe_snd.cfe_peeked = 0;
		entry->cfe_rcv.cfe_pass_offset = 0;
		entry->cfe_rcv.cfe_peek_offset = 0;
		entry->cfe_rcv.cfe_peeked = 0;

		cfil_queue_init(&entry->cfe_snd.cfe_pending_q);
		cfil_queue_init(&entry->cfe_rcv.cfe_pending_q);
		cfil_queue_init(&entry->cfe_snd.cfe_ctl_q);
		cfil_queue_init(&entry->cfe_rcv.cfe_ctl_q);
	}

	cfil_rw_lock_exclusive(&cfil_lck_rw);

	so->so_cfil = cfil_info;
	cfil_info->cfi_so = so;
	/*
	 * Create a cfi_sock_id that's not the socket pointer!
	 */
	if (inp->inp_flowhash == 0)
		inp->inp_flowhash = inp_calc_flowhash(inp);
	cfil_info->cfi_sock_id =
		((so->so_gencnt << 32) | inp->inp_flowhash);

	TAILQ_INSERT_TAIL(&cfil_sock_head, cfil_info, cfi_link);

	cfil_sock_attached_count++;

	cfil_rw_unlock_exclusive(&cfil_lck_rw);

done:
	if (cfil_info != NULL)
		OSIncrementAtomic(&cfil_stats.cfs_cfi_alloc_ok);
	else
		OSIncrementAtomic(&cfil_stats.cfs_cfi_alloc_fail);

	return (cfil_info);
}
1790
/*
 * cfil_info_attach_unit()
 *
 * Attach the socket's cfil entry to the content filter whose NECP
 * control unit matches "filter_control_unit".  At most one filter is
 * attached (the loop breaks on the first match).  The socket must be
 * locked by the caller.  Returns 1 when a filter was attached, 0
 * otherwise.
 */
int
cfil_info_attach_unit(struct socket *so, uint32_t filter_control_unit)
{
	int kcunit;
	struct cfil_info *cfil_info = so->so_cfil;
	int attached = 0;

	CFIL_LOG(LOG_INFO, "");

	socket_lock_assert_owned(so);

	cfil_rw_lock_exclusive(&cfil_lck_rw);

	for (kcunit = 1;
		content_filters != NULL && kcunit <= MAX_CONTENT_FILTER;
		kcunit++) {
		struct content_filter *cfc = content_filters[kcunit - 1];
		struct cfil_entry *entry;

		if (cfc == NULL)
			continue;
		if (cfc->cf_necp_control_unit != filter_control_unit)
			continue;

		entry = &cfil_info->cfi_entries[kcunit - 1];

		entry->cfe_filter = cfc;
		entry->cfe_necp_control_unit = filter_control_unit;
		TAILQ_INSERT_TAIL(&cfc->cf_sock_entries, entry, cfe_link);
		cfc->cf_sock_count++;
		verify_content_filter(cfc);
		attached = 1;
		entry->cfe_flags |= CFEF_CFIL_ATTACHED;
		break;
	}

	cfil_rw_unlock_exclusive(&cfil_lck_rw);

	return (attached);
}
1831
/*
 * cfil_info_free()
 *
 * Detach the content filter state from a socket and free it: drops the
 * socket reference taken at attach time, unlinks every entry from its
 * content filter, removes the state from the global socket list, drains
 * all inject/pending/control queues and returns the cfil_info to its
 * zone.  Safe to call with cfil_info == NULL (only clears so_cfil and
 * the SOF_CONTENT_FILTER reference).
 */
static void
cfil_info_free(struct socket *so, struct cfil_info *cfil_info)
{
	int kcunit;
	uint64_t in_drain = 0;
	uint64_t out_drained = 0;

	so->so_cfil = NULL;

	if (so->so_flags & SOF_CONTENT_FILTER) {
		so->so_flags &= ~SOF_CONTENT_FILTER;
		VERIFY(so->so_usecount > 0);
		/* Release the reference held since cfil_sock_attach() */
		so->so_usecount--;
	}
	if (cfil_info == NULL)
		return;

	CFIL_LOG(LOG_INFO, "");

	cfil_rw_lock_exclusive(&cfil_lck_rw);

	for (kcunit = 1;
		content_filters != NULL && kcunit <= MAX_CONTENT_FILTER;
		kcunit++) {
		struct cfil_entry *entry;
		struct content_filter *cfc;

		entry = &cfil_info->cfi_entries[kcunit - 1];

		/* Don't be silly and try to detach twice */
		if (entry->cfe_filter == NULL)
			continue;

		cfc = content_filters[kcunit - 1];

		VERIFY(cfc == entry->cfe_filter);

		entry->cfe_filter = NULL;
		entry->cfe_necp_control_unit = 0;
		TAILQ_REMOVE(&cfc->cf_sock_entries, entry, cfe_link);
		cfc->cf_sock_count--;

		verify_content_filter(cfc);
	}
	cfil_sock_attached_count--;
	TAILQ_REMOVE(&cfil_sock_head, cfil_info, cfi_link);

	/* Drain all queued mbufs; counters only feed the statistics below */
	out_drained += cfil_queue_drain(&cfil_info->cfi_snd.cfi_inject_q);
	in_drain += cfil_queue_drain(&cfil_info->cfi_rcv.cfi_inject_q);

	for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
		struct cfil_entry *entry;

		entry = &cfil_info->cfi_entries[kcunit - 1];
		out_drained += cfil_queue_drain(&entry->cfe_snd.cfe_pending_q);
		in_drain += cfil_queue_drain(&entry->cfe_rcv.cfe_pending_q);
		out_drained += cfil_queue_drain(&entry->cfe_snd.cfe_ctl_q);
		in_drain += cfil_queue_drain(&entry->cfe_rcv.cfe_ctl_q);
	}
	cfil_rw_unlock_exclusive(&cfil_lck_rw);

	if (out_drained)
		OSIncrementAtomic(&cfil_stats.cfs_flush_out_free);
	if (in_drain)
		OSIncrementAtomic(&cfil_stats.cfs_flush_in_free);

	zfree(cfil_info_zone, cfil_info);
}
1900
/*
 * cfil_sock_attach()
 *
 * Entry point from Sockets layer
 * The socket is locked.
 *
 * Attaches content filter state to a TCP socket when NECP assigns it a
 * content filter control unit and at least one filter agent is active.
 * Takes a socket reference (so_usecount) for the lifetime of the
 * attachment and sends the attached event to the filter agent.
 * Returns 0 on success or when filtering does not apply; ENOMEM when
 * the cfil_info allocation fails.
 */
errno_t
cfil_sock_attach(struct socket *so)
{
	errno_t error = 0;
	uint32_t filter_control_unit;

	socket_lock_assert_owned(so);

	/* Limit ourselves to TCP */
	if ((so->so_proto->pr_domain->dom_family != PF_INET &&
		so->so_proto->pr_domain->dom_family != PF_INET6) ||
		so->so_proto->pr_type != SOCK_STREAM ||
		so->so_proto->pr_protocol != IPPROTO_TCP)
		goto done;

	filter_control_unit = necp_socket_get_content_filter_control_unit(so);
	if (filter_control_unit == 0)
		goto done;

	if ((filter_control_unit & NECP_MASK_USERSPACE_ONLY) != 0) {
		OSIncrementAtomic(&cfil_stats.cfs_sock_userspace_only);
		goto done;
	}
	if (cfil_active_count == 0) {
		OSIncrementAtomic(&cfil_stats.cfs_sock_attach_in_vain);
		goto done;
	}
	if (so->so_cfil != NULL) {
		OSIncrementAtomic(&cfil_stats.cfs_sock_attach_already);
		CFIL_LOG(LOG_ERR, "already attached");
	} else {
		cfil_info_alloc(so);
		if (so->so_cfil == NULL) {
			error = ENOMEM;
			OSIncrementAtomic(&cfil_stats.cfs_sock_attach_no_mem);
			goto done;
		}
	}
	if (cfil_info_attach_unit(so, filter_control_unit) == 0) {
		CFIL_LOG(LOG_ERR, "cfil_info_attach_unit(%u) failed",
			filter_control_unit);
		OSIncrementAtomic(&cfil_stats.cfs_sock_attach_failed);
		goto done;
	}
	CFIL_LOG(LOG_INFO, "so %llx filter_control_unit %u sockid %llx",
		(uint64_t)VM_KERNEL_ADDRPERM(so),
		filter_control_unit, so->so_cfil->cfi_sock_id);

	so->so_flags |= SOF_CONTENT_FILTER;
	OSIncrementAtomic(&cfil_stats.cfs_sock_attached);

	/* Hold a reference on the socket */
	so->so_usecount++;

	error = cfil_dispatch_attach_event(so, filter_control_unit);
	/* We can recover from flow control or out of memory errors */
	if (error == ENOBUFS || error == ENOMEM)
		error = 0;
	else if (error != 0)
		goto done;

	CFIL_INFO_VERIFY(so->so_cfil);
done:
	return (error);
}
1970
1971 /*
1972 * Entry point from Sockets layer
1973 * The socket is locked.
1974 */
1975 errno_t
1976 cfil_sock_detach(struct socket *so)
1977 {
1978 if (so->so_cfil) {
1979 cfil_info_free(so, so->so_cfil);
1980 OSIncrementAtomic(&cfil_stats.cfs_sock_detached);
1981 }
1982 return (0);
1983 }
1984
/*
 * cfil_dispatch_attach_event()
 *
 * Send the CFM_OP_SOCKET_ATTACHED event for this socket to the filter
 * agent whose NECP control unit matches "filter_control_unit".  The
 * socket must be locked by the caller.  Returns 0 on success (or when
 * no matching attached entry exists, or the event was already sent);
 * ENOBUFS when the kernel control socket is flow controlled, in which
 * case flow control flags are raised on both the entry and the filter.
 *
 * NOTE(review): content_filters is indexed without a NULL check here;
 * presumably callers guarantee at least one active filter (see the
 * cfil_active_count check in cfil_sock_attach()) -- confirm.
 */
static int
cfil_dispatch_attach_event(struct socket *so, uint32_t filter_control_unit)
{
	errno_t error = 0;
	struct cfil_entry *entry = NULL;
	struct cfil_msg_sock_attached msg_attached;
	uint32_t kcunit;
	struct content_filter *cfc;

	socket_lock_assert_owned(so);

	cfil_rw_lock_shared(&cfil_lck_rw);

	if (so->so_proto == NULL || so->so_proto->pr_domain == NULL) {
		error = EINVAL;
		goto done;
	}
	/*
	 * Find the matching filter unit
	 */
	for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
		cfc = content_filters[kcunit - 1];

		if (cfc == NULL)
			continue;
		if (cfc->cf_necp_control_unit != filter_control_unit)
			continue;
		entry = &so->so_cfil->cfi_entries[kcunit - 1];
		if (entry->cfe_filter == NULL)
			continue;

		VERIFY(cfc == entry->cfe_filter);

		break;
	}

	if (entry == NULL || entry->cfe_filter == NULL)
		goto done;

	/* Send the attached event at most once */
	if ((entry->cfe_flags & CFEF_SENT_SOCK_ATTACHED))
		goto done;

	CFIL_LOG(LOG_INFO, "so %llx filter_control_unit %u kcunit %u",
		(uint64_t)VM_KERNEL_ADDRPERM(so), filter_control_unit, kcunit);

	/* Would be wasteful to try when flow controlled */
	if (cfc->cf_flags & CFF_FLOW_CONTROLLED) {
		error = ENOBUFS;
		goto done;
	}

	bzero(&msg_attached, sizeof(struct cfil_msg_sock_attached));
	msg_attached.cfs_msghdr.cfm_len = sizeof(struct cfil_msg_sock_attached);
	msg_attached.cfs_msghdr.cfm_version = CFM_VERSION_CURRENT;
	msg_attached.cfs_msghdr.cfm_type = CFM_TYPE_EVENT;
	msg_attached.cfs_msghdr.cfm_op = CFM_OP_SOCKET_ATTACHED;
	msg_attached.cfs_msghdr.cfm_sock_id = entry->cfe_cfil_info->cfi_sock_id;

	msg_attached.cfs_sock_family = so->so_proto->pr_domain->dom_family;
	msg_attached.cfs_sock_type = so->so_proto->pr_type;
	msg_attached.cfs_sock_protocol = so->so_proto->pr_protocol;
	msg_attached.cfs_pid = so->last_pid;
	memcpy(msg_attached.cfs_uuid, so->last_uuid, sizeof(uuid_t));
	/* Report the effective (delegated) identity when present */
	if (so->so_flags & SOF_DELEGATED) {
		msg_attached.cfs_e_pid = so->e_pid;
		memcpy(msg_attached.cfs_e_uuid, so->e_uuid, sizeof(uuid_t));
	} else {
		msg_attached.cfs_e_pid = so->last_pid;
		memcpy(msg_attached.cfs_e_uuid, so->last_uuid, sizeof(uuid_t));
	}
	error = ctl_enqueuedata(entry->cfe_filter->cf_kcref,
		entry->cfe_filter->cf_kcunit,
		&msg_attached,
		sizeof(struct cfil_msg_sock_attached),
		CTL_DATA_EOR);
	if (error != 0) {
		CFIL_LOG(LOG_ERR, "ctl_enqueuedata() failed: %d", error);
		goto done;
	}
	microuptime(&entry->cfe_last_event);
	entry->cfe_flags |= CFEF_SENT_SOCK_ATTACHED;
	OSIncrementAtomic(&cfil_stats.cfs_attach_event_ok);
done:

	/* We can recover from flow control */
	if (error == ENOBUFS) {
		entry->cfe_flags |= CFEF_FLOW_CONTROLLED;
		OSIncrementAtomic(&cfil_stats.cfs_attach_event_flow_control);

		/* Upgrade to exclusive to modify the filter flags */
		if (!cfil_rw_lock_shared_to_exclusive(&cfil_lck_rw))
			cfil_rw_lock_exclusive(&cfil_lck_rw);

		cfc->cf_flags |= CFF_FLOW_CONTROLLED;

		cfil_rw_unlock_exclusive(&cfil_lck_rw);
	} else {
		if (error != 0)
			OSIncrementAtomic(&cfil_stats.cfs_attach_event_fail);

		cfil_rw_unlock_shared(&cfil_lck_rw);
	}
	return (error);
}
2088
2089 static int
2090 cfil_dispatch_disconnect_event(struct socket *so, uint32_t kcunit, int outgoing)
2091 {
2092 errno_t error = 0;
2093 struct mbuf *msg = NULL;
2094 struct cfil_entry *entry;
2095 struct cfe_buf *entrybuf;
2096 struct cfil_msg_hdr msg_disconnected;
2097 struct content_filter *cfc;
2098
2099 socket_lock_assert_owned(so);
2100
2101 cfil_rw_lock_shared(&cfil_lck_rw);
2102
2103 entry = &so->so_cfil->cfi_entries[kcunit - 1];
2104 if (outgoing)
2105 entrybuf = &entry->cfe_snd;
2106 else
2107 entrybuf = &entry->cfe_rcv;
2108
2109 cfc = entry->cfe_filter;
2110 if (cfc == NULL)
2111 goto done;
2112
2113 CFIL_LOG(LOG_INFO, "so %llx kcunit %u outgoing %d",
2114 (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit, outgoing);
2115
2116 /*
2117 * Send the disconnection event once
2118 */
2119 if ((outgoing && (entry->cfe_flags & CFEF_SENT_DISCONNECT_OUT)) ||
2120 (!outgoing && (entry->cfe_flags & CFEF_SENT_DISCONNECT_IN))) {
2121 CFIL_LOG(LOG_INFO, "so %llx disconnect already sent",
2122 (uint64_t)VM_KERNEL_ADDRPERM(so));
2123 goto done;
2124 }
2125
2126 /*
2127 * We're not disconnected as long as some data is waiting
2128 * to be delivered to the filter
2129 */
2130 if (outgoing && cfil_queue_empty(&entrybuf->cfe_ctl_q) == 0) {
2131 CFIL_LOG(LOG_INFO, "so %llx control queue not empty",
2132 (uint64_t)VM_KERNEL_ADDRPERM(so));
2133 error = EBUSY;
2134 goto done;
2135 }
2136 /* Would be wasteful to try when flow controlled */
2137 if (cfc->cf_flags & CFF_FLOW_CONTROLLED) {
2138 error = ENOBUFS;
2139 goto done;
2140 }
2141
2142 bzero(&msg_disconnected, sizeof(struct cfil_msg_hdr));
2143 msg_disconnected.cfm_len = sizeof(struct cfil_msg_hdr);
2144 msg_disconnected.cfm_version = CFM_VERSION_CURRENT;
2145 msg_disconnected.cfm_type = CFM_TYPE_EVENT;
2146 msg_disconnected.cfm_op = outgoing ? CFM_OP_DISCONNECT_OUT :
2147 CFM_OP_DISCONNECT_IN;
2148 msg_disconnected.cfm_sock_id = entry->cfe_cfil_info->cfi_sock_id;
2149 error = ctl_enqueuedata(entry->cfe_filter->cf_kcref,
2150 entry->cfe_filter->cf_kcunit,
2151 &msg_disconnected,
2152 sizeof(struct cfil_msg_hdr),
2153 CTL_DATA_EOR);
2154 if (error != 0) {
2155 CFIL_LOG(LOG_ERR, "ctl_enqueuembuf() failed: %d", error);
2156 mbuf_freem(msg);
2157 goto done;
2158 }
2159 microuptime(&entry->cfe_last_event);
2160
2161 /* Remember we have sent the disconnection message */
2162 if (outgoing) {
2163 entry->cfe_flags |= CFEF_SENT_DISCONNECT_OUT;
2164 OSIncrementAtomic(&cfil_stats.cfs_disconnect_out_event_ok);
2165 } else {
2166 entry->cfe_flags |= CFEF_SENT_DISCONNECT_IN;
2167 OSIncrementAtomic(&cfil_stats.cfs_disconnect_in_event_ok);
2168 }
2169 done:
2170 if (error == ENOBUFS) {
2171 entry->cfe_flags |= CFEF_FLOW_CONTROLLED;
2172 OSIncrementAtomic(
2173 &cfil_stats.cfs_disconnect_event_flow_control);
2174
2175 if (!cfil_rw_lock_shared_to_exclusive(&cfil_lck_rw))
2176 cfil_rw_lock_exclusive(&cfil_lck_rw);
2177
2178 cfc->cf_flags |= CFF_FLOW_CONTROLLED;
2179
2180 cfil_rw_unlock_exclusive(&cfil_lck_rw);
2181 } else {
2182 if (error != 0)
2183 OSIncrementAtomic(
2184 &cfil_stats.cfs_disconnect_event_fail);
2185
2186 cfil_rw_unlock_shared(&cfil_lck_rw);
2187 }
2188 return (error);
2189 }
2190
/*
 * cfil_dispatch_closed_event
 *
 * Send a single CFM_OP_SOCKET_CLOSED event to the content filter
 * attached at "kcunit". The event is only sent after the attached
 * event has gone out, and at most once per filter entry.
 *
 * Returns 0 on success or when there is nothing to send; ENOBUFS
 * when the kernel control socket is flow controlled; any other errno
 * from ctl_enqueuedata().
 *
 * Called with the socket lock held.
 */
int
cfil_dispatch_closed_event(struct socket *so, int kcunit)
{
	struct cfil_entry *entry;
	struct cfil_msg_hdr msg_closed;
	errno_t error = 0;
	struct content_filter *cfc;

	socket_lock_assert_owned(so);

	cfil_rw_lock_shared(&cfil_lck_rw);

	entry = &so->so_cfil->cfi_entries[kcunit - 1];
	cfc = entry->cfe_filter;
	if (cfc == NULL)
		goto done;

	CFIL_LOG(LOG_INFO, "so %llx kcunit %d",
		(uint64_t)VM_KERNEL_ADDRPERM(so), kcunit);

	/* Would be wasteful to try when flow controlled */
	if (cfc->cf_flags & CFF_FLOW_CONTROLLED) {
		error = ENOBUFS;
		goto done;
	}
	/*
	 * Send a single closed message per filter
	 */
	if ((entry->cfe_flags & CFEF_SENT_SOCK_CLOSED) != 0)
		goto done;
	/* The closed event only makes sense after the attached event */
	if ((entry->cfe_flags & CFEF_SENT_SOCK_ATTACHED) == 0)
		goto done;

	/* Build the CFM_OP_SOCKET_CLOSED event message */
	bzero(&msg_closed, sizeof(struct cfil_msg_hdr));
	msg_closed.cfm_len = sizeof(struct cfil_msg_hdr);
	msg_closed.cfm_version = CFM_VERSION_CURRENT;
	msg_closed.cfm_type = CFM_TYPE_EVENT;
	msg_closed.cfm_op = CFM_OP_SOCKET_CLOSED;
	msg_closed.cfm_sock_id = entry->cfe_cfil_info->cfi_sock_id;
	error = ctl_enqueuedata(entry->cfe_filter->cf_kcref,
		entry->cfe_filter->cf_kcunit,
		&msg_closed,
		sizeof(struct cfil_msg_hdr),
		CTL_DATA_EOR);
	if (error != 0) {
		CFIL_LOG(LOG_ERR, "ctl_enqueuedata() failed: %d",
			error);
		goto done;
	}
	microuptime(&entry->cfe_last_event);
	entry->cfe_flags |= CFEF_SENT_SOCK_CLOSED;
	OSIncrementAtomic(&cfil_stats.cfs_closed_event_ok);
done:
	/* We can recover from flow control */
	if (error == ENOBUFS) {
		entry->cfe_flags |= CFEF_FLOW_CONTROLLED;
		OSIncrementAtomic(&cfil_stats.cfs_closed_event_flow_control);

		/* Upgrade to exclusive to modify the filter's flags */
		if (!cfil_rw_lock_shared_to_exclusive(&cfil_lck_rw))
			cfil_rw_lock_exclusive(&cfil_lck_rw);

		cfc->cf_flags |= CFF_FLOW_CONTROLLED;

		cfil_rw_unlock_exclusive(&cfil_lck_rw);
	} else {
		if (error != 0)
			OSIncrementAtomic(&cfil_stats.cfs_closed_event_fail);

		cfil_rw_unlock_shared(&cfil_lck_rw);
	}

	return (error);
}
2264
2265 static void
2266 fill_ip6_sockaddr_4_6(union sockaddr_in_4_6 *sin46,
2267 struct in6_addr *ip6, u_int16_t port)
2268 {
2269 struct sockaddr_in6 *sin6 = &sin46->sin6;
2270
2271 sin6->sin6_family = AF_INET6;
2272 sin6->sin6_len = sizeof(*sin6);
2273 sin6->sin6_port = port;
2274 sin6->sin6_addr = *ip6;
2275 if (IN6_IS_SCOPE_EMBED(&sin6->sin6_addr)) {
2276 sin6->sin6_scope_id = ntohs(sin6->sin6_addr.s6_addr16[1]);
2277 sin6->sin6_addr.s6_addr16[1] = 0;
2278 }
2279 }
2280
2281 static void
2282 fill_ip_sockaddr_4_6(union sockaddr_in_4_6 *sin46,
2283 struct in_addr ip, u_int16_t port)
2284 {
2285 struct sockaddr_in *sin = &sin46->sin;
2286
2287 sin->sin_family = AF_INET;
2288 sin->sin_len = sizeof(*sin);
2289 sin->sin_port = port;
2290 sin->sin_addr.s_addr = ip.s_addr;
2291 }
2292
2293 static int
2294 cfil_dispatch_data_event(struct socket *so, uint32_t kcunit, int outgoing,
2295 struct mbuf *data, unsigned int copyoffset, unsigned int copylen)
2296 {
2297 errno_t error = 0;
2298 struct mbuf *copy = NULL;
2299 struct mbuf *msg = NULL;
2300 unsigned int one = 1;
2301 struct cfil_msg_data_event *data_req;
2302 size_t hdrsize;
2303 struct inpcb *inp = (struct inpcb *)so->so_pcb;
2304 struct cfil_entry *entry;
2305 struct cfe_buf *entrybuf;
2306 struct content_filter *cfc;
2307
2308 cfil_rw_lock_shared(&cfil_lck_rw);
2309
2310 entry = &so->so_cfil->cfi_entries[kcunit - 1];
2311 if (outgoing)
2312 entrybuf = &entry->cfe_snd;
2313 else
2314 entrybuf = &entry->cfe_rcv;
2315
2316 cfc = entry->cfe_filter;
2317 if (cfc == NULL)
2318 goto done;
2319
2320 CFIL_LOG(LOG_INFO, "so %llx kcunit %u outgoing %d",
2321 (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit, outgoing);
2322
2323 socket_lock_assert_owned(so);
2324
2325 /* Would be wasteful to try */
2326 if (cfc->cf_flags & CFF_FLOW_CONTROLLED) {
2327 error = ENOBUFS;
2328 goto done;
2329 }
2330
2331 /* Make a copy of the data to pass to kernel control socket */
2332 copy = m_copym_mode(data, copyoffset, copylen, M_DONTWAIT,
2333 M_COPYM_NOOP_HDR);
2334 if (copy == NULL) {
2335 CFIL_LOG(LOG_ERR, "m_copym_mode() failed");
2336 error = ENOMEM;
2337 goto done;
2338 }
2339
2340 /* We need an mbuf packet for the message header */
2341 hdrsize = sizeof(struct cfil_msg_data_event);
2342 error = mbuf_allocpacket(MBUF_DONTWAIT, hdrsize, &one, &msg);
2343 if (error != 0) {
2344 CFIL_LOG(LOG_ERR, "mbuf_allocpacket() failed");
2345 m_freem(copy);
2346 /*
2347 * ENOBUFS is to indicate flow control
2348 */
2349 error = ENOMEM;
2350 goto done;
2351 }
2352 mbuf_setlen(msg, hdrsize);
2353 mbuf_pkthdr_setlen(msg, hdrsize + copylen);
2354 msg->m_next = copy;
2355 data_req = (struct cfil_msg_data_event *)mbuf_data(msg);
2356 bzero(data_req, hdrsize);
2357 data_req->cfd_msghdr.cfm_len = hdrsize + copylen;
2358 data_req->cfd_msghdr.cfm_version = 1;
2359 data_req->cfd_msghdr.cfm_type = CFM_TYPE_EVENT;
2360 data_req->cfd_msghdr.cfm_op =
2361 outgoing ? CFM_OP_DATA_OUT : CFM_OP_DATA_IN;
2362 data_req->cfd_msghdr.cfm_sock_id =
2363 entry->cfe_cfil_info->cfi_sock_id;
2364 data_req->cfd_start_offset = entrybuf->cfe_peeked;
2365 data_req->cfd_end_offset = entrybuf->cfe_peeked + copylen;
2366
2367 /*
2368 * TBD:
2369 * For non connected sockets need to copy addresses from passed
2370 * parameters
2371 */
2372 if (inp->inp_vflag & INP_IPV6) {
2373 if (outgoing) {
2374 fill_ip6_sockaddr_4_6(&data_req->cfc_src,
2375 &inp->in6p_laddr, inp->inp_lport);
2376 fill_ip6_sockaddr_4_6(&data_req->cfc_dst,
2377 &inp->in6p_faddr, inp->inp_fport);
2378 } else {
2379 fill_ip6_sockaddr_4_6(&data_req->cfc_src,
2380 &inp->in6p_faddr, inp->inp_fport);
2381 fill_ip6_sockaddr_4_6(&data_req->cfc_dst,
2382 &inp->in6p_laddr, inp->inp_lport);
2383 }
2384 } else if (inp->inp_vflag & INP_IPV4) {
2385 if (outgoing) {
2386 fill_ip_sockaddr_4_6(&data_req->cfc_src,
2387 inp->inp_laddr, inp->inp_lport);
2388 fill_ip_sockaddr_4_6(&data_req->cfc_dst,
2389 inp->inp_faddr, inp->inp_fport);
2390 } else {
2391 fill_ip_sockaddr_4_6(&data_req->cfc_src,
2392 inp->inp_faddr, inp->inp_fport);
2393 fill_ip_sockaddr_4_6(&data_req->cfc_dst,
2394 inp->inp_laddr, inp->inp_lport);
2395 }
2396 }
2397
2398 /* Pass the message to the content filter */
2399 error = ctl_enqueuembuf(entry->cfe_filter->cf_kcref,
2400 entry->cfe_filter->cf_kcunit,
2401 msg, CTL_DATA_EOR);
2402 if (error != 0) {
2403 CFIL_LOG(LOG_ERR, "ctl_enqueuembuf() failed: %d", error);
2404 mbuf_freem(msg);
2405 goto done;
2406 }
2407 entry->cfe_flags &= ~CFEF_FLOW_CONTROLLED;
2408 OSIncrementAtomic(&cfil_stats.cfs_data_event_ok);
2409 done:
2410 if (error == ENOBUFS) {
2411 entry->cfe_flags |= CFEF_FLOW_CONTROLLED;
2412 OSIncrementAtomic(
2413 &cfil_stats.cfs_data_event_flow_control);
2414
2415 if (!cfil_rw_lock_shared_to_exclusive(&cfil_lck_rw))
2416 cfil_rw_lock_exclusive(&cfil_lck_rw);
2417
2418 cfc->cf_flags |= CFF_FLOW_CONTROLLED;
2419
2420 cfil_rw_unlock_exclusive(&cfil_lck_rw);
2421 } else {
2422 if (error != 0)
2423 OSIncrementAtomic(&cfil_stats.cfs_data_event_fail);
2424
2425 cfil_rw_unlock_shared(&cfil_lck_rw);
2426 }
2427 return (error);
2428 }
2429
/*
 * cfil_data_service_ctl_q
 *
 * Process the queue of data waiting to be delivered to content filter:
 * first move to the pending queue all data below the filter's pass
 * offset (on mbuf boundaries, implicitly marking it peeked), then
 * dispatch data events for the span up to the filter's peek offset,
 * then service the pending queue and send any deferred disconnect
 * events.
 *
 * Returns 0 on success; ENOBUFS/ENOMEM from a data event are left in
 * "error" to stop peeking but the data remains queued for retry.
 *
 * Called with the socket lock held.
 */
static int
cfil_data_service_ctl_q(struct socket *so, uint32_t kcunit, int outgoing)
{
	errno_t error = 0;
	struct mbuf *data, *tmp = NULL;
	unsigned int datalen = 0, copylen = 0, copyoffset = 0;
	struct cfil_entry *entry;
	struct cfe_buf *entrybuf;
	uint64_t currentoffset = 0;

	/* Nothing to do once the content filter info is detached */
	if (so->so_cfil == NULL)
		return (0);

	CFIL_LOG(LOG_INFO, "so %llx kcunit %u outgoing %d",
		(uint64_t)VM_KERNEL_ADDRPERM(so), kcunit, outgoing);

	socket_lock_assert_owned(so);

	entry = &so->so_cfil->cfi_entries[kcunit - 1];
	if (outgoing)
		entrybuf = &entry->cfe_snd;
	else
		entrybuf = &entry->cfe_rcv;

	/* Send attached message if not yet done */
	if ((entry->cfe_flags & CFEF_SENT_SOCK_ATTACHED) == 0) {
		/*
		 * NOTE(review): kcunit is passed where
		 * cfil_dispatch_attach_event() expects a NECP filter
		 * control unit — confirm the two are expected to match
		 * in this revision
		 */
		error = cfil_dispatch_attach_event(so, kcunit);
		if (error != 0) {
			/* We can recover from flow control */
			if (error == ENOBUFS || error == ENOMEM)
				error = 0;
			goto done;
		}
	} else if ((entry->cfe_flags & CFEF_DATA_START) == 0) {
		/* Filter has not asked for data yet */
		OSIncrementAtomic(&cfil_stats.cfs_ctl_q_not_started);
		goto done;
	}
	CFIL_LOG(LOG_DEBUG, "pass_offset %llu peeked %llu peek_offset %llu",
		entrybuf->cfe_pass_offset,
		entrybuf->cfe_peeked,
		entrybuf->cfe_peek_offset);

	/* Move all data that can pass */
	while ((data = cfil_queue_first(&entrybuf->cfe_ctl_q)) != NULL &&
	    entrybuf->cfe_ctl_q.q_start < entrybuf->cfe_pass_offset) {
		datalen = cfil_data_length(data, NULL);
		tmp = data;

		if (entrybuf->cfe_ctl_q.q_start + datalen <=
		    entrybuf->cfe_pass_offset) {
			/*
			 * The first mbuf can fully pass
			 */
			copylen = datalen;
		} else {
			/*
			 * The first mbuf can partially pass
			 */
			copylen = entrybuf->cfe_pass_offset -
				entrybuf->cfe_ctl_q.q_start;
		}
		VERIFY(copylen <= datalen);

		CFIL_LOG(LOG_DEBUG,
			"%llx first %llu peeked %llu pass %llu peek %llu"
			"datalen %u copylen %u",
			(uint64_t)VM_KERNEL_ADDRPERM(tmp),
			entrybuf->cfe_ctl_q.q_start,
			entrybuf->cfe_peeked,
			entrybuf->cfe_pass_offset,
			entrybuf->cfe_peek_offset,
			datalen, copylen);

		/*
		 * Data that passes has been peeked at explicitly or
		 * implicitly
		 */
		if (entrybuf->cfe_ctl_q.q_start + copylen >
		    entrybuf->cfe_peeked)
			entrybuf->cfe_peeked =
				entrybuf->cfe_ctl_q.q_start + copylen;
		/*
		 * Stop on partial pass
		 */
		if (copylen < datalen)
			break;

		/* All good, move full data from ctl queue to pending queue */
		cfil_queue_remove(&entrybuf->cfe_ctl_q, data, datalen);

		cfil_queue_enqueue(&entrybuf->cfe_pending_q, data, datalen);
		if (outgoing)
			OSAddAtomic64(datalen,
				&cfil_stats.cfs_pending_q_out_enqueued);
		else
			OSAddAtomic64(datalen,
				&cfil_stats.cfs_pending_q_in_enqueued);
	}
	CFIL_INFO_VERIFY(so->so_cfil);
	if (tmp != NULL)
		CFIL_LOG(LOG_DEBUG,
			"%llx first %llu peeked %llu pass %llu peek %llu"
			"datalen %u copylen %u",
			(uint64_t)VM_KERNEL_ADDRPERM(tmp),
			entrybuf->cfe_ctl_q.q_start,
			entrybuf->cfe_peeked,
			entrybuf->cfe_pass_offset,
			entrybuf->cfe_peek_offset,
			datalen, copylen);
	tmp = NULL;

	/* Now deal with remaining data the filter wants to peek at */
	for (data = cfil_queue_first(&entrybuf->cfe_ctl_q),
	    currentoffset = entrybuf->cfe_ctl_q.q_start;
	    data != NULL && currentoffset < entrybuf->cfe_peek_offset;
	    data = cfil_queue_next(&entrybuf->cfe_ctl_q, data),
	    currentoffset += datalen) {
		datalen = cfil_data_length(data, NULL);
		tmp = data;

		/* We've already peeked at this mbuf */
		if (currentoffset + datalen <= entrybuf->cfe_peeked)
			continue;
		/*
		 * The data in the first mbuf may have been
		 * partially peeked at
		 */
		copyoffset = entrybuf->cfe_peeked - currentoffset;
		VERIFY(copyoffset < datalen);
		copylen = datalen - copyoffset;
		VERIFY(copylen <= datalen);
		/*
		 * Do not copy more than needed
		 */
		if (currentoffset + copyoffset + copylen >
		    entrybuf->cfe_peek_offset) {
			copylen = entrybuf->cfe_peek_offset -
				(currentoffset + copyoffset);
		}

		CFIL_LOG(LOG_DEBUG,
			"%llx current %llu peeked %llu pass %llu peek %llu"
			"datalen %u copylen %u copyoffset %u",
			(uint64_t)VM_KERNEL_ADDRPERM(tmp),
			currentoffset,
			entrybuf->cfe_peeked,
			entrybuf->cfe_pass_offset,
			entrybuf->cfe_peek_offset,
			datalen, copylen, copyoffset);

		/*
		 * Stop if there is nothing more to peek at
		 */
		if (copylen == 0)
			break;
		/*
		 * Let the filter get a peek at this span of data
		 */
		error = cfil_dispatch_data_event(so, kcunit,
			outgoing, data, copyoffset, copylen);
		if (error != 0) {
			/* On error, leave data in ctl_q */
			break;
		}
		entrybuf->cfe_peeked += copylen;
		if (outgoing)
			OSAddAtomic64(copylen,
				&cfil_stats.cfs_ctl_q_out_peeked);
		else
			OSAddAtomic64(copylen,
				&cfil_stats.cfs_ctl_q_in_peeked);

		/* Stop when data could not be fully peeked at */
		if (copylen + copyoffset < datalen)
			break;
	}
	CFIL_INFO_VERIFY(so->so_cfil);
	if (tmp != NULL)
		CFIL_LOG(LOG_DEBUG,
			"%llx first %llu peeked %llu pass %llu peek %llu"
			"datalen %u copylen %u copyoffset %u",
			(uint64_t)VM_KERNEL_ADDRPERM(tmp),
			currentoffset,
			entrybuf->cfe_peeked,
			entrybuf->cfe_pass_offset,
			entrybuf->cfe_peek_offset,
			datalen, copylen, copyoffset);

	/*
	 * Process data that has passed the filter
	 */
	error = cfil_service_pending_queue(so, kcunit, outgoing);
	if (error != 0) {
		CFIL_LOG(LOG_ERR, "cfil_service_pending_queue() error %d",
			error);
		goto done;
	}

	/*
	 * Dispatch disconnect events that could not be sent
	 */
	if (so->so_cfil == NULL)
		goto done;
	else if (outgoing) {
		if ((so->so_cfil->cfi_flags & CFIF_SHUT_WR) &&
		    !(entry->cfe_flags & CFEF_SENT_DISCONNECT_OUT))
			cfil_dispatch_disconnect_event(so, kcunit, 1);
	} else {
		if ((so->so_cfil->cfi_flags & CFIF_SHUT_RD) &&
		    !(entry->cfe_flags & CFEF_SENT_DISCONNECT_IN))
			cfil_dispatch_disconnect_event(so, kcunit, 0);
	}

done:
	CFIL_LOG(LOG_DEBUG,
		"first %llu peeked %llu pass %llu peek %llu",
		entrybuf->cfe_ctl_q.q_start,
		entrybuf->cfe_peeked,
		entrybuf->cfe_pass_offset,
		entrybuf->cfe_peek_offset);

	CFIL_INFO_VERIFY(so->so_cfil);
	return (error);
}
2657
2658 /*
2659 * cfil_data_filter()
2660 *
2661 * Process data for a content filter installed on a socket
2662 */
2663 int
2664 cfil_data_filter(struct socket *so, uint32_t kcunit, int outgoing,
2665 struct mbuf *data, uint64_t datalen)
2666 {
2667 errno_t error = 0;
2668 struct cfil_entry *entry;
2669 struct cfe_buf *entrybuf;
2670
2671 CFIL_LOG(LOG_INFO, "so %llx kcunit %u outgoing %d",
2672 (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit, outgoing);
2673
2674 socket_lock_assert_owned(so);
2675
2676 entry = &so->so_cfil->cfi_entries[kcunit - 1];
2677 if (outgoing)
2678 entrybuf = &entry->cfe_snd;
2679 else
2680 entrybuf = &entry->cfe_rcv;
2681
2682 /* Are we attached to the filter? */
2683 if (entry->cfe_filter == NULL) {
2684 error = 0;
2685 goto done;
2686 }
2687
2688 /* Dispatch to filters */
2689 cfil_queue_enqueue(&entrybuf->cfe_ctl_q, data, datalen);
2690 if (outgoing)
2691 OSAddAtomic64(datalen,
2692 &cfil_stats.cfs_ctl_q_out_enqueued);
2693 else
2694 OSAddAtomic64(datalen,
2695 &cfil_stats.cfs_ctl_q_in_enqueued);
2696
2697 error = cfil_data_service_ctl_q(so, kcunit, outgoing);
2698 if (error != 0) {
2699 CFIL_LOG(LOG_ERR, "cfil_data_service_ctl_q() error %d",
2700 error);
2701 }
2702 /*
2703 * We have to return EJUSTRETURN in all cases to avoid double free
2704 * by socket layer
2705 */
2706 error = EJUSTRETURN;
2707 done:
2708 CFIL_INFO_VERIFY(so->so_cfil);
2709
2710 CFIL_LOG(LOG_INFO, "return %d", error);
2711 return (error);
2712 }
2713
/*
 * cfil_service_inject_queue() re-inject data that passed the
 * content filters
 *
 * Drains the direction's inject queue: outgoing data is re-sent via
 * sosend() with MSG_SKIPCFIL (dropping the socket lock around the
 * call), incoming data is appended to the receive buffer with
 * M_SKIPCFIL set. Each successfully injected chunk is removed from the
 * queue and the pending counters are advanced. On failure the
 * remaining data stays queued and CFIF_RETRY_INJECT_{IN,OUT} is set
 * for a later retry. Finally, deferred shutdown and close-wait
 * wakeups are handled.
 *
 * Returns 0 on success or if the filter got detached mid-injection;
 * otherwise the errno of the failed injection.
 *
 * Called with the socket lock held (temporarily released around
 * sosend() for the outgoing direction).
 */
static int
cfil_service_inject_queue(struct socket *so, int outgoing)
{
	mbuf_t data;
	unsigned int datalen;
	int mbcnt;
	unsigned int copylen;
	errno_t error = 0;
	struct mbuf *copy = NULL;
	struct cfi_buf *cfi_buf;
	struct cfil_queue *inject_q;
	int need_rwakeup = 0;

	if (so->so_cfil == NULL)
		return (0);

	CFIL_LOG(LOG_INFO, "so %llx outgoing %d",
		(uint64_t)VM_KERNEL_ADDRPERM(so), outgoing);

	socket_lock_assert_owned(so);

	/* Clear the retry flag for this direction before draining */
	if (outgoing) {
		cfi_buf = &so->so_cfil->cfi_snd;
		so->so_cfil->cfi_flags &= ~CFIF_RETRY_INJECT_OUT;
	} else {
		cfi_buf = &so->so_cfil->cfi_rcv;
		so->so_cfil->cfi_flags &= ~CFIF_RETRY_INJECT_IN;
	}
	inject_q = &cfi_buf->cfi_inject_q;

	while ((data = cfil_queue_first(inject_q)) != NULL) {
		datalen = cfil_data_length(data, &mbcnt);

		CFIL_LOG(LOG_INFO, "data %llx datalen %u",
			(uint64_t)VM_KERNEL_ADDRPERM(data), datalen);

		/* Make a copy in case of injection error */
		copy = m_copym_mode(data, 0, M_COPYALL, M_DONTWAIT,
			M_COPYM_COPY_HDR);
		if (copy == NULL) {
			CFIL_LOG(LOG_ERR, "m_copym_mode() failed");
			error = ENOMEM;
			break;
		}

		if ((copylen = m_length(copy)) != datalen)
			panic("%s so %p copylen %d != datalen %d",
				__func__, so, copylen, datalen);

		if (outgoing) {
			/* sosend() may block: drop the socket lock */
			socket_unlock(so, 0);

			/*
			 * Set both DONTWAIT and NBIO flags as we really
			 * do not want to block
			 */
			error = sosend(so, NULL, NULL,
				copy, NULL,
				MSG_SKIPCFIL | MSG_DONTWAIT | MSG_NBIO);

			socket_lock(so, 0);

			if (error != 0) {
				CFIL_LOG(LOG_ERR, "sosend() failed %d",
					error);
			}
		} else {
			copy->m_flags |= M_SKIPCFIL;

			/*
			 * NOTE:
			 * This works only because we support plain TCP
			 * For UDP, RAWIP, MPTCP and message TCP we'll
			 * need to call the appropriate sbappendxxx()
			 * or fix sock_inject_data_in()
			 */
			if (sbappendstream(&so->so_rcv, copy))
				need_rwakeup = 1;
		}

		/* Need to reassess if filter is still attached after unlock */
		if (so->so_cfil == NULL) {
			CFIL_LOG(LOG_ERR, "so %llx cfil detached",
				(uint64_t)VM_KERNEL_ADDRPERM(so));
			OSIncrementAtomic(&cfil_stats.cfs_inject_q_detached);
			error = 0;
			break;
		}
		if (error != 0)
			break;

		/* Injection successful */
		cfil_queue_remove(inject_q, data, datalen);
		mbuf_freem(data);

		cfi_buf->cfi_pending_first += datalen;
		cfi_buf->cfi_pending_mbcnt -= mbcnt;
		cfil_info_buf_verify(cfi_buf);

		if (outgoing)
			OSAddAtomic64(datalen,
				&cfil_stats.cfs_inject_q_out_passed);
		else
			OSAddAtomic64(datalen,
				&cfil_stats.cfs_inject_q_in_passed);
	}

	/* A single wakeup for several packets is more efficient */
	if (need_rwakeup)
		sorwakeup(so);

	/* On failure, arm the retry flag for this direction */
	if (error != 0 && so->so_cfil) {
		if (error == ENOBUFS)
			OSIncrementAtomic(&cfil_stats.cfs_inject_q_nobufs);
		if (error == ENOMEM)
			OSIncrementAtomic(&cfil_stats.cfs_inject_q_nomem);

		if (outgoing) {
			so->so_cfil->cfi_flags |= CFIF_RETRY_INJECT_OUT;
			OSIncrementAtomic(&cfil_stats.cfs_inject_q_out_fail);
		} else {
			so->so_cfil->cfi_flags |= CFIF_RETRY_INJECT_IN;
			OSIncrementAtomic(&cfil_stats.cfs_inject_q_in_fail);
		}
	}

	/*
	 * Notify: complete a deferred write shutdown once nothing is
	 * pending, and wake up a thread blocked in close-wait when the
	 * last filter detaches
	 */
	if (so->so_cfil && (so->so_cfil->cfi_flags & CFIF_SHUT_WR)) {
		cfil_sock_notify_shutdown(so, SHUT_WR);
		if (cfil_sock_data_pending(&so->so_snd) == 0)
			soshutdownlock_final(so, SHUT_WR);
	}
	if (so->so_cfil && (so->so_cfil->cfi_flags & CFIF_CLOSE_WAIT)) {
		if (cfil_filters_attached(so) == 0) {
			CFIL_LOG(LOG_INFO, "so %llx waking",
				(uint64_t)VM_KERNEL_ADDRPERM(so));
			wakeup((caddr_t)&so->so_cfil);
		}
	}

	CFIL_INFO_VERIFY(so->so_cfil);

	return (error);
}
2864
/*
 * cfil_service_pending_queue
 *
 * Move data from the pending queue of the filter at "kcunit" that is
 * now below the filter's pass offset: each full mbuf chunk is handed
 * to the subsequent content filters, and once it has passed all of
 * them it is placed on the direction's inject queue for re-injection
 * into the socket.
 *
 * Returns 0 on success or the error from a downstream
 * cfil_data_filter() call (data stays with that filter).
 *
 * Called with the socket lock held.
 */
static int
cfil_service_pending_queue(struct socket *so, uint32_t kcunit, int outgoing)
{
	uint64_t passlen, curlen;
	mbuf_t data;
	unsigned int datalen;
	errno_t error = 0;
	struct cfil_entry *entry;
	struct cfe_buf *entrybuf;
	struct cfil_queue *pending_q;

	CFIL_LOG(LOG_INFO, "so %llx kcunit %u outgoing %d",
		(uint64_t)VM_KERNEL_ADDRPERM(so), kcunit, outgoing);

	socket_lock_assert_owned(so);

	entry = &so->so_cfil->cfi_entries[kcunit - 1];
	if (outgoing)
		entrybuf = &entry->cfe_snd;
	else
		entrybuf = &entry->cfe_rcv;

	pending_q = &entrybuf->cfe_pending_q;

	/* Amount of queued data allowed to pass this filter */
	passlen = entrybuf->cfe_pass_offset - pending_q->q_start;

	/*
	 * Locate the chunks of data that we can pass to the next filter
	 * A data chunk must be on mbuf boundaries
	 */
	curlen = 0;
	while ((data = cfil_queue_first(pending_q)) != NULL) {
		datalen = cfil_data_length(data, NULL);

		CFIL_LOG(LOG_INFO,
			"data %llx datalen %u passlen %llu curlen %llu",
			(uint64_t)VM_KERNEL_ADDRPERM(data), datalen,
			passlen, curlen);

		/* Stop at the first chunk that does not fully pass */
		if (curlen + datalen > passlen)
			break;

		cfil_queue_remove(pending_q, data, datalen);

		curlen += datalen;

		/*
		 * Hand the chunk to the filters after this one.
		 * NOTE(review): kcunit keeps advancing across iterations
		 * of the enclosing while loop, so later chunks resume
		 * from where the previous chunk stopped — confirm this
		 * is the intended multi-filter chaining behavior
		 */
		for (kcunit += 1;
		    kcunit <= MAX_CONTENT_FILTER;
		    kcunit++) {
			error = cfil_data_filter(so, kcunit, outgoing,
				data, datalen);
			/* 0 means passed so we can continue */
			if (error != 0)
				break;
		}
		/* When data has passed all filters, re-inject */
		if (error == 0) {
			if (outgoing) {
				cfil_queue_enqueue(
					&so->so_cfil->cfi_snd.cfi_inject_q,
					data, datalen);
				OSAddAtomic64(datalen,
					&cfil_stats.cfs_inject_q_out_enqueued);
			} else {
				cfil_queue_enqueue(
					&so->so_cfil->cfi_rcv.cfi_inject_q,
					data, datalen);
				OSAddAtomic64(datalen,
					&cfil_stats.cfs_inject_q_in_enqueued);
			}
		}
	}

	CFIL_INFO_VERIFY(so->so_cfil);

	return (error);
}
2942
/*
 * cfil_update_data_offsets
 *
 * Record new pass/peek offsets received from the filter agent for the
 * entry at "kcunit" (offsets only ever move forward) and service the
 * control queue accordingly. When both directions have reached
 * CFM_MAX_OFFSET, or the socket is in close-wait with empty control
 * queues, the entry is marked detached and a close-waiting thread is
 * woken.
 *
 * Returns EJUSTRETURN when the offsets were updated and the control
 * queue serviced; 0 when there was nothing to update or the content
 * filter info is already detached; EPIPE when the socket is marked
 * for drop; otherwise the error from cfil_data_service_ctl_q().
 *
 * Called with the socket lock held.
 */
int
cfil_update_data_offsets(struct socket *so, uint32_t kcunit, int outgoing,
	uint64_t pass_offset, uint64_t peek_offset)
{
	errno_t error = 0;
	struct cfil_entry *entry = NULL;
	struct cfe_buf *entrybuf;
	int updated = 0;

	CFIL_LOG(LOG_INFO, "pass %llu peek %llu", pass_offset, peek_offset);

	socket_lock_assert_owned(so);

	if (so->so_cfil == NULL) {
		CFIL_LOG(LOG_ERR, "so %llx cfil detached",
			(uint64_t)VM_KERNEL_ADDRPERM(so));
		error = 0;
		goto done;
	} else if (so->so_cfil->cfi_flags & CFIF_DROP) {
		CFIL_LOG(LOG_ERR, "so %llx drop set",
			(uint64_t)VM_KERNEL_ADDRPERM(so));
		error = EPIPE;
		goto done;
	}

	entry = &so->so_cfil->cfi_entries[kcunit - 1];
	if (outgoing)
		entrybuf = &entry->cfe_snd;
	else
		entrybuf = &entry->cfe_rcv;

	/* Record updated offsets for this content filter */
	if (pass_offset > entrybuf->cfe_pass_offset) {
		entrybuf->cfe_pass_offset = pass_offset;

		/* The peek offset can never be behind the pass offset */
		if (entrybuf->cfe_peek_offset < entrybuf->cfe_pass_offset)
			entrybuf->cfe_peek_offset = entrybuf->cfe_pass_offset;
		updated = 1;
	} else {
		CFIL_LOG(LOG_INFO, "pass_offset %llu <= cfe_pass_offset %llu",
			pass_offset, entrybuf->cfe_pass_offset);
	}
	/* Filter does not want or need to see data that's allowed to pass */
	if (peek_offset > entrybuf->cfe_pass_offset &&
	    peek_offset > entrybuf->cfe_peek_offset) {
		entrybuf->cfe_peek_offset = peek_offset;
		updated = 1;
	}
	/* Nothing to do */
	if (updated == 0)
		goto done;

	/* Move data held in control queue to pending queue if needed */
	error = cfil_data_service_ctl_q(so, kcunit, outgoing);
	if (error != 0) {
		CFIL_LOG(LOG_ERR, "cfil_data_service_ctl_q() error %d",
			error);
		goto done;
	}
	error = EJUSTRETURN;

done:
	/*
	 * The filter is effectively detached when pass all from both sides
	 * or when the socket is closed and no more data is waiting
	 * to be delivered to the filter
	 */
	if (entry != NULL &&
	    ((entry->cfe_snd.cfe_pass_offset == CFM_MAX_OFFSET &&
	    entry->cfe_rcv.cfe_pass_offset == CFM_MAX_OFFSET) ||
	    ((so->so_cfil->cfi_flags & CFIF_CLOSE_WAIT) &&
	    cfil_queue_empty(&entry->cfe_snd.cfe_ctl_q) &&
	    cfil_queue_empty(&entry->cfe_rcv.cfe_ctl_q)))) {
		entry->cfe_flags |= CFEF_CFIL_DETACHED;
		CFIL_LOG(LOG_INFO, "so %llx detached %u",
			(uint64_t)VM_KERNEL_ADDRPERM(so), kcunit);
		/* Wake up a thread blocked in close-wait on last detach */
		if ((so->so_cfil->cfi_flags & CFIF_CLOSE_WAIT) &&
		    cfil_filters_attached(so) == 0) {
			CFIL_LOG(LOG_INFO, "so %llx waking",
				(uint64_t)VM_KERNEL_ADDRPERM(so));
			wakeup((caddr_t)&so->so_cfil);
		}
	}
	CFIL_INFO_VERIFY(so->so_cfil);
	CFIL_LOG(LOG_INFO, "return %d", error);
	return (error);
}
3030
3031 /*
3032 * Update pass offset for socket when no data is pending
3033 */
3034 static int
3035 cfil_set_socket_pass_offset(struct socket *so, int outgoing)
3036 {
3037 struct cfi_buf *cfi_buf;
3038 struct cfil_entry *entry;
3039 struct cfe_buf *entrybuf;
3040 uint32_t kcunit;
3041 uint64_t pass_offset = 0;
3042
3043 if (so->so_cfil == NULL)
3044 return (0);
3045
3046 CFIL_LOG(LOG_INFO, "so %llx outgoing %d",
3047 (uint64_t)VM_KERNEL_ADDRPERM(so), outgoing);
3048
3049 socket_lock_assert_owned(so);
3050
3051 if (outgoing)
3052 cfi_buf = &so->so_cfil->cfi_snd;
3053 else
3054 cfi_buf = &so->so_cfil->cfi_rcv;
3055
3056 if (cfi_buf->cfi_pending_last - cfi_buf->cfi_pending_first == 0) {
3057 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
3058 entry = &so->so_cfil->cfi_entries[kcunit - 1];
3059
3060 /* Are we attached to a filter? */
3061 if (entry->cfe_filter == NULL)
3062 continue;
3063
3064 if (outgoing)
3065 entrybuf = &entry->cfe_snd;
3066 else
3067 entrybuf = &entry->cfe_rcv;
3068
3069 if (pass_offset == 0 ||
3070 entrybuf->cfe_pass_offset < pass_offset)
3071 pass_offset = entrybuf->cfe_pass_offset;
3072 }
3073 cfi_buf->cfi_pass_offset = pass_offset;
3074 }
3075
3076 return (0);
3077 }
3078
3079 int
3080 cfil_action_data_pass(struct socket *so, uint32_t kcunit, int outgoing,
3081 uint64_t pass_offset, uint64_t peek_offset)
3082 {
3083 errno_t error = 0;
3084
3085 CFIL_LOG(LOG_INFO, "");
3086
3087 socket_lock_assert_owned(so);
3088
3089 error = cfil_acquire_sockbuf(so, outgoing);
3090 if (error != 0) {
3091 CFIL_LOG(LOG_INFO, "so %llx %s dropped",
3092 (uint64_t)VM_KERNEL_ADDRPERM(so),
3093 outgoing ? "out" : "in");
3094 goto release;
3095 }
3096
3097 error = cfil_update_data_offsets(so, kcunit, outgoing,
3098 pass_offset, peek_offset);
3099
3100 cfil_service_inject_queue(so, outgoing);
3101
3102 cfil_set_socket_pass_offset(so, outgoing);
3103 release:
3104 CFIL_INFO_VERIFY(so->so_cfil);
3105 cfil_release_sockbuf(so, outgoing);
3106
3107 return (error);
3108 }
3109
3110
/*
 * cfil_flush_queues
 *
 * Drain the control, pending and inject queues of both directions for
 * every filter entry of the socket, counting the drained bytes in the
 * drop or close statistics depending on whether CFIF_DROP is set.
 * Errors from acquiring the socket buffers are ignored; each direction
 * re-checks that the content filter info is still attached.
 *
 * Called with the socket lock held.
 */
static void
cfil_flush_queues(struct socket *so)
{
	struct cfil_entry *entry;
	int kcunit;
	uint64_t drained;

	if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL)
		goto done;

	socket_lock_assert_owned(so);

	/*
	 * Flush the output queues and ignore errors as long as
	 * we are attached
	 */
	(void) cfil_acquire_sockbuf(so, 1);
	if (so->so_cfil != NULL) {
		drained = 0;
		for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
			entry = &so->so_cfil->cfi_entries[kcunit - 1];

			drained += cfil_queue_drain(&entry->cfe_snd.cfe_ctl_q);
			drained += cfil_queue_drain(
				&entry->cfe_snd.cfe_pending_q);
		}
		drained += cfil_queue_drain(&so->so_cfil->cfi_snd.cfi_inject_q);
		if (drained) {
			/* Attribute the drained bytes to drop or close */
			if (so->so_cfil->cfi_flags & CFIF_DROP)
				OSIncrementAtomic(
					&cfil_stats.cfs_flush_out_drop);
			else
				OSIncrementAtomic(
					&cfil_stats.cfs_flush_out_close);
		}
	}
	cfil_release_sockbuf(so, 1);

	/*
	 * Flush the input queues
	 */
	(void) cfil_acquire_sockbuf(so, 0);
	if (so->so_cfil != NULL) {
		drained = 0;
		for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
			entry = &so->so_cfil->cfi_entries[kcunit - 1];

			drained += cfil_queue_drain(
				&entry->cfe_rcv.cfe_ctl_q);
			drained += cfil_queue_drain(
				&entry->cfe_rcv.cfe_pending_q);
		}
		drained += cfil_queue_drain(&so->so_cfil->cfi_rcv.cfi_inject_q);
		if (drained) {
			/* Attribute the drained bytes to drop or close */
			if (so->so_cfil->cfi_flags & CFIF_DROP)
				OSIncrementAtomic(
					&cfil_stats.cfs_flush_in_drop);
			else
				OSIncrementAtomic(
					&cfil_stats.cfs_flush_in_close);
		}
	}
	cfil_release_sockbuf(so, 0);
done:
	CFIL_INFO_VERIFY(so->so_cfil);
}
3177
/*
 * cfil_action_drop
 *
 * Apply a "drop" verdict from the filter agent identified by kcunit:
 * mark the socket defunct, disconnect it, flush all data held by the
 * content filter and mark this filter entry detached.
 *
 * Returns 0, or an errno from sosetdefunct()/sodefunct().
 */
int
cfil_action_drop(struct socket *so, uint32_t kcunit)
{
	errno_t error = 0;
	struct cfil_entry *entry;
	struct proc *p;

	/* Nothing to do if the socket is not (or no longer) filtered */
	if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL)
		goto done;

	socket_lock_assert_owned(so);

	entry = &so->so_cfil->cfi_entries[kcunit - 1];

	/* Are we attached to the filter? */
	if (entry->cfe_filter == NULL)
		goto done;

	/* Remember the drop so subsequent data is rejected with EPIPE */
	so->so_cfil->cfi_flags |= CFIF_DROP;

	p = current_proc();

	/*
	 * Force the socket to be marked defunct
	 * (forcing fixed along with rdar://19391339)
	 */
	error = sosetdefunct(p, so,
		SHUTDOWN_SOCKET_LEVEL_CONTENT_FILTER | SHUTDOWN_SOCKET_LEVEL_DISCONNECT_ALL,
		FALSE);

	/* Flush the socket buffer and disconnect */
	if (error == 0)
		error = sodefunct(p, so,
			SHUTDOWN_SOCKET_LEVEL_CONTENT_FILTER | SHUTDOWN_SOCKET_LEVEL_DISCONNECT_ALL);

	/* The filter is done, mark as detached */
	entry->cfe_flags |= CFEF_CFIL_DETACHED;
	CFIL_LOG(LOG_INFO, "so %llx detached %u",
		(uint64_t)VM_KERNEL_ADDRPERM(so), kcunit);

	/* Pending data needs to go */
	cfil_flush_queues(so);

	/*
	 * If a thread is blocked in cfil_sock_close_wait() and this was
	 * the last attached filter, wake it up
	 */
	if (so->so_cfil && (so->so_cfil->cfi_flags & CFIF_CLOSE_WAIT)) {
		if (cfil_filters_attached(so) == 0) {
			CFIL_LOG(LOG_INFO, "so %llx waking",
				(uint64_t)VM_KERNEL_ADDRPERM(so));
			wakeup((caddr_t)&so->so_cfil);
		}
	}
done:
	return (error);
}
3231
3232 static int
3233 cfil_update_entry_offsets(struct socket *so, int outgoing, unsigned int datalen)
3234 {
3235 struct cfil_entry *entry;
3236 struct cfe_buf *entrybuf;
3237 uint32_t kcunit;
3238
3239 CFIL_LOG(LOG_INFO, "so %llx outgoing %d datalen %u",
3240 (uint64_t)VM_KERNEL_ADDRPERM(so), outgoing, datalen);
3241
3242 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
3243 entry = &so->so_cfil->cfi_entries[kcunit - 1];
3244
3245 /* Are we attached to the filter? */
3246 if (entry->cfe_filter == NULL)
3247 continue;
3248
3249 if (outgoing)
3250 entrybuf = &entry->cfe_snd;
3251 else
3252 entrybuf = &entry->cfe_rcv;
3253
3254 entrybuf->cfe_ctl_q.q_start += datalen;
3255 entrybuf->cfe_pass_offset = entrybuf->cfe_ctl_q.q_start;
3256 entrybuf->cfe_peeked = entrybuf->cfe_ctl_q.q_start;
3257 if (entrybuf->cfe_peek_offset < entrybuf->cfe_pass_offset)
3258 entrybuf->cfe_peek_offset = entrybuf->cfe_pass_offset;
3259
3260 entrybuf->cfe_ctl_q.q_end += datalen;
3261
3262 entrybuf->cfe_pending_q.q_start += datalen;
3263 entrybuf->cfe_pending_q.q_end += datalen;
3264 }
3265 CFIL_INFO_VERIFY(so->so_cfil);
3266 return (0);
3267 }
3268
/*
 * cfil_data_common
 *
 * Common data path for both directions: account the new data in the
 * direction's cfi_buf, then either take the fast path (all of the data
 * falls below the socket-wide pass offset, so only offsets need
 * updating) or run the data through every attached content filter.
 *
 * Returns 0 when the data may proceed, EPIPE when a filter dropped the
 * socket, or an error from cfil_data_filter().
 */
int
cfil_data_common(struct socket *so, int outgoing, struct sockaddr *to,
	struct mbuf *data, struct mbuf *control, uint32_t flags)
{
#pragma unused(to, control, flags)
	errno_t error = 0;
	unsigned int datalen;
	int mbcnt;
	int kcunit;
	struct cfi_buf *cfi_buf;

	/* A detached socket is no longer filtered: let the data through */
	if (so->so_cfil == NULL) {
		CFIL_LOG(LOG_ERR, "so %llx cfil detached",
			(uint64_t)VM_KERNEL_ADDRPERM(so));
		error = 0;
		goto done;
	} else if (so->so_cfil->cfi_flags & CFIF_DROP) {
		/* A filter decided to drop this socket */
		CFIL_LOG(LOG_ERR, "so %llx drop set",
			(uint64_t)VM_KERNEL_ADDRPERM(so));
		error = EPIPE;
		goto done;
	}

	datalen = cfil_data_length(data, &mbcnt);

	CFIL_LOG(LOG_INFO, "so %llx %s m %llx len %u flags 0x%x nextpkt %llx",
		(uint64_t)VM_KERNEL_ADDRPERM(so),
		outgoing ? "out" : "in",
		(uint64_t)VM_KERNEL_ADDRPERM(data), datalen, data->m_flags,
		(uint64_t)VM_KERNEL_ADDRPERM(data->m_nextpkt));

	if (outgoing)
		cfi_buf = &so->so_cfil->cfi_snd;
	else
		cfi_buf = &so->so_cfil->cfi_rcv;

	/* Account the new data in the direction's pending window */
	cfi_buf->cfi_pending_last += datalen;
	cfi_buf->cfi_pending_mbcnt += mbcnt;
	cfil_info_buf_verify(cfi_buf);

	CFIL_LOG(LOG_INFO, "so %llx cfi_pending_last %llu cfi_pass_offset %llu",
		(uint64_t)VM_KERNEL_ADDRPERM(so),
		cfi_buf->cfi_pending_last,
		cfi_buf->cfi_pass_offset);

	/* Fast path when below pass offset */
	if (cfi_buf->cfi_pending_last <= cfi_buf->cfi_pass_offset) {
		cfil_update_entry_offsets(so, outgoing, datalen);
	} else {
		/* Run the data through each attached filter in turn */
		for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
			error = cfil_data_filter(so, kcunit, outgoing, data,
				datalen);
			/* 0 means passed so continue with next filter */
			if (error != 0)
				break;
		}
	}

	/* Move cursor if no filter claimed the data */
	if (error == 0) {
		cfi_buf->cfi_pending_first += datalen;
		cfi_buf->cfi_pending_mbcnt -= mbcnt;
		cfil_info_buf_verify(cfi_buf);
	}
done:
	CFIL_INFO_VERIFY(so->so_cfil);

	return (error);
}
3338
3339 /*
3340 * Callback from socket layer sosendxxx()
3341 */
3342 int
3343 cfil_sock_data_out(struct socket *so, struct sockaddr *to,
3344 struct mbuf *data, struct mbuf *control, uint32_t flags)
3345 {
3346 int error = 0;
3347
3348 if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL)
3349 return (0);
3350
3351 socket_lock_assert_owned(so);
3352
3353 if (so->so_cfil->cfi_flags & CFIF_DROP) {
3354 CFIL_LOG(LOG_ERR, "so %llx drop set",
3355 (uint64_t)VM_KERNEL_ADDRPERM(so));
3356 return (EPIPE);
3357 }
3358 if (control != NULL) {
3359 CFIL_LOG(LOG_ERR, "so %llx control",
3360 (uint64_t)VM_KERNEL_ADDRPERM(so));
3361 OSIncrementAtomic(&cfil_stats.cfs_data_out_control);
3362 }
3363 if ((flags & MSG_OOB)) {
3364 CFIL_LOG(LOG_ERR, "so %llx MSG_OOB",
3365 (uint64_t)VM_KERNEL_ADDRPERM(so));
3366 OSIncrementAtomic(&cfil_stats.cfs_data_out_oob);
3367 }
3368 if ((so->so_snd.sb_flags & SB_LOCK) == 0)
3369 panic("so %p SB_LOCK not set", so);
3370
3371 if (so->so_snd.sb_cfil_thread != NULL)
3372 panic("%s sb_cfil_thread %p not NULL", __func__,
3373 so->so_snd.sb_cfil_thread);
3374
3375 error = cfil_data_common(so, 1, to, data, control, flags);
3376
3377 return (error);
3378 }
3379
3380 /*
3381 * Callback from socket layer sbappendxxx()
3382 */
3383 int
3384 cfil_sock_data_in(struct socket *so, struct sockaddr *from,
3385 struct mbuf *data, struct mbuf *control, uint32_t flags)
3386 {
3387 int error = 0;
3388
3389 if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL)
3390 return (0);
3391
3392 socket_lock_assert_owned(so);
3393
3394 if (so->so_cfil->cfi_flags & CFIF_DROP) {
3395 CFIL_LOG(LOG_ERR, "so %llx drop set",
3396 (uint64_t)VM_KERNEL_ADDRPERM(so));
3397 return (EPIPE);
3398 }
3399 if (control != NULL) {
3400 CFIL_LOG(LOG_ERR, "so %llx control",
3401 (uint64_t)VM_KERNEL_ADDRPERM(so));
3402 OSIncrementAtomic(&cfil_stats.cfs_data_in_control);
3403 }
3404 if (data->m_type == MT_OOBDATA) {
3405 CFIL_LOG(LOG_ERR, "so %llx MSG_OOB",
3406 (uint64_t)VM_KERNEL_ADDRPERM(so));
3407 OSIncrementAtomic(&cfil_stats.cfs_data_in_oob);
3408 }
3409 error = cfil_data_common(so, 0, from, data, control, flags);
3410
3411 return (error);
3412 }
3413
/*
 * Callback from socket layer soshutdownxxx()
 *
 * We may delay the shutdown write if there's outgoing data in process.
 *
 * There is no point in delaying the shutdown read because the process
 * indicated that it does not want to read anymore data.
 *
 * Returns 0, ENOTCONN when the requested side is already shut down,
 * or EJUSTRETURN when the protocol-level shutdown must be deferred
 * until the filters pass the pending outgoing data.  *how may be
 * narrowed from SHUT_RDWR to SHUT_RD for the same reason.
 */
int
cfil_sock_shutdown(struct socket *so, int *how)
{
	int error = 0;

	if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL)
		goto done;

	socket_lock_assert_owned(so);

	CFIL_LOG(LOG_INFO, "so %llx how %d",
		(uint64_t)VM_KERNEL_ADDRPERM(so), *how);

	/*
	 * Check the state of the socket before the content filter
	 */
	if (*how != SHUT_WR && (so->so_state & SS_CANTRCVMORE) != 0) {
		/* read already shut down */
		error = ENOTCONN;
		goto done;
	}
	if (*how != SHUT_RD && (so->so_state & SS_CANTSENDMORE) != 0) {
		/* write already shut down */
		error = ENOTCONN;
		goto done;
	}

	/* Nothing to delay once a filter has dropped the socket */
	if ((so->so_cfil->cfi_flags & CFIF_DROP) != 0) {
		CFIL_LOG(LOG_ERR, "so %llx drop set",
			(uint64_t)VM_KERNEL_ADDRPERM(so));
		goto done;
	}

	/*
	 * shutdown read: SHUT_RD or SHUT_RDWR
	 */
	if (*how != SHUT_WR) {
		if (so->so_cfil->cfi_flags & CFIF_SHUT_RD) {
			error = ENOTCONN;
			goto done;
		}
		so->so_cfil->cfi_flags |= CFIF_SHUT_RD;
		cfil_sock_notify_shutdown(so, SHUT_RD);
	}
	/*
	 * shutdown write: SHUT_WR or SHUT_RDWR
	 */
	if (*how != SHUT_RD) {
		if (so->so_cfil->cfi_flags & CFIF_SHUT_WR) {
			error = ENOTCONN;
			goto done;
		}
		so->so_cfil->cfi_flags |= CFIF_SHUT_WR;
		cfil_sock_notify_shutdown(so, SHUT_WR);
		/*
		 * When outgoing data is pending, we delay the shutdown at the
		 * protocol level until the content filters give the final
		 * verdict on the pending data.
		 */
		if (cfil_sock_data_pending(&so->so_snd) != 0) {
			/*
			 * When shutting down the read and write sides at once
			 * we can proceed to the final shutdown of the read
			 * side. Otherwise, we just return.
			 */
			if (*how == SHUT_WR) {
				error = EJUSTRETURN;
			} else if (*how == SHUT_RDWR) {
				*how = SHUT_RD;
			}
		}
	}
done:
	return (error);
}
3497
/*
 * This is called when the socket is closed and there is no more
 * opportunity for filtering
 */
void
cfil_sock_is_closed(struct socket *so)
{
	errno_t error = 0;
	int kcunit;

	if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL)
		return;

	CFIL_LOG(LOG_INFO, "so %llx", (uint64_t)VM_KERNEL_ADDRPERM(so));

	socket_lock_assert_owned(so);

	for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
		/* Let the filters know of the closing */
		error = cfil_dispatch_closed_event(so, kcunit);
	}

	/* Last chance to push passed data out */
	error = cfil_acquire_sockbuf(so, 1);
	if (error == 0)
		cfil_service_inject_queue(so, 1);
	cfil_release_sockbuf(so, 1);

	/* Remember the socket was closed */
	so->so_cfil->cfi_flags |= CFIF_SOCK_CLOSED;

	/* Pending data needs to go */
	cfil_flush_queues(so);

	CFIL_INFO_VERIFY(so->so_cfil);
}
3533
3534 /*
3535 * This is called when the socket is disconnected so let the filters
3536 * know about the disconnection and that no more data will come
3537 *
3538 * The how parameter has the same values as soshutown()
3539 */
3540 void
3541 cfil_sock_notify_shutdown(struct socket *so, int how)
3542 {
3543 errno_t error = 0;
3544 int kcunit;
3545
3546 if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL)
3547 return;
3548
3549 CFIL_LOG(LOG_INFO, "so %llx how %d",
3550 (uint64_t)VM_KERNEL_ADDRPERM(so), how);
3551
3552 socket_lock_assert_owned(so);
3553
3554 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
3555 /* Disconnect incoming side */
3556 if (how != SHUT_WR)
3557 error = cfil_dispatch_disconnect_event(so, kcunit, 0);
3558 /* Disconnect outgoing side */
3559 if (how != SHUT_RD)
3560 error = cfil_dispatch_disconnect_event(so, kcunit, 1);
3561 }
3562 }
3563
3564 static int
3565 cfil_filters_attached(struct socket *so)
3566 {
3567 struct cfil_entry *entry;
3568 uint32_t kcunit;
3569 int attached = 0;
3570
3571 if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL)
3572 return (0);
3573
3574 socket_lock_assert_owned(so);
3575
3576 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
3577 entry = &so->so_cfil->cfi_entries[kcunit - 1];
3578
3579 /* Are we attached to the filter? */
3580 if (entry->cfe_filter == NULL)
3581 continue;
3582 if ((entry->cfe_flags & CFEF_SENT_SOCK_ATTACHED) == 0)
3583 continue;
3584 if ((entry->cfe_flags & CFEF_CFIL_DETACHED) != 0)
3585 continue;
3586 attached = 1;
3587 break;
3588 }
3589
3590 return (attached);
3591 }
3592
/*
 * This is called when the socket is closed and we are waiting for
 * the filters to give the final pass or drop verdict
 */
void
cfil_sock_close_wait(struct socket *so)
{
	lck_mtx_t *mutex_held;
	struct timespec ts;
	int error;

	if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL)
		return;

	CFIL_LOG(LOG_INFO, "so %llx", (uint64_t)VM_KERNEL_ADDRPERM(so));

	/* Find the mutex protecting this socket so we can msleep() on it */
	if (so->so_proto->pr_getlock != NULL)
		mutex_held = (*so->so_proto->pr_getlock)(so, 0);
	else
		mutex_held = so->so_proto->pr_domain->dom_mtx;
	lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);

	while (cfil_filters_attached(so)) {
		/*
		 * Notify the filters we are going away so they can detach
		 */
		cfil_sock_notify_shutdown(so, SHUT_RDWR);

		/*
		 * Make sure we need to wait after the filter are notified
		 * of the disconnection
		 */
		if (cfil_filters_attached(so) == 0)
			break;

		CFIL_LOG(LOG_INFO, "so %llx waiting",
			(uint64_t)VM_KERNEL_ADDRPERM(so));

		/* cfil_close_wait_timeout is in milliseconds */
		ts.tv_sec = cfil_close_wait_timeout / 1000;
		ts.tv_nsec = (cfil_close_wait_timeout % 1000) *
			NSEC_PER_USEC * 1000;

		OSIncrementAtomic(&cfil_stats.cfs_close_wait);
		so->so_cfil->cfi_flags |= CFIF_CLOSE_WAIT;
		/* Woken up via wakeup(&so->so_cfil) when the last filter detaches */
		error = msleep((caddr_t)&so->so_cfil, mutex_held,
			PSOCK | PCATCH, "cfil_sock_close_wait", &ts);
		so->so_cfil->cfi_flags &= ~CFIF_CLOSE_WAIT;

		CFIL_LOG(LOG_NOTICE, "so %llx timed out %d",
			(uint64_t)VM_KERNEL_ADDRPERM(so), (error != 0));

		/*
		 * Force close in case of timeout
		 */
		if (error != 0) {
			OSIncrementAtomic(&cfil_stats.cfs_close_wait_timeout);
			break;
		}
	}

}
3654
3655 /*
3656 * Returns the size of the data held by the content filter by using
3657 */
3658 int32_t
3659 cfil_sock_data_pending(struct sockbuf *sb)
3660 {
3661 struct socket *so = sb->sb_so;
3662 uint64_t pending = 0;
3663
3664 if ((so->so_flags & SOF_CONTENT_FILTER) != 0 && so->so_cfil != NULL) {
3665 struct cfi_buf *cfi_buf;
3666
3667 socket_lock_assert_owned(so);
3668
3669 if ((sb->sb_flags & SB_RECV) == 0)
3670 cfi_buf = &so->so_cfil->cfi_snd;
3671 else
3672 cfi_buf = &so->so_cfil->cfi_rcv;
3673
3674 pending = cfi_buf->cfi_pending_last -
3675 cfi_buf->cfi_pending_first;
3676
3677 /*
3678 * If we are limited by the "chars of mbufs used" roughly
3679 * adjust so we won't overcommit
3680 */
3681 if (pending > (uint64_t)cfi_buf->cfi_pending_mbcnt)
3682 pending = cfi_buf->cfi_pending_mbcnt;
3683 }
3684
3685 VERIFY(pending < INT32_MAX);
3686
3687 return (int32_t)(pending);
3688 }
3689
3690 /*
3691 * Return the socket buffer space used by data being held by content filters
3692 * so processes won't clog the socket buffer
3693 */
3694 int32_t
3695 cfil_sock_data_space(struct sockbuf *sb)
3696 {
3697 struct socket *so = sb->sb_so;
3698 uint64_t pending = 0;
3699
3700 if ((so->so_flags & SOF_CONTENT_FILTER) != 0 && so->so_cfil != NULL &&
3701 so->so_snd.sb_cfil_thread != current_thread()) {
3702 struct cfi_buf *cfi_buf;
3703
3704 socket_lock_assert_owned(so);
3705
3706 if ((sb->sb_flags & SB_RECV) == 0)
3707 cfi_buf = &so->so_cfil->cfi_snd;
3708 else
3709 cfi_buf = &so->so_cfil->cfi_rcv;
3710
3711 pending = cfi_buf->cfi_pending_last -
3712 cfi_buf->cfi_pending_first;
3713
3714 /*
3715 * If we are limited by the "chars of mbufs used" roughly
3716 * adjust so we won't overcommit
3717 */
3718 if ((uint64_t)cfi_buf->cfi_pending_mbcnt > pending)
3719 pending = cfi_buf->cfi_pending_mbcnt;
3720 }
3721
3722 VERIFY(pending < INT32_MAX);
3723
3724 return (int32_t)(pending);
3725 }
3726
3727 /*
3728 * A callback from the socket and protocol layer when data becomes
3729 * available in the socket buffer to give a chance for the content filter
3730 * to re-inject data that was held back
3731 */
3732 void
3733 cfil_sock_buf_update(struct sockbuf *sb)
3734 {
3735 int outgoing;
3736 int error;
3737 struct socket *so = sb->sb_so;
3738
3739 if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL)
3740 return;
3741
3742 if (!cfil_sbtrim)
3743 return;
3744
3745 socket_lock_assert_owned(so);
3746
3747 if ((sb->sb_flags & SB_RECV) == 0) {
3748 if ((so->so_cfil->cfi_flags & CFIF_RETRY_INJECT_OUT) == 0)
3749 return;
3750 outgoing = 1;
3751 OSIncrementAtomic(&cfil_stats.cfs_inject_q_out_retry);
3752 } else {
3753 if ((so->so_cfil->cfi_flags & CFIF_RETRY_INJECT_IN) == 0)
3754 return;
3755 outgoing = 0;
3756 OSIncrementAtomic(&cfil_stats.cfs_inject_q_in_retry);
3757 }
3758
3759 CFIL_LOG(LOG_NOTICE, "so %llx outgoing %d",
3760 (uint64_t)VM_KERNEL_ADDRPERM(so), outgoing);
3761
3762 error = cfil_acquire_sockbuf(so, outgoing);
3763 if (error == 0)
3764 cfil_service_inject_queue(so, outgoing);
3765 cfil_release_sockbuf(so, outgoing);
3766 }
3767
/*
 * sysctl_cfil_filter_list
 *
 * Sysctl handler that reports one cfil_filter_stat per active content
 * filter.  Read-only: a size-probe request (oldptr == NULL) only
 * returns the required length.
 */
int
sysctl_cfil_filter_list(struct sysctl_oid *oidp, void *arg1, int arg2,
	struct sysctl_req *req)
{
#pragma unused(oidp, arg1, arg2)
	int error = 0;
	size_t len = 0;
	u_int32_t i;

	/* Read only */
	if (req->newptr != USER_ADDR_NULL)
		return (EPERM);

	cfil_rw_lock_shared(&cfil_lck_rw);

	for (i = 0; content_filters != NULL && i < MAX_CONTENT_FILTER; i++) {
		struct cfil_filter_stat filter_stat;
		struct content_filter *cfc = content_filters[i];

		if (cfc == NULL)
			continue;

		/* If just asking for the size */
		if (req->oldptr == USER_ADDR_NULL) {
			len += sizeof(struct cfil_filter_stat);
			continue;
		}

		/* Fill a snapshot of this filter's state */
		bzero(&filter_stat, sizeof(struct cfil_filter_stat));
		filter_stat.cfs_len = sizeof(struct cfil_filter_stat);
		filter_stat.cfs_filter_id = cfc->cf_kcunit;
		filter_stat.cfs_flags = cfc->cf_flags;
		filter_stat.cfs_sock_count = cfc->cf_sock_count;
		filter_stat.cfs_necp_control_unit = cfc->cf_necp_control_unit;

		error = SYSCTL_OUT(req, &filter_stat,
			sizeof (struct cfil_filter_stat));
		if (error != 0)
			break;
	}
	/* If just asking for the size */
	if (req->oldptr == USER_ADDR_NULL)
		req->oldidx = len;

	cfil_rw_unlock_shared(&cfil_lck_rw);

	return (error);
}
3816
/*
 * sysctl_cfil_sock_list
 *
 * Sysctl handler that reports one cfil_sock_stat (socket identity,
 * per-direction offsets, and one cfil_entry_stat per filter slot) for
 * every socket attached to the content filter subsystem.  Read-only:
 * a size-probe request (oldptr == NULL) returns an estimated length.
 */
static int sysctl_cfil_sock_list(struct sysctl_oid *oidp, void *arg1, int arg2,
	struct sysctl_req *req)
{
#pragma unused(oidp, arg1, arg2)
	int error = 0;
	u_int32_t i;
	struct cfil_info *cfi;

	/* Read only */
	if (req->newptr != USER_ADDR_NULL)
		return (EPERM);

	cfil_rw_lock_shared(&cfil_lck_rw);

	/*
	 * If just asking for the size,
	 */
	if (req->oldptr == USER_ADDR_NULL) {
		req->oldidx = cfil_sock_attached_count *
			sizeof(struct cfil_sock_stat);
		/* Bump the length in case new sockets gets attached */
		req->oldidx += req->oldidx >> 3;
		goto done;
	}

	TAILQ_FOREACH(cfi, &cfil_sock_head, cfi_link) {
		struct cfil_entry *entry;
		struct cfil_sock_stat stat;
		struct socket *so = cfi->cfi_so;

		bzero(&stat, sizeof(struct cfil_sock_stat));
		stat.cfs_len = sizeof(struct cfil_sock_stat);
		stat.cfs_sock_id = cfi->cfi_sock_id;
		stat.cfs_flags = cfi->cfi_flags;

		/* Report the effective process when the socket is delegated */
		if (so != NULL) {
			stat.cfs_pid = so->last_pid;
			memcpy(stat.cfs_uuid, so->last_uuid,
				sizeof(uuid_t));
			if (so->so_flags & SOF_DELEGATED) {
				stat.cfs_e_pid = so->e_pid;
				memcpy(stat.cfs_e_uuid, so->e_uuid,
					sizeof(uuid_t));
			} else {
				stat.cfs_e_pid = so->last_pid;
				memcpy(stat.cfs_e_uuid, so->last_uuid,
					sizeof(uuid_t));
			}
		}

		/* Per-direction pending windows and inject queue lengths */
		stat.cfs_snd.cbs_pending_first =
			cfi->cfi_snd.cfi_pending_first;
		stat.cfs_snd.cbs_pending_last =
			cfi->cfi_snd.cfi_pending_last;
		stat.cfs_snd.cbs_inject_q_len =
			cfil_queue_len(&cfi->cfi_snd.cfi_inject_q);
		stat.cfs_snd.cbs_pass_offset =
			cfi->cfi_snd.cfi_pass_offset;

		stat.cfs_rcv.cbs_pending_first =
			cfi->cfi_rcv.cfi_pending_first;
		stat.cfs_rcv.cbs_pending_last =
			cfi->cfi_rcv.cfi_pending_last;
		stat.cfs_rcv.cbs_inject_q_len =
			cfil_queue_len(&cfi->cfi_rcv.cfi_inject_q);
		stat.cfs_rcv.cbs_pass_offset =
			cfi->cfi_rcv.cfi_pass_offset;

		/* One entry stat per filter slot, attached or not */
		for (i = 0; i < MAX_CONTENT_FILTER; i++) {
			struct cfil_entry_stat *estat;
			struct cfe_buf *ebuf;
			struct cfe_buf_stat *sbuf;

			entry = &cfi->cfi_entries[i];

			estat = &stat.ces_entries[i];

			estat->ces_len = sizeof(struct cfil_entry_stat);
			/* filter id 0 means the slot is unattached */
			estat->ces_filter_id = entry->cfe_filter ?
				entry->cfe_filter->cf_kcunit : 0;
			estat->ces_flags = entry->cfe_flags;
			estat->ces_necp_control_unit =
				entry->cfe_necp_control_unit;

			estat->ces_last_event.tv_sec =
				(int64_t)entry->cfe_last_event.tv_sec;
			estat->ces_last_event.tv_usec =
				(int64_t)entry->cfe_last_event.tv_usec;

			estat->ces_last_action.tv_sec =
				(int64_t)entry->cfe_last_action.tv_sec;
			estat->ces_last_action.tv_usec =
				(int64_t)entry->cfe_last_action.tv_usec;

			/* Snapshot of the send-side buffer offsets */
			ebuf = &entry->cfe_snd;
			sbuf = &estat->ces_snd;
			sbuf->cbs_pending_first =
				cfil_queue_offset_first(&ebuf->cfe_pending_q);
			sbuf->cbs_pending_last =
				cfil_queue_offset_last(&ebuf->cfe_pending_q);
			sbuf->cbs_ctl_first =
				cfil_queue_offset_first(&ebuf->cfe_ctl_q);
			sbuf->cbs_ctl_last =
				cfil_queue_offset_last(&ebuf->cfe_ctl_q);
			sbuf->cbs_pass_offset = ebuf->cfe_pass_offset;
			sbuf->cbs_peek_offset = ebuf->cfe_peek_offset;
			sbuf->cbs_peeked = ebuf->cfe_peeked;

			/* Snapshot of the receive-side buffer offsets */
			ebuf = &entry->cfe_rcv;
			sbuf = &estat->ces_rcv;
			sbuf->cbs_pending_first =
				cfil_queue_offset_first(&ebuf->cfe_pending_q);
			sbuf->cbs_pending_last =
				cfil_queue_offset_last(&ebuf->cfe_pending_q);
			sbuf->cbs_ctl_first =
				cfil_queue_offset_first(&ebuf->cfe_ctl_q);
			sbuf->cbs_ctl_last =
				cfil_queue_offset_last(&ebuf->cfe_ctl_q);
			sbuf->cbs_pass_offset = ebuf->cfe_pass_offset;
			sbuf->cbs_peek_offset = ebuf->cfe_peek_offset;
			sbuf->cbs_peeked = ebuf->cfe_peeked;
		}
		error = SYSCTL_OUT(req, &stat,
			sizeof (struct cfil_sock_stat));
		if (error != 0)
			break;
	}
done:
	cfil_rw_unlock_shared(&cfil_lck_rw);

	return (error);
}