1 /*
2 * Copyright (c) 2013-2019 Apple Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. Please obtain a copy of the License at
10 * http://www.opensource.apple.com/apsl/ and read it before using this
11 * file.
12 *
13 * The Original Code and all software distributed under the License are
14 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
18 * Please see the License for the specific language governing rights and
19 * limitations under the License.
20 *
21 * @APPLE_LICENSE_HEADER_END@
22 */
23
24 /*
25 * THEORY OF OPERATION
26 *
27 * The socket content filter subsystem provides a way for user space agents to
28 * make filtering decisions based on the content of the data being sent and
29 * received by TCP/IP sockets.
30 *
31 * A content filter user space agent gets a copy of the data and the data is
32 * also kept in a kernel buffer until the user space agent makes a pass or drop
33 * decision. This unidirectional flow of content avoids unnecessary data copies
34 * back to the kernel.
35 *
36 * A user space filter agent opens a kernel control socket with the name
37 * CONTENT_FILTER_CONTROL_NAME to attach to the socket content filter subsystem.
38 * When connected, a "struct content_filter" is created and set as the
39 * "unitinfo" of the corresponding kernel control socket instance.
40 *
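 * For illustration, attaching works through the standard Darwin kernel
 * control socket APIs. The sketch below is not part of this file and omits
 * error handling; the descriptor name "cfil_fd" is hypothetical and the
 * caller needs the appropriate privilege to open this control:
 *
 *	#include <sys/socket.h>
 *	#include <sys/ioctl.h>
 *	#include <sys/sys_domain.h>
 *	#include <sys/kern_control.h>
 *	#include <net/content_filter.h>
 *	#include <string.h>
 *
 *	struct ctl_info info;
 *	struct sockaddr_ctl addr;
 *	int cfil_fd = socket(PF_SYSTEM, SOCK_DGRAM, SYSPROTO_CONTROL);
 *
 *	memset(&info, 0, sizeof(info));
 *	strlcpy(info.ctl_name, CONTENT_FILTER_CONTROL_NAME, sizeof(info.ctl_name));
 *	ioctl(cfil_fd, CTLIOCGINFO, &info);		// resolve the name to a control id
 *
 *	memset(&addr, 0, sizeof(addr));
 *	addr.sc_len = sizeof(addr);
 *	addr.sc_family = AF_SYSTEM;
 *	addr.ss_sysaddr = AF_SYS_CONTROL;
 *	addr.sc_id = info.ctl_id;
 *	addr.sc_unit = 0;				// 0 lets the kernel pick a unit
 *	connect(cfil_fd, (struct sockaddr *)&addr, sizeof(addr));
 *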
41 * The socket content filter subsystem exchanges messages with the user space
42 * filter agent until an ultimate pass or drop decision is made by the
43 * user space filter agent.
44 *
45 * It should be noted that messages about many TCP/IP sockets can be multiplexed
46 * over a single kernel control socket.
47 *
48 * Notes:
49 * - The current implementation is limited to TCP sockets.
50 * - The current implementation supports a small fixed number of simultaneous
51 * content filters (MAX_CONTENT_FILTER) for the sake of simplicity.
52 *
53 *
54 * NECP FILTER CONTROL UNIT
55 *
56 * A user space filter agent uses the Network Extension Control Policy (NECP)
57 * database to specify which TCP/IP sockets need to be filtered. The NECP
58 * criteria may be based on a variety of properties like user ID or proc UUID.
59 *
60 * The NECP "filter control unit" is used by the socket content filter subsystem
61 * to deliver the relevant TCP/IP content information to the appropriate
62 * user space filter agent via its kernel control socket instance.
63 * This works as follows:
64 *
65 * 1) The user space filter agent specifies an NECP filter control unit when
66 * it adds its filtering rules to the NECP database.
67 *
68 * 2) The user space filter agent also sets its NECP filter control unit on the
69 * content filter kernel control socket via the socket option
70 * CFIL_OPT_NECP_CONTROL_UNIT (see the setsockopt() sketch below).
71 *
72 * 3) The NECP database is consulted to find out if a given TCP/IP socket
73 * needs to be subjected to content filtering and returns the corresponding
74 * NECP filter control unit -- the NECP filter control unit is actually
75 * stored in the TCP/IP socket structure so the NECP lookup is really simple.
76 *
77 * 4) The NECP filter control unit is then used to find the corresponding
78 * kernel control socket instance.
79 *
80 * Note: NECP currently supports a single filter control unit per TCP/IP socket
81 * but this restriction may soon be lifted.
82 *
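 * For step 2) above, the NECP filter control unit is set with a plain
 * setsockopt() at the SYSPROTO_CONTROL level on the connected kernel control
 * socket. A sketch only (the descriptor "cfil_fd" is hypothetical and the
 * value must match the control unit used in the agent's NECP rules):
 *
 *	uint32_t necp_control_unit = 1;
 *	setsockopt(cfil_fd, SYSPROTO_CONTROL, CFIL_OPT_NECP_CONTROL_UNIT,
 *	    &necp_control_unit, sizeof(necp_control_unit));
 *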
83 *
84 * THE MESSAGING PROTOCOL
85 *
86 * The socket content filter subsystem and a user space filter agent
87 * communicate over the kernel control socket via an asynchronous
88 * messaging protocol (this is not a request-response protocol).
89 * The socket content filter subsystem sends event messages to the user
90 * space filter agent about the TCP/IP sockets it is interested in filtering.
91 * The user space filter agent sends action messages to either allow
92 * data to pass or to disallow the data flow (and drop the connection).
93 *
94 * All messages over a content filter kernel control socket share the same
95 * common header of type "struct cfil_msg_hdr". The message type tells whether
96 * it's an event message "CFM_TYPE_EVENT" or an action message "CFM_TYPE_ACTION".
97 * The message header field "cfm_sock_id" identifies a given TCP/IP socket.
98 * Note that the message length given by the header length field may be padded
99 * for alignment and can be larger than the actual content of the message.
100 * The field "cfm_op" describes the kind of event or action.
101 *
102 * Here are the kinds of content filter events:
103 * - CFM_OP_SOCKET_ATTACHED: a new TCP/IP socket is being filtered
104 * - CFM_OP_SOCKET_CLOSED: A TCP/IP socket is closed
105 * - CFM_OP_DATA_OUT: A span of data is being sent on a TCP/IP socket
106 * - CFM_OP_DATA_IN: A span of data is being received on a TCP/IP socket
107 *
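 * For illustration, a user space agent typically reads messages from the
 * kernel control socket and dispatches on this common header. A minimal
 * sketch (the function "handle_message" is hypothetical; the header fields
 * and CFM_* constants are those of <net/content_filter.h>):
 *
 *	static void
 *	handle_message(const struct cfil_msg_hdr *hdr)
 *	{
 *		if (hdr->cfm_type != CFM_TYPE_EVENT)
 *			return;
 *		switch (hdr->cfm_op) {
 *		case CFM_OP_SOCKET_ATTACHED:
 *			// start tracking the flow identified by hdr->cfm_sock_id
 *			break;
 *		case CFM_OP_DATA_OUT:
 *		case CFM_OP_DATA_IN:
 *			// inspect the data span, then answer with CFM_OP_DATA_UPDATE
 *			break;
 *		case CFM_OP_SOCKET_CLOSED:
 *			// stop tracking the flow identified by hdr->cfm_sock_id
 *			break;
 *		}
 *	}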
108 *
109 * EVENT MESSAGES
110 *
111 * The CFM_OP_DATA_OUT and CFM_OP_DATA_IN event messages contain a span of
112 * data that is being sent or received. The position of this span of data
113 * in the data flow is described by a set of start and end offsets. These
114 * are absolute 64-bit offsets. The first byte sent (or received) starts
115 * at offset 0 and ends at offset 1. The length of the content data
116 * is given by the difference between the end offset and the start offset.
117 *
118 * After a CFM_OP_SOCKET_ATTACHED is delivered, CFM_OP_DATA_OUT and
119 * CFM_OP_DATA_IN events are not delivered until a CFM_OP_DATA_UPDATE
120 * action message is sent by the user space filter agent.
121 *
122 * Note: absolute 64-bit offsets should be large enough for the foreseeable
123 * future. A 64-bit counter will wrap after about 468 years at 10 Gbit/sec:
124 * 2^64 / ((10^9 / 8) * 60 * 60 * 24 * 365.25) = 467.63
125 *
126 * There are two kinds of primary content filter actions:
127 * - CFM_OP_DATA_UPDATE: to update pass or peek offsets for each direction.
128 * - CFM_OP_DROP: to shut down the socket and disallow further data flow
129 *
130 * There is also an action to mark a given client flow as already filtered
131 * at a higher level, CFM_OP_BLESS_CLIENT.
132 *
133 *
134 * ACTION MESSAGES
135 *
136 * The CFM_OP_DATA_UPDATE action messages let the user space filter
137 * agent allow data to flow up to the specified pass offset -- there
138 * is a pass offset for outgoing data and a pass offset for incoming data.
139 * When a new TCP/IP socket is attached to the content filter, each pass offset
140 * is initially set to 0 so no data is allowed to pass by default.
141 * When the pass offset is set to CFM_MAX_OFFSET via a CFM_OP_DATA_UPDATE
142 * then the data flow becomes unrestricted.
143 *
144 * Note that pass offsets can only be incremented. A CFM_OP_DATA_UPDATE message
145 * with a pass offset smaller than the pass offset of a previous
146 * CFM_OP_DATA_UPDATE message is silently ignored.
147 *
148 * A user space filter agent also uses CFM_OP_DATA_UPDATE action messages
149 * to tell the kernel how much data it wants to see by using the peek offsets.
150 * Just like pass offsets, there is a peek offset for each direction.
151 * When a new TCP/IP socket is attached to the content filter, each peek offset
152 * is initially set to 0 so no CFM_OP_DATA_OUT or CFM_OP_DATA_IN event
153 * messages are dispatched by default until a CFM_OP_DATA_UPDATE action message
154 * with a peek offset greater than 0 is sent by the user space filter agent.
155 * When the peek offset is set to CFM_MAX_OFFSET via a CFM_OP_DATA_UPDATE
156 * then the flow of data events becomes unrestricted.
157 *
158 * Note that peek offsets cannot be smaller than the corresponding pass offset.
159 * Also a peek offset cannot be smaller than the corresponding end offset
160 * of the last CFM_OP_DATA_OUT/CFM_OP_DATA_IN message dispatched. Trying
161 * to set too small a peek value is silently ignored.
162 *
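 * For illustration, an agent that has seen the first 512 bytes in each
 * direction, wants to let them pass and peek at the next 1024 bytes could
 * send an action like the sketch below (field names are those of
 * "struct cfil_msg_action"; "cfil_fd" and "sock_id" are hypothetical and
 * sock_id would come from the CFM_OP_SOCKET_ATTACHED event):
 *
 *	struct cfil_msg_action action = {
 *		.cfa_msghdr = {
 *			.cfm_len     = sizeof(action),
 *			.cfm_version = CFM_VERSION_CURRENT,
 *			.cfm_type    = CFM_TYPE_ACTION,
 *			.cfm_op      = CFM_OP_DATA_UPDATE,
 *			.cfm_sock_id = sock_id,
 *		},
 *		.cfa_out_pass_offset = 512,
 *		.cfa_out_peek_offset = 512 + 1024,
 *		.cfa_in_pass_offset  = 512,
 *		.cfa_in_peek_offset  = 512 + 1024,
 *	};
 *	send(cfil_fd, &action, sizeof(action), 0);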
163 *
164 * PER SOCKET "struct cfil_info"
165 *
166 * As soon as a TCP/IP socket gets attached to a content filter, a
167 * "struct cfil_info" is created to hold the content filtering state for this
168 * socket.
169 *
170 * The content filtering state is made of the following information
171 * for each direction:
172 * - The current pass offset;
173 * - The first and last offsets of the data pending, waiting for a filtering
174 * decision;
175 * - The inject queue for data that passed the filters and that needs
176 * to be re-injected;
177 * - A content filter specific state in a set of "struct cfil_entry"
178 *
179 *
180 * CONTENT FILTER STATE "struct cfil_entry"
181 *
182 * The "struct cfil_entry" maintains the information most relevant to the
183 * message handling over a kernel control socket with a user space filter agent.
184 *
185 * The "struct cfil_entry" holds the NECP filter control unit that corresponds
186 * to the kernel control socket unit it corresponds to and also has a pointer
187 * to the corresponding "struct content_filter".
188 *
189 * For each direction, "struct cfil_entry" maintains the following information:
190 * - The pass offset
191 * - The peek offset
192 * - The offset of the last data peeked at by the filter
193 * - A queue of data that's waiting to be delivered to the user space filter
194 * agent on the kernel control socket
195 * - A queue of data for which event messages have been sent on the kernel
196 * control socket and are pending for a filtering decision.
197 *
198 *
199 * CONTENT FILTER QUEUES
200 *
201 * Data that is being filtered is steered away from the TCP/IP socket buffer
202 * and instead will sit in one of three content filter queues until the data
203 * can be re-injected into the TCP/IP socket buffer.
204 *
205 * A content filter queue is represented by "struct cfil_queue" that contains
206 * a list of mbufs and the start and end offset of the data span of
207 * the list of mbufs.
208 *
209 * The data moves into the three content filter queues according to this
210 * sequence:
211 * a) The "cfe_ctl_q" of "struct cfil_entry"
212 * b) The "cfe_pending_q" of "struct cfil_entry"
213 * c) The "cfi_inject_q" of "struct cfil_info"
214 *
215 * Note: The sequence (a),(b) may be repeated several times if there is more
216 * than one content filter attached to the TCP/IP socket.
217 *
218 * The "cfe_ctl_q" queue holds data than cannot be delivered to the
219 * kernel conntrol socket for two reasons:
220 * - The peek offset is less that the end offset of the mbuf data
221 * - The kernel control socket is flow controlled
222 *
223 * The "cfe_pending_q" queue holds data for which CFM_OP_DATA_OUT or
224 * CFM_OP_DATA_IN have been successfully dispatched to the kernel control
225 * socket and are waiting for a pass action message from the user space
226 * filter agent. An mbuf must be fully allowed to pass before it is removed
227 * from the cfe_pending_q.
228 *
229 * The "cfi_inject_q" queue holds data that has been fully allowed to pass
230 * by the user space filter agent and that needs to be re-injected into the
231 * TCP/IP socket.
232 *
233 *
234 * IMPACT ON FLOW CONTROL
235 *
236 * An essential aspect of the content filter subsystem is to minimize the
237 * impact on flow control of the TCP/IP sockets being filtered.
238 *
239 * The processing overhead of the content filtering may have an effect on
240 * flow control by adding noticeable delays and cannot be eliminated --
241 * care must be taken by the user space filter agent to minimize the
242 * processing delays.
243 *
244 * The amount of data being filtered is kept in buffers while waiting for
245 * a decision by the user space filter agent. This amount of data pending
246 * needs to be subtracted from the amount of data available in the
247 * corresponding TCP/IP socket buffer. This is done by modifying
248 * sbspace() and tcp_sbspace() to account for the amount of data pending
249 * in the content filter.
250 *
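 * Conceptually the adjustment looks like the sketch below; the real
 * sbspace()/tcp_sbspace() computations also account for mbuf overhead and
 * clamping, and "pending" comes from the content filter accounting:
 *
 *	int64_t pending = cfil_sock_data_pending(sb);	// bytes held by content filters
 *	int64_t space = sb->sb_hiwat - (sb->sb_cc + pending);
 *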
251 *
252 * LOCKING STRATEGY
253 *
254 * The global state of content filter subsystem is protected by a single
255 * read-write lock "cfil_lck_rw". The data flow can be done with the
256 * cfil read-write lock held as shared so it can be re-entered from multiple
257 * threads.
258 *
259 * The per TCP/IP socket content filter state -- "struct cfil_info" -- is
260 * protected by the socket lock.
261 *
262 * A TCP/IP socket lock cannot be taken while the cfil read-write lock
263 * is held. That's why we have some sequences where we drop the cfil read-write
264 * lock before taking the TCP/IP socket lock.
265 *
266 * It is also important to lock the TCP/IP socket buffer while the content
267 * filter is modifying the amount of pending data. Otherwise the calculations
268 * in sbspace() and tcp_sbspace() could be wrong.
269 *
270 * The "cfil_lck_rw" protects "struct content_filter" and also the fields
271 * "cfe_link" and "cfe_filter" of "struct cfil_entry".
272 *
273 * Actually "cfe_link" and "cfe_filter" are protected by both by
274 * "cfil_lck_rw" and the socket lock: they may be modified only when
275 * "cfil_lck_rw" is exclusive and the socket is locked.
276 *
277 * To read the other fields of "struct content_filter" we have to take
278 * "cfil_lck_rw" in shared mode.
279 *
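 * For illustration, a typical sequence that respects this ordering looks
 * like the simplified sketch below (see e.g. cfil_ctl_disconnect() for the
 * exclusive-lock variant of the same pattern):
 *
 *	cfil_rw_lock_shared(&cfil_lck_rw);
 *	// ... find the relevant "struct content_filter" / "struct cfil_entry" ...
 *	cfil_rw_unlock_shared(&cfil_lck_rw);	// drop before taking the socket lock
 *	socket_lock(so, 1);
 *	// ... re-validate any state that may have changed while unlocked ...
 *	socket_unlock(so, 1);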
280 *
281 * LIMITATIONS
282 *
283 * - For TCP sockets only
284 *
285 * - Does not support TCP unordered messages
286 */
287
288 /*
289 * TO DO LIST
290 *
291 * SOONER:
292 *
293 * Deal with OOB
294 *
295 * LATER:
296 *
297 * If datagram support is added, enqueue control and address mbufs as well
298 */
299
300 #include <sys/types.h>
301 #include <sys/kern_control.h>
302 #include <sys/queue.h>
303 #include <sys/domain.h>
304 #include <sys/protosw.h>
305 #include <sys/syslog.h>
306 #include <sys/systm.h>
307 #include <sys/param.h>
308 #include <sys/mbuf.h>
309
310 #include <kern/locks.h>
311 #include <kern/zalloc.h>
312 #include <kern/debug.h>
313
314 #include <net/content_filter.h>
315 #include <net/content_filter_crypto.h>
316
317 #include <netinet/in_pcb.h>
318 #include <netinet/tcp.h>
319 #include <netinet/tcp_var.h>
320 #include <netinet/udp.h>
321 #include <netinet/udp_var.h>
322
323 #include <string.h>
324 #include <libkern/libkern.h>
325 #include <kern/sched_prim.h>
326 #include <kern/task.h>
327 #include <mach/task_info.h>
328
329 #if !TARGET_OS_OSX && !defined(XNU_TARGET_OS_OSX)
330 #define MAX_CONTENT_FILTER 2
331 #else
332 #define MAX_CONTENT_FILTER 8
333 #endif
334
335 struct cfil_entry;
336
337 /*
338 * The structure content_filter represents a user space content filter
339 * It's created and associated with a kernel control socket instance
340 */
341 struct content_filter {
342 kern_ctl_ref cf_kcref;
343 u_int32_t cf_kcunit;
344 u_int32_t cf_flags;
345
346 uint32_t cf_necp_control_unit;
347
348 uint32_t cf_sock_count;
349 TAILQ_HEAD(, cfil_entry) cf_sock_entries;
350
351 cfil_crypto_state_t cf_crypto_state;
352 };
353
354 #define CFF_ACTIVE 0x01
355 #define CFF_DETACHING 0x02
356 #define CFF_FLOW_CONTROLLED 0x04
357
358 struct content_filter **content_filters = NULL;
359 uint32_t cfil_active_count = 0; /* Number of active content filters */
360 uint32_t cfil_sock_attached_count = 0; /* Number of socket attachments */
361 uint32_t cfil_sock_udp_attached_count = 0; /* Number of UDP socket attachments */
362 uint32_t cfil_sock_attached_stats_count = 0; /* Number of sockets that requested periodic stats reports */
363 uint32_t cfil_close_wait_timeout = 1000; /* in milliseconds */
364
365 static kern_ctl_ref cfil_kctlref = NULL;
366
367 static lck_grp_attr_t *cfil_lck_grp_attr = NULL;
368 static lck_attr_t *cfil_lck_attr = NULL;
369 static lck_grp_t *cfil_lck_grp = NULL;
370 decl_lck_rw_data(static, cfil_lck_rw);
371
372 #define CFIL_RW_LCK_MAX 8
373
374 int cfil_rw_nxt_lck = 0;
375 void* cfil_rw_lock_history[CFIL_RW_LCK_MAX];
376
377 int cfil_rw_nxt_unlck = 0;
378 void* cfil_rw_unlock_history[CFIL_RW_LCK_MAX];
379
380 #define CONTENT_FILTER_ZONE_NAME "content_filter"
381 #define CONTENT_FILTER_ZONE_MAX 10
382 static struct zone *content_filter_zone = NULL; /* zone for content_filter */
383
384
385 #define CFIL_INFO_ZONE_NAME "cfil_info"
386 #define CFIL_INFO_ZONE_MAX 1024
387 static struct zone *cfil_info_zone = NULL; /* zone for cfil_info */
388
389 MBUFQ_HEAD(cfil_mqhead);
390
391 struct cfil_queue {
392 uint64_t q_start; /* offset of first byte in queue */
393 uint64_t q_end; /* offset of last byte in queue */
394 struct cfil_mqhead q_mq;
395 };
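
/*
 * For example (illustrative numbers): a queue holding a single 512-byte mbuf
 * chain whose first byte sits at absolute stream offset 4096 has
 *
 *	q_start == 4096, q_end == 4608	=>  cfil_queue_len() == 512
 *
 * and cfil_queue_drain() resets both offsets back to 0.
 */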
396
397 /*
398 * struct cfil_entry
399 *
400 * There is one entry per content filter
401 */
402 struct cfil_entry {
403 TAILQ_ENTRY(cfil_entry) cfe_link;
404 SLIST_ENTRY(cfil_entry) cfe_order_link;
405 struct content_filter *cfe_filter;
406
407 struct cfil_info *cfe_cfil_info;
408 uint32_t cfe_flags;
409 uint32_t cfe_necp_control_unit;
410 struct timeval cfe_last_event; /* To user space */
411 struct timeval cfe_last_action; /* From user space */
412 uint64_t cfe_byte_inbound_count_reported; /* stats already reported */
413 uint64_t cfe_byte_outbound_count_reported; /* stats already reported */
414 struct timeval cfe_stats_report_ts; /* Timestamp for last stats report */
415 uint32_t cfe_stats_report_frequency; /* Interval for stats report in msecs */
416 boolean_t cfe_laddr_sent;
417
418 struct cfe_buf {
419 /*
420 * cfe_pending_q holds data that has been delivered to
421 * the filter and for which we are waiting for an action
422 */
423 struct cfil_queue cfe_pending_q;
424 /*
425 * This queue is for data that has not been delivered to
426 * the content filter (new data, past the peek offset, or flow control)
427 */
428 struct cfil_queue cfe_ctl_q;
429
430 uint64_t cfe_pass_offset;
431 uint64_t cfe_peek_offset;
432 uint64_t cfe_peeked;
433 } cfe_snd, cfe_rcv;
434 };
435
436 #define CFEF_CFIL_ATTACHED 0x0001 /* was attached to filter */
437 #define CFEF_SENT_SOCK_ATTACHED 0x0002 /* sock attach event was sent */
438 #define CFEF_DATA_START 0x0004 /* can send data event */
439 #define CFEF_FLOW_CONTROLLED 0x0008 /* wait for flow control lift */
440 #define CFEF_SENT_DISCONNECT_IN 0x0010 /* event was sent */
441 #define CFEF_SENT_DISCONNECT_OUT 0x0020 /* event was sent */
442 #define CFEF_SENT_SOCK_CLOSED 0x0040 /* closed event was sent */
443 #define CFEF_CFIL_DETACHED 0x0080 /* filter was detached */
444
445
446 #define CFI_ADD_TIME_LOG(cfil, t1, t0, op) \
447 struct timeval _tdiff; \
448 if ((cfil)->cfi_op_list_ctr < CFI_MAX_TIME_LOG_ENTRY) { \
449 timersub(t1, t0, &_tdiff); \
450 (cfil)->cfi_op_time[(cfil)->cfi_op_list_ctr] = (uint32_t)(_tdiff.tv_sec * 1000 + _tdiff.tv_usec / 1000);\
451 (cfil)->cfi_op_list[(cfil)->cfi_op_list_ctr] = (unsigned char)op; \
452 (cfil)->cfi_op_list_ctr ++; \
453 }
454
455 struct cfil_hash_entry;
456
457 /*
458 * struct cfil_info
459 *
460 * There is a struct cfil_info per socket
461 */
462 struct cfil_info {
463 TAILQ_ENTRY(cfil_info) cfi_link;
464 TAILQ_ENTRY(cfil_info) cfi_link_stats;
465 struct socket *cfi_so;
466 uint64_t cfi_flags;
467 uint64_t cfi_sock_id;
468 struct timeval64 cfi_first_event;
469 uint32_t cfi_op_list_ctr;
470 uint32_t cfi_op_time[CFI_MAX_TIME_LOG_ENTRY]; /* time interval in microseconds since first event */
471 unsigned char cfi_op_list[CFI_MAX_TIME_LOG_ENTRY];
472 union sockaddr_in_4_6 cfi_so_attach_faddr; /* faddr at the time of attach */
473 union sockaddr_in_4_6 cfi_so_attach_laddr; /* laddr at the time of attach */
474
475 int cfi_dir;
476 uint64_t cfi_byte_inbound_count;
477 uint64_t cfi_byte_outbound_count;
478
479 boolean_t cfi_isSignatureLatest; /* Indicates if signature covers latest flow attributes */
480 struct cfi_buf {
481 /*
482 * cfi_pending_first and cfi_pending_last describe the total
483 * amount of data outstanding for all the filters on
484 * this socket and data in the flow queue
485 * cfi_pending_mbcnt counts in sballoc() "chars of mbufs used"
486 */
487 uint64_t cfi_pending_first;
488 uint64_t cfi_pending_last;
489 uint32_t cfi_pending_mbcnt;
490 uint32_t cfi_pending_mbnum;
491 uint32_t cfi_tail_drop_cnt;
492 /*
493 * cfi_pass_offset is the minimum pass offset across all the filters
494 */
495 uint64_t cfi_pass_offset;
496 /*
497 * cfi_inject_q holds data that needs to be re-injected
498 * into the socket after filtering and that can
499 * be queued because of flow control
500 */
501 struct cfil_queue cfi_inject_q;
502 } cfi_snd, cfi_rcv;
503
504 struct cfil_entry cfi_entries[MAX_CONTENT_FILTER];
505 struct cfil_hash_entry *cfi_hash_entry;
506 SLIST_HEAD(, cfil_entry) cfi_ordered_entries;
507 } __attribute__((aligned(8)));
508
509 #define CFIF_DROP 0x0001 /* drop action applied */
510 #define CFIF_CLOSE_WAIT 0x0002 /* waiting for filter to close */
511 #define CFIF_SOCK_CLOSED 0x0004 /* socket is closed */
512 #define CFIF_RETRY_INJECT_IN 0x0010 /* inject in failed */
513 #define CFIF_RETRY_INJECT_OUT 0x0020 /* inject out failed */
514 #define CFIF_SHUT_WR 0x0040 /* shutdown write */
515 #define CFIF_SHUT_RD 0x0080 /* shutdown read */
516 #define CFIF_SOCKET_CONNECTED 0x0100 /* socket is connected */
517 #define CFIF_INITIAL_VERDICT 0x0200 /* received initial verdict */
518
519 #define CFI_MASK_GENCNT 0xFFFFFFFF00000000 /* upper 32 bits */
520 #define CFI_SHIFT_GENCNT 32
521 #define CFI_MASK_FLOWHASH 0x00000000FFFFFFFF /* lower 32 bits */
522 #define CFI_SHIFT_FLOWHASH 0
523
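/*
 * For illustration (a sketch; the id is composed when the cfil_info is
 * allocated): a cfil_sock_id packs the socket generation count in the upper
 * 32 bits and a per-flow hash in the lower 32 bits, and is taken apart with
 * the masks above:
 *
 *	gencnt   = (cfi_sock_id & CFI_MASK_GENCNT) >> CFI_SHIFT_GENCNT;
 *	flowhash = (cfi_sock_id & CFI_MASK_FLOWHASH) >> CFI_SHIFT_FLOWHASH;
 */
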
524 #define CFI_ENTRY_KCUNIT(i, e) (((e) - &((i)->cfi_entries[0])) + 1)
525
526 TAILQ_HEAD(cfil_sock_head, cfil_info) cfil_sock_head;
527 TAILQ_HEAD(cfil_sock_head_stats, cfil_info) cfil_sock_head_stats;
528
529 #define CFIL_QUEUE_VERIFY(x) if (cfil_debug) cfil_queue_verify(x)
530 #define CFIL_INFO_VERIFY(x) if (cfil_debug) cfil_info_verify(x)
531
532 /*
533 * UDP Socket Support
534 */
535 LIST_HEAD(cfilhashhead, cfil_hash_entry);
536 #define CFILHASHSIZE 16
537 #define CFIL_HASH(laddr, faddr, lport, fport) ((faddr) ^ ((laddr) >> 16) ^ (fport) ^ (lport))
538 #define IS_UDP(so) (so && so->so_proto && so->so_proto->pr_type == SOCK_DGRAM && so->so_proto->pr_protocol == IPPROTO_UDP)
539 #define UNCONNECTED(inp) (inp && (((inp->inp_vflag & INP_IPV4) && (inp->inp_faddr.s_addr == INADDR_ANY)) || \
540 ((inp->inp_vflag & INP_IPV6) && IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr))))
541 #define IS_ENTRY_ATTACHED(cfil_info, kcunit) (cfil_info != NULL && (kcunit <= MAX_CONTENT_FILTER) && \
542 cfil_info->cfi_entries[kcunit - 1].cfe_filter != NULL)
543 #define IS_DNS(local, remote) (check_port(local, 53) || check_port(remote, 53) || check_port(local, 5353) || check_port(remote, 5353))
544 #define IS_INITIAL_TFO_DATA(so) (so && (so->so_flags1 & SOF1_PRECONNECT_DATA) && (so->so_state & SS_ISCONNECTING))
545 #define NULLADDRESS(addr) ((addr.sa.sa_len == 0) || \
546 (addr.sa.sa_family == AF_INET && addr.sin.sin_addr.s_addr == 0) || \
547 (addr.sa.sa_family == AF_INET6 && IN6_IS_ADDR_UNSPECIFIED(&addr.sin6.sin6_addr)))
548
549 /*
550 * Periodic Statistics Report:
551 */
552 static struct thread *cfil_stats_report_thread;
553 #define CFIL_STATS_REPORT_INTERVAL_MIN_MSEC 500 // Highest report frequency
554 #define CFIL_STATS_REPORT_RUN_INTERVAL_NSEC (CFIL_STATS_REPORT_INTERVAL_MIN_MSEC * NSEC_PER_MSEC)
555 #define CFIL_STATS_REPORT_MAX_COUNT 50 // Max stats to be reported per run
556
557 /* This buffer must have the same layout as struct cfil_msg_stats_report */
558 struct cfil_stats_report_buffer {
559 struct cfil_msg_hdr msghdr;
560 uint32_t count;
561 struct cfil_msg_sock_stats stats[CFIL_STATS_REPORT_MAX_COUNT];
562 };
563 static struct cfil_stats_report_buffer *global_cfil_stats_report_buffers[MAX_CONTENT_FILTER];
564 static uint32_t global_cfil_stats_counts[MAX_CONTENT_FILTER];
565
566 /*
567 * UDP Garbage Collection:
568 */
569 static struct thread *cfil_udp_gc_thread;
570 #define UDP_FLOW_GC_IDLE_TO 30 // Flow Idle Timeout in seconds
571 #define UDP_FLOW_GC_ACTION_TO 10 // Flow Action Timeout (no action from user space) in seconds
572 #define UDP_FLOW_GC_MAX_COUNT 100 // Max UDP flows to be handled per run
573 #define UDP_FLOW_GC_RUN_INTERVAL_NSEC (10 * NSEC_PER_SEC) // GC wakes up every 10 seconds
574
575 /*
576 * UDP flow queue thresholds
577 */
578 #define UDP_FLOW_GC_MBUF_CNT_MAX (2 << MBSHIFT) // Max mbuf byte count in flow queue (2MB)
579 #define UDP_FLOW_GC_MBUF_NUM_MAX (UDP_FLOW_GC_MBUF_CNT_MAX >> MCLSHIFT) // Max mbuf count in flow queue (1K)
580 #define UDP_FLOW_GC_MBUF_SHIFT 5 // Shift to get 1/32 of platform limits
581 /*
582 * UDP flow queue threshold globals:
583 */
584 static unsigned int cfil_udp_gc_mbuf_num_max = UDP_FLOW_GC_MBUF_NUM_MAX;
585 static unsigned int cfil_udp_gc_mbuf_cnt_max = UDP_FLOW_GC_MBUF_CNT_MAX;
586
587 /*
588 * struct cfil_hash_entry
589 *
590 * Hash entry for cfil_info
591 */
592 struct cfil_hash_entry {
593 LIST_ENTRY(cfil_hash_entry) cfentry_link;
594 struct cfil_info *cfentry_cfil;
595 u_short cfentry_fport;
596 u_short cfentry_lport;
597 sa_family_t cfentry_family;
598 u_int32_t cfentry_flowhash;
599 u_int64_t cfentry_lastused;
600 union {
601 /* foreign host table entry */
602 struct in_addr_4in6 addr46;
603 struct in6_addr addr6;
604 } cfentry_faddr;
605 union {
606 /* local host table entry */
607 struct in_addr_4in6 addr46;
608 struct in6_addr addr6;
609 } cfentry_laddr;
610 };
611
612 /*
613 * struct cfil_db
614 *
615 * For each UDP socket, this is a hash table maintaining all cfil_info structs
616 * keyed by the flow 4-tuples <lport,fport,laddr,faddr>.
617 */
618 struct cfil_db {
619 struct socket *cfdb_so;
620 uint32_t cfdb_count; /* Number of total content filters */
621 struct cfilhashhead *cfdb_hashbase;
622 u_long cfdb_hashmask;
623 struct cfil_hash_entry *cfdb_only_entry; /* Optimization for connected UDP */
624 };
625
626 /*
627 * CFIL specific mbuf tag:
628 * Save state of socket at the point of data entry into cfil.
629 * Use saved state for reinjection at protocol layer.
630 */
631 struct cfil_tag {
632 union sockaddr_in_4_6 cfil_faddr;
633 uint32_t cfil_so_state_change_cnt;
634 short cfil_so_options;
635 };
636
637 #define CFIL_HASH_ENTRY_ZONE_NAME "cfil_entry_hash"
638 #define CFIL_HASH_ENTRY_ZONE_MAX 1024
639 static struct zone *cfil_hash_entry_zone = NULL;
640
641 #define CFIL_DB_ZONE_NAME "cfil_db"
642 #define CFIL_DB_ZONE_MAX 1024
643 static struct zone *cfil_db_zone = NULL;
644
645 /*
646 * Statistics
647 */
648
649 struct cfil_stats cfil_stats;
650
651 /*
652 * For troubleshooting
653 */
654 int cfil_log_level = LOG_ERR;
655 int cfil_debug = 1;
656
657 // Debug controls added for selective debugging.
658 // Disabled for production. If enabled,
659 // these will have performance impact
660 #define LIFECYCLE_DEBUG 0
661 #define VERDICT_DEBUG 0
662 #define DATA_DEBUG 0
663 #define SHOW_DEBUG 0
664 #define GC_DEBUG 0
665 #define STATS_DEBUG 0
666
667 /*
668 * Sysctls for logs and statistics
669 */
670 static int sysctl_cfil_filter_list(struct sysctl_oid *, void *, int,
671 struct sysctl_req *);
672 static int sysctl_cfil_sock_list(struct sysctl_oid *, void *, int,
673 struct sysctl_req *);
674
675 SYSCTL_NODE(_net, OID_AUTO, cfil, CTLFLAG_RW | CTLFLAG_LOCKED, 0, "cfil");
676
677 SYSCTL_INT(_net_cfil, OID_AUTO, log, CTLFLAG_RW | CTLFLAG_LOCKED,
678 &cfil_log_level, 0, "");
679
680 SYSCTL_INT(_net_cfil, OID_AUTO, debug, CTLFLAG_RW | CTLFLAG_LOCKED,
681 &cfil_debug, 0, "");
682
683 SYSCTL_UINT(_net_cfil, OID_AUTO, sock_attached_count, CTLFLAG_RD | CTLFLAG_LOCKED,
684 &cfil_sock_attached_count, 0, "");
685
686 SYSCTL_UINT(_net_cfil, OID_AUTO, active_count, CTLFLAG_RD | CTLFLAG_LOCKED,
687 &cfil_active_count, 0, "");
688
689 SYSCTL_UINT(_net_cfil, OID_AUTO, close_wait_timeout, CTLFLAG_RW | CTLFLAG_LOCKED,
690 &cfil_close_wait_timeout, 0, "");
691
692 static int cfil_sbtrim = 1;
693 SYSCTL_UINT(_net_cfil, OID_AUTO, sbtrim, CTLFLAG_RW | CTLFLAG_LOCKED,
694 &cfil_sbtrim, 0, "");
695
696 SYSCTL_PROC(_net_cfil, OID_AUTO, filter_list, CTLFLAG_RD | CTLFLAG_LOCKED,
697 0, 0, sysctl_cfil_filter_list, "S,cfil_filter_stat", "");
698
699 SYSCTL_PROC(_net_cfil, OID_AUTO, sock_list, CTLFLAG_RD | CTLFLAG_LOCKED,
700 0, 0, sysctl_cfil_sock_list, "S,cfil_sock_stat", "");
701
702 SYSCTL_STRUCT(_net_cfil, OID_AUTO, stats, CTLFLAG_RD | CTLFLAG_LOCKED,
703 &cfil_stats, cfil_stats, "");
704
705 /*
706 * Forward declaration to appease the compiler
707 */
708 static int cfil_action_data_pass(struct socket *, struct cfil_info *, uint32_t, int,
709 uint64_t, uint64_t);
710 static int cfil_action_drop(struct socket *, struct cfil_info *, uint32_t);
711 static int cfil_action_bless_client(uint32_t, struct cfil_msg_hdr *);
712 static int cfil_action_set_crypto_key(uint32_t, struct cfil_msg_hdr *);
713 static int cfil_dispatch_closed_event(struct socket *, struct cfil_info *, int);
714 static int cfil_data_common(struct socket *, struct cfil_info *, int, struct sockaddr *,
715 struct mbuf *, struct mbuf *, uint32_t);
716 static int cfil_data_filter(struct socket *, struct cfil_info *, uint32_t, int,
717 struct mbuf *, uint64_t);
718 static void fill_ip_sockaddr_4_6(union sockaddr_in_4_6 *,
719 struct in_addr, u_int16_t);
720 static void fill_ip6_sockaddr_4_6(union sockaddr_in_4_6 *,
721 struct in6_addr *, u_int16_t);
722
723 static int cfil_dispatch_attach_event(struct socket *, struct cfil_info *, uint32_t, int);
724 static void cfil_info_free(struct cfil_info *);
725 static struct cfil_info * cfil_info_alloc(struct socket *, struct cfil_hash_entry *);
726 static int cfil_info_attach_unit(struct socket *, uint32_t, struct cfil_info *);
727 static struct socket * cfil_socket_from_sock_id(cfil_sock_id_t, bool);
728 static struct socket * cfil_socket_from_client_uuid(uuid_t, bool *);
729 static int cfil_service_pending_queue(struct socket *, struct cfil_info *, uint32_t, int);
730 static int cfil_data_service_ctl_q(struct socket *, struct cfil_info *, uint32_t, int);
731 static void cfil_info_verify(struct cfil_info *);
732 static int cfil_update_data_offsets(struct socket *, struct cfil_info *, uint32_t, int,
733 uint64_t, uint64_t);
734 static int cfil_acquire_sockbuf(struct socket *, struct cfil_info *, int);
735 static void cfil_release_sockbuf(struct socket *, int);
736 static int cfil_filters_attached(struct socket *);
737
738 static void cfil_rw_lock_exclusive(lck_rw_t *);
739 static void cfil_rw_unlock_exclusive(lck_rw_t *);
740 static void cfil_rw_lock_shared(lck_rw_t *);
741 static void cfil_rw_unlock_shared(lck_rw_t *);
742 static boolean_t cfil_rw_lock_shared_to_exclusive(lck_rw_t *);
743 static void cfil_rw_lock_exclusive_to_shared(lck_rw_t *);
744
745 static unsigned int cfil_data_length(struct mbuf *, int *, int *);
746 static errno_t cfil_db_init(struct socket *);
747 static void cfil_db_free(struct socket *so);
748 struct cfil_hash_entry *cfil_db_lookup_entry(struct cfil_db *, struct sockaddr *, struct sockaddr *);
749 struct cfil_hash_entry *cfil_db_lookup_entry_with_sockid(struct cfil_db *, u_int64_t);
750 struct cfil_hash_entry *cfil_db_add_entry(struct cfil_db *, struct sockaddr *, struct sockaddr *);
751 void cfil_db_delete_entry(struct cfil_db *, struct cfil_hash_entry *);
752 struct cfil_hash_entry *cfil_sock_udp_get_flow(struct socket *, uint32_t, bool, struct sockaddr *, struct sockaddr *);
753 struct cfil_info *cfil_db_get_cfil_info(struct cfil_db *, cfil_sock_id_t);
754 static errno_t cfil_sock_udp_handle_data(bool, struct socket *, struct sockaddr *, struct sockaddr *,
755 struct mbuf *, struct mbuf *, uint32_t);
756 static int32_t cfil_sock_udp_data_pending(struct sockbuf *, bool);
757 static void cfil_sock_udp_is_closed(struct socket *);
758 static int cfil_sock_udp_notify_shutdown(struct socket *, int, int, int);
759 static int cfil_sock_udp_shutdown(struct socket *, int *);
760 static void cfil_sock_udp_close_wait(struct socket *);
761 static void cfil_sock_udp_buf_update(struct sockbuf *);
762 static int cfil_filters_udp_attached(struct socket *, bool);
763 static void cfil_get_flow_address_v6(struct cfil_hash_entry *, struct inpcb *,
764 struct in6_addr **, struct in6_addr **,
765 u_int16_t *, u_int16_t *);
766 static void cfil_get_flow_address(struct cfil_hash_entry *, struct inpcb *,
767 struct in_addr *, struct in_addr *,
768 u_int16_t *, u_int16_t *);
769 static void cfil_info_log(int, struct cfil_info *, const char *);
770 void cfil_filter_show(u_int32_t);
771 void cfil_info_show(void);
772 bool cfil_info_idle_timed_out(struct cfil_info *, int, u_int32_t);
773 bool cfil_info_action_timed_out(struct cfil_info *, int);
774 bool cfil_info_buffer_threshold_exceeded(struct cfil_info *);
775 struct m_tag *cfil_udp_save_socket_state(struct cfil_info *, struct mbuf *);
776 static void cfil_udp_gc_thread_func(void *, wait_result_t);
777 static void cfil_info_udp_expire(void *, wait_result_t);
778 static bool fill_cfil_hash_entry_from_address(struct cfil_hash_entry *, bool, struct sockaddr *);
779 static void cfil_sock_received_verdict(struct socket *so);
780 static void cfil_fill_event_msg_addresses(struct cfil_hash_entry *, struct inpcb *,
781 union sockaddr_in_4_6 *, union sockaddr_in_4_6 *,
782 boolean_t, boolean_t);
783 static void cfil_stats_report_thread_func(void *, wait_result_t);
784 static void cfil_stats_report(void *v, wait_result_t w);
785
786 bool check_port(struct sockaddr *, u_short);
787
788 /*
789 * Content filter global read write lock
790 */
791
792 static void
793 cfil_rw_lock_exclusive(lck_rw_t *lck)
794 {
795 void *lr_saved;
796
797 lr_saved = __builtin_return_address(0);
798
799 lck_rw_lock_exclusive(lck);
800
801 cfil_rw_lock_history[cfil_rw_nxt_lck] = lr_saved;
802 cfil_rw_nxt_lck = (cfil_rw_nxt_lck + 1) % CFIL_RW_LCK_MAX;
803 }
804
805 static void
806 cfil_rw_unlock_exclusive(lck_rw_t *lck)
807 {
808 void *lr_saved;
809
810 lr_saved = __builtin_return_address(0);
811
812 lck_rw_unlock_exclusive(lck);
813
814 cfil_rw_unlock_history[cfil_rw_nxt_unlck] = lr_saved;
815 cfil_rw_nxt_unlck = (cfil_rw_nxt_unlck + 1) % CFIL_RW_LCK_MAX;
816 }
817
818 static void
819 cfil_rw_lock_shared(lck_rw_t *lck)
820 {
821 void *lr_saved;
822
823 lr_saved = __builtin_return_address(0);
824
825 lck_rw_lock_shared(lck);
826
827 cfil_rw_lock_history[cfil_rw_nxt_lck] = lr_saved;
828 cfil_rw_nxt_lck = (cfil_rw_nxt_lck + 1) % CFIL_RW_LCK_MAX;
829 }
830
831 static void
832 cfil_rw_unlock_shared(lck_rw_t *lck)
833 {
834 void *lr_saved;
835
836 lr_saved = __builtin_return_address(0);
837
838 lck_rw_unlock_shared(lck);
839
840 cfil_rw_unlock_history[cfil_rw_nxt_unlck] = lr_saved;
841 cfil_rw_nxt_unlck = (cfil_rw_nxt_unlck + 1) % CFIL_RW_LCK_MAX;
842 }
843
844 static boolean_t
845 cfil_rw_lock_shared_to_exclusive(lck_rw_t *lck)
846 {
847 void *lr_saved;
848 boolean_t upgraded;
849
850 lr_saved = __builtin_return_address(0);
851
852 upgraded = lck_rw_lock_shared_to_exclusive(lck);
853 if (upgraded) {
854 cfil_rw_unlock_history[cfil_rw_nxt_unlck] = lr_saved;
855 cfil_rw_nxt_unlck = (cfil_rw_nxt_unlck + 1) % CFIL_RW_LCK_MAX;
856 }
857 return upgraded;
858 }
859
860 static void
861 cfil_rw_lock_exclusive_to_shared(lck_rw_t *lck)
862 {
863 void *lr_saved;
864
865 lr_saved = __builtin_return_address(0);
866
867 lck_rw_lock_exclusive_to_shared(lck);
868
869 cfil_rw_lock_history[cfil_rw_nxt_lck] = lr_saved;
870 cfil_rw_nxt_lck = (cfil_rw_nxt_lck + 1) % CFIL_RW_LCK_MAX;
871 }
872
873 static void
874 cfil_rw_lock_assert_held(lck_rw_t *lck, int exclusive)
875 {
876 #if !MACH_ASSERT
877 #pragma unused(lck, exclusive)
878 #endif
879 LCK_RW_ASSERT(lck,
880 exclusive ? LCK_RW_ASSERT_EXCLUSIVE : LCK_RW_ASSERT_HELD);
881 }
882
883 /*
884 * Return the number of bytes in the mbuf chain using the same
885 * method as m_length() or sballoc()
886 *
887 * Returns data len - starting from PKT start
888 * - retmbcnt - optional param to get total mbuf bytes in chain
889 * - retmbnum - optional param to get number of mbufs in chain
890 */
891 static unsigned int
892 cfil_data_length(struct mbuf *m, int *retmbcnt, int *retmbnum)
893 {
894 struct mbuf *m0;
895 unsigned int pktlen = 0;
896 int mbcnt;
897 int mbnum;
898
899 // Locate the start of data
900 for (m0 = m; m0 != NULL; m0 = m0->m_next) {
901 if (m0->m_flags & M_PKTHDR) {
902 break;
903 }
904 }
905 if (m0 == NULL) {
906 CFIL_LOG(LOG_ERR, "cfil_data_length: no M_PKTHDR");
907 return 0;
908 }
909 m = m0;
910
911 if (retmbcnt == NULL && retmbnum == NULL) {
912 return m_length(m);
913 }
914
915 pktlen = 0;
916 mbcnt = 0;
917 mbnum = 0;
918 for (m0 = m; m0 != NULL; m0 = m0->m_next) {
919 pktlen += m0->m_len;
920 mbnum++;
921 mbcnt += MSIZE;
922 if (m0->m_flags & M_EXT) {
923 mbcnt += m0->m_ext.ext_size;
924 }
925 }
926 if (retmbcnt) {
927 *retmbcnt = mbcnt;
928 }
929 if (retmbnum) {
930 *retmbnum = mbnum;
931 }
932 return pktlen;
933 }
934
935 static struct mbuf *
936 cfil_data_start(struct mbuf *m)
937 {
938 struct mbuf *m0;
939
940 // Locate the start of data
941 for (m0 = m; m0 != NULL; m0 = m0->m_next) {
942 if (m0->m_flags & M_PKTHDR) {
943 break;
944 }
945 }
946 return m0;
947 }
948
949 /*
950 * Common mbuf queue utilities
951 */
952
953 static inline void
954 cfil_queue_init(struct cfil_queue *cfq)
955 {
956 cfq->q_start = 0;
957 cfq->q_end = 0;
958 MBUFQ_INIT(&cfq->q_mq);
959 }
960
961 static inline uint64_t
962 cfil_queue_drain(struct cfil_queue *cfq)
963 {
964 uint64_t drained = cfq->q_end - cfq->q_start;
965 cfq->q_start = 0;
966 cfq->q_end = 0;
967 MBUFQ_DRAIN(&cfq->q_mq);
968
969 return drained;
970 }
971
972 /* Return 1 when empty, 0 otherwise */
973 static inline int
974 cfil_queue_empty(struct cfil_queue *cfq)
975 {
976 return MBUFQ_EMPTY(&cfq->q_mq);
977 }
978
979 static inline uint64_t
980 cfil_queue_offset_first(struct cfil_queue *cfq)
981 {
982 return cfq->q_start;
983 }
984
985 static inline uint64_t
986 cfil_queue_offset_last(struct cfil_queue *cfq)
987 {
988 return cfq->q_end;
989 }
990
991 static inline uint64_t
992 cfil_queue_len(struct cfil_queue *cfq)
993 {
994 return cfq->q_end - cfq->q_start;
995 }
996
997 /*
998 * Routines to verify some fundamental assumptions
999 */
1000
1001 static void
1002 cfil_queue_verify(struct cfil_queue *cfq)
1003 {
1004 mbuf_t chain;
1005 mbuf_t m;
1006 mbuf_t n;
1007 uint64_t queuesize = 0;
1008
1009 /* Verify offsets are ordered */
1010 VERIFY(cfq->q_start <= cfq->q_end);
1011
1012 /*
1013 * When the queue is empty the offsets are equal, otherwise the offsets
1014 * are different
1015 */
1016 VERIFY((MBUFQ_EMPTY(&cfq->q_mq) && cfq->q_start == cfq->q_end) ||
1017 (!MBUFQ_EMPTY(&cfq->q_mq) &&
1018 cfq->q_start != cfq->q_end));
1019
1020 MBUFQ_FOREACH(chain, &cfq->q_mq) {
1021 size_t chainsize = 0;
1022 m = chain;
1023 unsigned int mlen = cfil_data_length(m, NULL, NULL);
1024 // skip the addr and control stuff if present
1025 m = cfil_data_start(m);
1026
1027 if (m == NULL ||
1028 m == (void *)M_TAG_FREE_PATTERN ||
1029 m->m_next == (void *)M_TAG_FREE_PATTERN ||
1030 m->m_nextpkt == (void *)M_TAG_FREE_PATTERN) {
1031 panic("%s - mq %p is free at %p", __func__,
1032 &cfq->q_mq, m);
1033 }
1034 for (n = m; n != NULL; n = n->m_next) {
1035 if (n->m_type != MT_DATA &&
1036 n->m_type != MT_HEADER &&
1037 n->m_type != MT_OOBDATA) {
1038 panic("%s - %p unsupported type %u", __func__,
1039 n, n->m_type);
1040 }
1041 chainsize += n->m_len;
1042 }
1043 if (mlen != chainsize) {
1044 panic("%s - %p m_length() %u != chainsize %lu",
1045 __func__, m, mlen, chainsize);
1046 }
1047 queuesize += chainsize;
1048 }
1049 if (queuesize != cfq->q_end - cfq->q_start) {
1050 panic("%s - %p queuesize %llu != offsetdiffs %llu", __func__,
1051 m, queuesize, cfq->q_end - cfq->q_start);
1052 }
1053 }
1054
1055 static void
1056 cfil_queue_enqueue(struct cfil_queue *cfq, mbuf_t m, size_t len)
1057 {
1058 CFIL_QUEUE_VERIFY(cfq);
1059
1060 MBUFQ_ENQUEUE(&cfq->q_mq, m);
1061 cfq->q_end += len;
1062
1063 CFIL_QUEUE_VERIFY(cfq);
1064 }
1065
1066 static void
1067 cfil_queue_remove(struct cfil_queue *cfq, mbuf_t m, size_t len)
1068 {
1069 CFIL_QUEUE_VERIFY(cfq);
1070
1071 VERIFY(cfil_data_length(m, NULL, NULL) == len);
1072
1073 MBUFQ_REMOVE(&cfq->q_mq, m);
1074 MBUFQ_NEXT(m) = NULL;
1075 cfq->q_start += len;
1076
1077 CFIL_QUEUE_VERIFY(cfq);
1078 }
1079
1080 static mbuf_t
1081 cfil_queue_first(struct cfil_queue *cfq)
1082 {
1083 return MBUFQ_FIRST(&cfq->q_mq);
1084 }
1085
1086 static mbuf_t
1087 cfil_queue_next(struct cfil_queue *cfq, mbuf_t m)
1088 {
1089 #pragma unused(cfq)
1090 return MBUFQ_NEXT(m);
1091 }
1092
1093 static void
1094 cfil_entry_buf_verify(struct cfe_buf *cfe_buf)
1095 {
1096 CFIL_QUEUE_VERIFY(&cfe_buf->cfe_ctl_q);
1097 CFIL_QUEUE_VERIFY(&cfe_buf->cfe_pending_q);
1098
1099 /* Verify the queues are ordered so that pending is before ctl */
1100 VERIFY(cfe_buf->cfe_ctl_q.q_start >= cfe_buf->cfe_pending_q.q_end);
1101
1102 /* The peek offset cannot be less than the pass offset */
1103 VERIFY(cfe_buf->cfe_peek_offset >= cfe_buf->cfe_pass_offset);
1104
1105 /* Make sure we've updated the offset we peeked at */
1106 VERIFY(cfe_buf->cfe_ctl_q.q_start <= cfe_buf->cfe_peeked);
1107 }
1108
1109 static void
1110 cfil_entry_verify(struct cfil_entry *entry)
1111 {
1112 cfil_entry_buf_verify(&entry->cfe_snd);
1113 cfil_entry_buf_verify(&entry->cfe_rcv);
1114 }
1115
1116 static void
1117 cfil_info_buf_verify(struct cfi_buf *cfi_buf)
1118 {
1119 CFIL_QUEUE_VERIFY(&cfi_buf->cfi_inject_q);
1120
1121 VERIFY(cfi_buf->cfi_pending_first <= cfi_buf->cfi_pending_last);
1122 }
1123
1124 static void
1125 cfil_info_verify(struct cfil_info *cfil_info)
1126 {
1127 int i;
1128
1129 if (cfil_info == NULL) {
1130 return;
1131 }
1132
1133 cfil_info_buf_verify(&cfil_info->cfi_snd);
1134 cfil_info_buf_verify(&cfil_info->cfi_rcv);
1135
1136 for (i = 0; i < MAX_CONTENT_FILTER; i++) {
1137 cfil_entry_verify(&cfil_info->cfi_entries[i]);
1138 }
1139 }
1140
1141 static void
1142 verify_content_filter(struct content_filter *cfc)
1143 {
1144 struct cfil_entry *entry;
1145 uint32_t count = 0;
1146
1147 VERIFY(cfc->cf_sock_count >= 0);
1148
1149 TAILQ_FOREACH(entry, &cfc->cf_sock_entries, cfe_link) {
1150 count++;
1151 VERIFY(cfc == entry->cfe_filter);
1152 }
1153 VERIFY(count == cfc->cf_sock_count);
1154 }
1155
1156 /*
1157 * Kernel control socket callbacks
1158 */
1159 static errno_t
1160 cfil_ctl_connect(kern_ctl_ref kctlref, struct sockaddr_ctl *sac,
1161 void **unitinfo)
1162 {
1163 errno_t error = 0;
1164 struct content_filter *cfc = NULL;
1165
1166 CFIL_LOG(LOG_NOTICE, "");
1167
1168 cfc = zalloc(content_filter_zone);
1169 if (cfc == NULL) {
1170 CFIL_LOG(LOG_ERR, "zalloc failed");
1171 error = ENOMEM;
1172 goto done;
1173 }
1174 bzero(cfc, sizeof(struct content_filter));
1175
1176 cfil_rw_lock_exclusive(&cfil_lck_rw);
1177 if (content_filters == NULL) {
1178 struct content_filter **tmp;
1179
1180 cfil_rw_unlock_exclusive(&cfil_lck_rw);
1181
1182 MALLOC(tmp,
1183 struct content_filter **,
1184 MAX_CONTENT_FILTER * sizeof(struct content_filter *),
1185 M_TEMP,
1186 M_WAITOK | M_ZERO);
1187
1188 cfil_rw_lock_exclusive(&cfil_lck_rw);
1189
1190 if (tmp == NULL && content_filters == NULL) {
1191 error = ENOMEM;
1192 cfil_rw_unlock_exclusive(&cfil_lck_rw);
1193 goto done;
1194 }
1195 /* Another thread may have won the race */
1196 if (content_filters != NULL) {
1197 FREE(tmp, M_TEMP);
1198 } else {
1199 content_filters = tmp;
1200 }
1201 }
1202
1203 if (sac->sc_unit == 0 || sac->sc_unit > MAX_CONTENT_FILTER) {
1204 CFIL_LOG(LOG_ERR, "bad sc_unit %u", sac->sc_unit);
1205 error = EINVAL;
1206 } else if (content_filters[sac->sc_unit - 1] != NULL) {
1207 CFIL_LOG(LOG_ERR, "sc_unit %u in use", sac->sc_unit);
1208 error = EADDRINUSE;
1209 } else {
1210 /*
1211 * kernel control socket kcunit numbers start at 1
1212 */
1213 content_filters[sac->sc_unit - 1] = cfc;
1214
1215 cfc->cf_kcref = kctlref;
1216 cfc->cf_kcunit = sac->sc_unit;
1217 TAILQ_INIT(&cfc->cf_sock_entries);
1218
1219 *unitinfo = cfc;
1220 cfil_active_count++;
1221
1222 // Allocate periodic stats buffer for this filter
1223 if (global_cfil_stats_report_buffers[cfc->cf_kcunit - 1] == NULL) {
1224 cfil_rw_unlock_exclusive(&cfil_lck_rw);
1225
1226 struct cfil_stats_report_buffer *buf;
1227
1228 MALLOC(buf,
1229 struct cfil_stats_report_buffer *,
1230 sizeof(struct cfil_stats_report_buffer),
1231 M_TEMP,
1232 M_WAITOK | M_ZERO);
1233
1234 cfil_rw_lock_exclusive(&cfil_lck_rw);
1235
1236 if (buf == NULL) {
1237 error = ENOMEM;
1238 cfil_rw_unlock_exclusive(&cfil_lck_rw);
1239 goto done;
1240 }
1241
1242 /* Another thread may have won the race */
1243 if (global_cfil_stats_report_buffers[cfc->cf_kcunit - 1] != NULL) {
1244 FREE(buf, M_TEMP);
1245 } else {
1246 global_cfil_stats_report_buffers[cfc->cf_kcunit - 1] = buf;
1247 }
1248 }
1249 }
1250 cfil_rw_unlock_exclusive(&cfil_lck_rw);
1251 done:
1252 if (error != 0 && cfc != NULL) {
1253 zfree(content_filter_zone, cfc);
1254 }
1255
1256 if (error == 0) {
1257 OSIncrementAtomic(&cfil_stats.cfs_ctl_connect_ok);
1258 } else {
1259 OSIncrementAtomic(&cfil_stats.cfs_ctl_connect_fail);
1260 }
1261
1262 CFIL_LOG(LOG_INFO, "return %d cfil_active_count %u kcunit %u",
1263 error, cfil_active_count, sac->sc_unit);
1264
1265 return error;
1266 }
1267
1268 static errno_t
1269 cfil_ctl_disconnect(kern_ctl_ref kctlref, u_int32_t kcunit, void *unitinfo)
1270 {
1271 #pragma unused(kctlref)
1272 errno_t error = 0;
1273 struct content_filter *cfc;
1274 struct cfil_entry *entry;
1275 uint64_t sock_flow_id = 0;
1276
1277 CFIL_LOG(LOG_NOTICE, "");
1278
1279 if (content_filters == NULL) {
1280 CFIL_LOG(LOG_ERR, "no content filter");
1281 error = EINVAL;
1282 goto done;
1283 }
1284 if (kcunit > MAX_CONTENT_FILTER) {
1285 CFIL_LOG(LOG_ERR, "kcunit %u > MAX_CONTENT_FILTER (%d)",
1286 kcunit, MAX_CONTENT_FILTER);
1287 error = EINVAL;
1288 goto done;
1289 }
1290
1291 cfc = (struct content_filter *)unitinfo;
1292 if (cfc == NULL) {
1293 goto done;
1294 }
1295
1296 cfil_rw_lock_exclusive(&cfil_lck_rw);
1297 if (content_filters[kcunit - 1] != cfc || cfc->cf_kcunit != kcunit) {
1298 CFIL_LOG(LOG_ERR, "bad unit info %u",
1299 kcunit);
1300 cfil_rw_unlock_exclusive(&cfil_lck_rw);
1301 goto done;
1302 }
1303 cfc->cf_flags |= CFF_DETACHING;
1304 /*
1305 * Remove all sockets from the filter
1306 */
1307 while ((entry = TAILQ_FIRST(&cfc->cf_sock_entries)) != NULL) {
1308 cfil_rw_lock_assert_held(&cfil_lck_rw, 1);
1309
1310 verify_content_filter(cfc);
1311 /*
1312 * Accept all outstanding data by pushing to next filter
1313 * or back to socket
1314 *
1315 * TBD: Actually we should make sure all data has been pushed
1316 * back to socket
1317 */
1318 if (entry->cfe_cfil_info && entry->cfe_cfil_info->cfi_so) {
1319 struct cfil_info *cfil_info = entry->cfe_cfil_info;
1320 struct socket *so = cfil_info->cfi_so;
1321 sock_flow_id = cfil_info->cfi_sock_id;
1322
1323 /* Need to let data flow immediately */
1324 entry->cfe_flags |= CFEF_SENT_SOCK_ATTACHED |
1325 CFEF_DATA_START;
1326
1327 /*
1328 * Respect locking hierarchy
1329 */
1330 cfil_rw_unlock_exclusive(&cfil_lck_rw);
1331
1332 socket_lock(so, 1);
1333
1334 /*
1335 * When cfe_filter is NULL the filter is detached
1336 * and the entry has been removed from cf_sock_entries
1337 */
1338 if ((so->so_cfil == NULL && so->so_cfil_db == NULL) || entry->cfe_filter == NULL) {
1339 cfil_rw_lock_exclusive(&cfil_lck_rw);
1340 goto release;
1341 }
1342
1343 (void) cfil_action_data_pass(so, cfil_info, kcunit, 1,
1344 CFM_MAX_OFFSET,
1345 CFM_MAX_OFFSET);
1346
1347 (void) cfil_action_data_pass(so, cfil_info, kcunit, 0,
1348 CFM_MAX_OFFSET,
1349 CFM_MAX_OFFSET);
1350
1351 cfil_rw_lock_exclusive(&cfil_lck_rw);
1352
1353 /*
1354 * Check again to make sure the cfil_info is still valid
1355 * as the socket may have been unlocked when calling
1356 * cfil_acquire_sockbuf()
1357 */
1358 if (entry->cfe_filter == NULL ||
1359 (so->so_cfil == NULL && cfil_db_get_cfil_info(so->so_cfil_db, sock_flow_id) == NULL)) {
1360 goto release;
1361 }
1362
1363 /* The filter is now detached */
1364 entry->cfe_flags |= CFEF_CFIL_DETACHED;
1365 #if LIFECYCLE_DEBUG
1366 cfil_info_log(LOG_DEBUG, cfil_info, "CFIL: LIFECYCLE: - FILTER DISCONNECTED");
1367 #endif
1368 CFIL_LOG(LOG_NOTICE, "so %llx detached %u",
1369 (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit);
1370 if ((cfil_info->cfi_flags & CFIF_CLOSE_WAIT) &&
1371 cfil_filters_attached(so) == 0) {
1372 CFIL_LOG(LOG_NOTICE, "so %llx waking",
1373 (uint64_t)VM_KERNEL_ADDRPERM(so));
1374 wakeup((caddr_t)cfil_info);
1375 }
1376
1377 /*
1378 * Remove the filter entry from the content filter
1379 * but leave the rest of the state intact as the queues
1380 * may not be empty yet
1381 */
1382 entry->cfe_filter = NULL;
1383 entry->cfe_necp_control_unit = 0;
1384
1385 TAILQ_REMOVE(&cfc->cf_sock_entries, entry, cfe_link);
1386 cfc->cf_sock_count--;
1387 release:
1388 socket_unlock(so, 1);
1389 }
1390 }
1391 verify_content_filter(cfc);
1392
1393 /* Free the stats buffer for this filter */
1394 if (global_cfil_stats_report_buffers[cfc->cf_kcunit - 1] != NULL) {
1395 FREE(global_cfil_stats_report_buffers[cfc->cf_kcunit - 1], M_TEMP);
1396 global_cfil_stats_report_buffers[cfc->cf_kcunit - 1] = NULL;
1397 }
1398 VERIFY(cfc->cf_sock_count == 0);
1399
1400 /*
1401 * Make filter inactive
1402 */
1403 content_filters[kcunit - 1] = NULL;
1404 cfil_active_count--;
1405 cfil_rw_unlock_exclusive(&cfil_lck_rw);
1406
1407 if (cfc->cf_crypto_state != NULL) {
1408 cfil_crypto_cleanup_state(cfc->cf_crypto_state);
1409 cfc->cf_crypto_state = NULL;
1410 }
1411
1412 zfree(content_filter_zone, cfc);
1413 done:
1414 if (error == 0) {
1415 OSIncrementAtomic(&cfil_stats.cfs_ctl_disconnect_ok);
1416 } else {
1417 OSIncrementAtomic(&cfil_stats.cfs_ctl_disconnect_fail);
1418 }
1419
1420 CFIL_LOG(LOG_INFO, "return %d cfil_active_count %u kcunit %u",
1421 error, cfil_active_count, kcunit);
1422
1423 return error;
1424 }
1425
1426 /*
1427 * cfil_acquire_sockbuf()
1428 *
1429 * Prevent any other thread from acquiring the sockbuf
1430 * We use sb_cfil_thread as a semaphore to prevent other threads from
1431 * messing with the sockbuf -- see sblock()
1432 * Note: We do not set SB_LOCK here because the thread may check or modify
1433 * SB_LOCK several times until it calls cfil_release_sockbuf() -- currently
1434 * sblock(), sbunlock() or sodefunct()
1435 */
1436 static int
1437 cfil_acquire_sockbuf(struct socket *so, struct cfil_info *cfil_info, int outgoing)
1438 {
1439 thread_t tp = current_thread();
1440 struct sockbuf *sb = outgoing ? &so->so_snd : &so->so_rcv;
1441 lck_mtx_t *mutex_held;
1442 int error = 0;
1443
1444 /*
1445 * Wait until no thread is holding the sockbuf and other content
1446 * filter threads have released the sockbuf
1447 */
1448 while ((sb->sb_flags & SB_LOCK) ||
1449 (sb->sb_cfil_thread != NULL && sb->sb_cfil_thread != tp)) {
1450 if (so->so_proto->pr_getlock != NULL) {
1451 mutex_held = (*so->so_proto->pr_getlock)(so, PR_F_WILLUNLOCK);
1452 } else {
1453 mutex_held = so->so_proto->pr_domain->dom_mtx;
1454 }
1455
1456 LCK_MTX_ASSERT(mutex_held, LCK_MTX_ASSERT_OWNED);
1457
1458 sb->sb_wantlock++;
1459 VERIFY(sb->sb_wantlock != 0);
1460
1461 msleep(&sb->sb_flags, mutex_held, PSOCK, "cfil_acquire_sockbuf",
1462 NULL);
1463
1464 VERIFY(sb->sb_wantlock != 0);
1465 sb->sb_wantlock--;
1466 }
1467 /*
1468 * Use reference count for repetitive calls on same thread
1469 */
1470 if (sb->sb_cfil_refs == 0) {
1471 VERIFY(sb->sb_cfil_thread == NULL);
1472 VERIFY((sb->sb_flags & SB_LOCK) == 0);
1473
1474 sb->sb_cfil_thread = tp;
1475 sb->sb_flags |= SB_LOCK;
1476 }
1477 sb->sb_cfil_refs++;
1478
1479 /* We acquire the socket buffer when we need to clean up */
1480 if (cfil_info == NULL) {
1481 CFIL_LOG(LOG_ERR, "so %llx cfil detached",
1482 (uint64_t)VM_KERNEL_ADDRPERM(so));
1483 error = 0;
1484 } else if (cfil_info->cfi_flags & CFIF_DROP) {
1485 CFIL_LOG(LOG_ERR, "so %llx drop set",
1486 (uint64_t)VM_KERNEL_ADDRPERM(so));
1487 error = EPIPE;
1488 }
1489
1490 return error;
1491 }
1492
1493 static void
1494 cfil_release_sockbuf(struct socket *so, int outgoing)
1495 {
1496 struct sockbuf *sb = outgoing ? &so->so_snd : &so->so_rcv;
1497 thread_t tp = current_thread();
1498
1499 socket_lock_assert_owned(so);
1500
1501 if (sb->sb_cfil_thread != NULL && sb->sb_cfil_thread != tp) {
1502 panic("%s sb_cfil_thread %p not current %p", __func__,
1503 sb->sb_cfil_thread, tp);
1504 }
1505 /*
1506 * Don't panic if we are defunct because SB_LOCK has
1507 * been cleared by sodefunct()
1508 */
1509 if (!(so->so_flags & SOF_DEFUNCT) && !(sb->sb_flags & SB_LOCK)) {
1510 panic("%s SB_LOCK not set on %p", __func__,
1511 sb);
1512 }
1513 /*
1514 * We can unlock when the thread unwinds to the last reference
1515 */
1516 sb->sb_cfil_refs--;
1517 if (sb->sb_cfil_refs == 0) {
1518 sb->sb_cfil_thread = NULL;
1519 sb->sb_flags &= ~SB_LOCK;
1520
1521 if (sb->sb_wantlock > 0) {
1522 wakeup(&sb->sb_flags);
1523 }
1524 }
1525 }
1526
1527 cfil_sock_id_t
1528 cfil_sock_id_from_socket(struct socket *so)
1529 {
1530 if ((so->so_flags & SOF_CONTENT_FILTER) && so->so_cfil) {
1531 return so->so_cfil->cfi_sock_id;
1532 } else {
1533 return CFIL_SOCK_ID_NONE;
1534 }
1535 }
1536
1537 static bool
1538 cfil_socket_safe_lock(struct inpcb *inp)
1539 {
1540 if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) != WNT_STOPUSING) {
1541 socket_lock(inp->inp_socket, 1);
1542 if (in_pcb_checkstate(inp, WNT_RELEASE, 1) != WNT_STOPUSING) {
1543 return true;
1544 }
1545 socket_unlock(inp->inp_socket, 1);
1546 }
1547 return false;
1548 }
1549
1550 static struct socket *
1551 cfil_socket_from_sock_id(cfil_sock_id_t cfil_sock_id, bool udp_only)
1552 {
1553 struct socket *so = NULL;
1554 u_int64_t gencnt = cfil_sock_id >> 32;
1555 u_int32_t flowhash = (u_int32_t)(cfil_sock_id & 0x0ffffffff);
1556 struct inpcb *inp = NULL;
1557 struct inpcbinfo *pcbinfo = NULL;
1558
1559 #if VERDICT_DEBUG
1560 CFIL_LOG(LOG_ERR, "CFIL: VERDICT: search for socket: id %llu gencnt %llx flowhash %x", cfil_sock_id, gencnt, flowhash);
1561 #endif
1562
1563 if (udp_only) {
1564 goto find_udp;
1565 }
1566
1567 pcbinfo = &tcbinfo;
1568 lck_rw_lock_shared(pcbinfo->ipi_lock);
1569 LIST_FOREACH(inp, pcbinfo->ipi_listhead, inp_list) {
1570 if (inp->inp_state != INPCB_STATE_DEAD &&
1571 inp->inp_socket != NULL &&
1572 inp->inp_flowhash == flowhash &&
1573 (inp->inp_socket->so_gencnt & 0x0ffffffff) == gencnt &&
1574 inp->inp_socket->so_cfil != NULL) {
1575 if (cfil_socket_safe_lock(inp)) {
1576 so = inp->inp_socket;
1577 }
1578 break;
1579 }
1580 }
1581 lck_rw_done(pcbinfo->ipi_lock);
1582 if (so != NULL) {
1583 goto done;
1584 }
1585
1586 find_udp:
1587
1588 pcbinfo = &udbinfo;
1589 lck_rw_lock_shared(pcbinfo->ipi_lock);
1590 LIST_FOREACH(inp, pcbinfo->ipi_listhead, inp_list) {
1591 if (inp->inp_state != INPCB_STATE_DEAD &&
1592 inp->inp_socket != NULL &&
1593 inp->inp_socket->so_cfil_db != NULL &&
1594 (inp->inp_socket->so_gencnt & 0x0ffffffff) == gencnt) {
1595 if (cfil_socket_safe_lock(inp)) {
1596 so = inp->inp_socket;
1597 }
1598 break;
1599 }
1600 }
1601 lck_rw_done(pcbinfo->ipi_lock);
1602
1603 done:
1604 if (so == NULL) {
1605 OSIncrementAtomic(&cfil_stats.cfs_sock_id_not_found);
1606 CFIL_LOG(LOG_DEBUG,
1607 "no socket for sock_id %llx gencnt %llx flowhash %x",
1608 cfil_sock_id, gencnt, flowhash);
1609 }
1610
1611 return so;
1612 }
1613
1614 static struct socket *
1615 cfil_socket_from_client_uuid(uuid_t necp_client_uuid, bool *cfil_attached)
1616 {
1617 struct socket *so = NULL;
1618 struct inpcb *inp = NULL;
1619 struct inpcbinfo *pcbinfo = &tcbinfo;
1620
1621 lck_rw_lock_shared(pcbinfo->ipi_lock);
1622 LIST_FOREACH(inp, pcbinfo->ipi_listhead, inp_list) {
1623 if (inp->inp_state != INPCB_STATE_DEAD &&
1624 inp->inp_socket != NULL &&
1625 uuid_compare(inp->necp_client_uuid, necp_client_uuid) == 0) {
1626 *cfil_attached = (inp->inp_socket->so_cfil != NULL);
1627 if (cfil_socket_safe_lock(inp)) {
1628 so = inp->inp_socket;
1629 }
1630 break;
1631 }
1632 }
1633 lck_rw_done(pcbinfo->ipi_lock);
1634 if (so != NULL) {
1635 goto done;
1636 }
1637
1638 pcbinfo = &udbinfo;
1639 lck_rw_lock_shared(pcbinfo->ipi_lock);
1640 LIST_FOREACH(inp, pcbinfo->ipi_listhead, inp_list) {
1641 if (inp->inp_state != INPCB_STATE_DEAD &&
1642 inp->inp_socket != NULL &&
1643 uuid_compare(inp->necp_client_uuid, necp_client_uuid) == 0) {
1644 *cfil_attached = (inp->inp_socket->so_cfil_db != NULL);
1645 if (cfil_socket_safe_lock(inp)) {
1646 so = inp->inp_socket;
1647 }
1648 break;
1649 }
1650 }
1651 lck_rw_done(pcbinfo->ipi_lock);
1652
1653 done:
1654 return so;
1655 }
1656
1657 static void
1658 cfil_info_stats_toggle(struct cfil_info *cfil_info, struct cfil_entry *entry, uint32_t report_frequency)
1659 {
1660 struct cfil_info *cfil = NULL;
1661 Boolean found = FALSE;
1662 int kcunit;
1663
1664 if (cfil_info == NULL) {
1665 return;
1666 }
1667
1668 if (report_frequency) {
1669 if (entry == NULL) {
1670 return;
1671 }
1672
1673 // Update stats reporting frequency.
1674 if (entry->cfe_stats_report_frequency != report_frequency) {
1675 entry->cfe_stats_report_frequency = report_frequency;
1676 if (entry->cfe_stats_report_frequency < CFIL_STATS_REPORT_INTERVAL_MIN_MSEC) {
1677 entry->cfe_stats_report_frequency = CFIL_STATS_REPORT_INTERVAL_MIN_MSEC;
1678 }
1679 microuptime(&entry->cfe_stats_report_ts);
1680
1681 // Insert cfil_info into the list only if it is not already present.
1682 TAILQ_FOREACH(cfil, &cfil_sock_head_stats, cfi_link_stats) {
1683 if (cfil == cfil_info) {
1684 return;
1685 }
1686 }
1687
1688 TAILQ_INSERT_TAIL(&cfil_sock_head_stats, cfil_info, cfi_link_stats);
1689
1690 // Wake up the stats thread if this is the first flow added
1691 if (cfil_sock_attached_stats_count == 0) {
1692 thread_wakeup((caddr_t)&cfil_sock_attached_stats_count);
1693 }
1694 cfil_sock_attached_stats_count++;
1695 #if STATS_DEBUG
1696 CFIL_LOG(LOG_ERR, "CFIL: VERDICT RECEIVED - STATS FLOW INSERTED: <so %llx sockID %llu> stats frequency %d msecs",
1697 cfil_info->cfi_so ? (uint64_t)VM_KERNEL_ADDRPERM(cfil_info->cfi_so) : 0,
1698 cfil_info->cfi_sock_id,
1699 entry->cfe_stats_report_frequency);
1700 #endif
1701 }
1702 } else {
1703 // Turn off stats reporting for this filter.
1704 if (entry != NULL) {
1705 // Already off, no change.
1706 if (entry->cfe_stats_report_frequency == 0) {
1707 return;
1708 }
1709
1710 entry->cfe_stats_report_frequency = 0;
1711 // If cfil_info still has filter(s) asking for stats, no need to remove from list.
1712 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
1713 if (cfil_info->cfi_entries[kcunit - 1].cfe_stats_report_frequency > 0) {
1714 return;
1715 }
1716 }
1717 }
1718
1719 // No more filters asking for stats for this cfil_info, remove it from the list.
1720 if (!TAILQ_EMPTY(&cfil_sock_head_stats)) {
1721 found = FALSE;
1722 TAILQ_FOREACH(cfil, &cfil_sock_head_stats, cfi_link_stats) {
1723 if (cfil == cfil_info) {
1724 found = TRUE;
1725 break;
1726 }
1727 }
1728 if (found) {
1729 cfil_sock_attached_stats_count--;
1730 TAILQ_REMOVE(&cfil_sock_head_stats, cfil_info, cfi_link_stats);
1731 #if STATS_DEBUG
1732 CFIL_LOG(LOG_ERR, "CFIL: VERDICT RECEIVED - STATS FLOW DELETED: <so %llx sockID %llu> stats frequency reset",
1733 cfil_info->cfi_so ? (uint64_t)VM_KERNEL_ADDRPERM(cfil_info->cfi_so) : 0,
1734 cfil_info->cfi_sock_id);
1735 #endif
1736 }
1737 }
1738 }
1739 }
1740
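/*
 * cfil_ctl_send
 *
 * Kernel control send handler: processes an action message sent by the user
 * space filter agent. After validating the message header, CFM_OP_BLESS_CLIENT
 * and CFM_OP_SET_CRYPTO_KEY are handled without a socket lookup; otherwise the
 * target socket is found and locked by sock id, then CFM_OP_DATA_UPDATE
 * updates the pass/peek offsets in both directions and toggles stats
 * reporting, while CFM_OP_DROP drops the flow. The mbuf is always freed
 * before returning.
 */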
1741 static errno_t
1742 cfil_ctl_send(kern_ctl_ref kctlref, u_int32_t kcunit, void *unitinfo, mbuf_t m,
1743 int flags)
1744 {
1745 #pragma unused(kctlref, flags)
1746 errno_t error = 0;
1747 struct cfil_msg_hdr *msghdr;
1748 struct content_filter *cfc = (struct content_filter *)unitinfo;
1749 struct socket *so;
1750 struct cfil_msg_action *action_msg;
1751 struct cfil_entry *entry;
1752 struct cfil_info *cfil_info = NULL;
1753 unsigned int data_len = 0;
1754
1755 CFIL_LOG(LOG_INFO, "");
1756
1757 if (content_filters == NULL) {
1758 CFIL_LOG(LOG_ERR, "no content filter");
1759 error = EINVAL;
1760 goto done;
1761 }
1762 if (kcunit > MAX_CONTENT_FILTER) {
1763 CFIL_LOG(LOG_ERR, "kcunit %u > MAX_CONTENT_FILTER (%d)",
1764 kcunit, MAX_CONTENT_FILTER);
1765 error = EINVAL;
1766 goto done;
1767 }
1768 if (m == NULL) {
1769 CFIL_LOG(LOG_ERR, "null mbuf");
1770 error = EINVAL;
1771 goto done;
1772 }
1773 data_len = m_length(m);
1774
1775 if (data_len < sizeof(struct cfil_msg_hdr)) {
1776 CFIL_LOG(LOG_ERR, "too short %u", data_len);
1777 error = EINVAL;
1778 goto done;
1779 }
1780 msghdr = (struct cfil_msg_hdr *)mbuf_data(m);
1781 if (msghdr->cfm_version != CFM_VERSION_CURRENT) {
1782 CFIL_LOG(LOG_ERR, "bad version %u", msghdr->cfm_version);
1783 error = EINVAL;
1784 goto done;
1785 }
1786 if (msghdr->cfm_type != CFM_TYPE_ACTION) {
1787 CFIL_LOG(LOG_ERR, "bad type %u", msghdr->cfm_type);
1788 error = EINVAL;
1789 goto done;
1790 }
1791 if (msghdr->cfm_len > data_len) {
1792 CFIL_LOG(LOG_ERR, "bad length %u", msghdr->cfm_len);
1793 error = EINVAL;
1794 goto done;
1795 }
1796
1797 /* Validate action operation */
1798 switch (msghdr->cfm_op) {
1799 case CFM_OP_DATA_UPDATE:
1800 OSIncrementAtomic(
1801 &cfil_stats.cfs_ctl_action_data_update);
1802 break;
1803 case CFM_OP_DROP:
1804 OSIncrementAtomic(&cfil_stats.cfs_ctl_action_drop);
1805 break;
1806 case CFM_OP_BLESS_CLIENT:
1807 if (msghdr->cfm_len != sizeof(struct cfil_msg_bless_client)) {
1808 OSIncrementAtomic(&cfil_stats.cfs_ctl_action_bad_len);
1809 error = EINVAL;
1810 CFIL_LOG(LOG_ERR, "bad len: %u for op %u",
1811 msghdr->cfm_len,
1812 msghdr->cfm_op);
1813 goto done;
1814 }
1815 error = cfil_action_bless_client(kcunit, msghdr);
1816 goto done;
1817 case CFM_OP_SET_CRYPTO_KEY:
1818 if (msghdr->cfm_len != sizeof(struct cfil_msg_set_crypto_key)) {
1819 OSIncrementAtomic(&cfil_stats.cfs_ctl_action_bad_len);
1820 error = EINVAL;
1821 CFIL_LOG(LOG_ERR, "bad len: %u for op %u",
1822 msghdr->cfm_len,
1823 msghdr->cfm_op);
1824 goto done;
1825 }
1826 error = cfil_action_set_crypto_key(kcunit, msghdr);
1827 goto done;
1828 default:
1829 OSIncrementAtomic(&cfil_stats.cfs_ctl_action_bad_op);
1830 CFIL_LOG(LOG_ERR, "bad op %u", msghdr->cfm_op);
1831 error = EINVAL;
1832 goto done;
1833 }
1834 if (msghdr->cfm_len != sizeof(struct cfil_msg_action)) {
1835 OSIncrementAtomic(&cfil_stats.cfs_ctl_action_bad_len);
1836 error = EINVAL;
1837 CFIL_LOG(LOG_ERR, "bad len: %u for op %u",
1838 msghdr->cfm_len,
1839 msghdr->cfm_op);
1840 goto done;
1841 }
1842 cfil_rw_lock_shared(&cfil_lck_rw);
1843 if (cfc != (void *)content_filters[kcunit - 1]) {
1844 CFIL_LOG(LOG_ERR, "unitinfo does not match for kcunit %u",
1845 kcunit);
1846 error = EINVAL;
1847 cfil_rw_unlock_shared(&cfil_lck_rw);
1848 goto done;
1849 }
1850 cfil_rw_unlock_shared(&cfil_lck_rw);
1851
1852 // Search for the socket (TCP+UDP) and lock it
1853 so = cfil_socket_from_sock_id(msghdr->cfm_sock_id, false);
1854 if (so == NULL) {
1855 CFIL_LOG(LOG_NOTICE, "bad sock_id %llx",
1856 msghdr->cfm_sock_id);
1857 error = EINVAL;
1858 goto done;
1859 }
1860
1861 cfil_info = so->so_cfil_db != NULL ?
1862 cfil_db_get_cfil_info(so->so_cfil_db, msghdr->cfm_sock_id) : so->so_cfil;
1863
1864 if (cfil_info == NULL) {
1865 CFIL_LOG(LOG_NOTICE, "so %llx <id %llu> not attached",
1866 (uint64_t)VM_KERNEL_ADDRPERM(so), msghdr->cfm_sock_id);
1867 error = EINVAL;
1868 goto unlock;
1869 } else if (cfil_info->cfi_flags & CFIF_DROP) {
1870 CFIL_LOG(LOG_NOTICE, "so %llx drop set",
1871 (uint64_t)VM_KERNEL_ADDRPERM(so));
1872 error = EINVAL;
1873 goto unlock;
1874 }
1875 entry = &cfil_info->cfi_entries[kcunit - 1];
1876 if (entry->cfe_filter == NULL) {
1877 CFIL_LOG(LOG_NOTICE, "so %llx no filter",
1878 (uint64_t)VM_KERNEL_ADDRPERM(so));
1879 error = EINVAL;
1880 goto unlock;
1881 }
1882
1883 if (entry->cfe_flags & CFEF_SENT_SOCK_ATTACHED) {
1884 entry->cfe_flags |= CFEF_DATA_START;
1885 } else {
1886 CFIL_LOG(LOG_ERR,
1887 "so %llx attached not sent for %u",
1888 (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit);
1889 error = EINVAL;
1890 goto unlock;
1891 }
1892
1893 microuptime(&entry->cfe_last_action);
1894 CFI_ADD_TIME_LOG(cfil_info, &entry->cfe_last_action, &cfil_info->cfi_first_event, msghdr->cfm_op);
1895
1896 action_msg = (struct cfil_msg_action *)msghdr;
1897
1898 switch (msghdr->cfm_op) {
1899 case CFM_OP_DATA_UPDATE:
1900 #if VERDICT_DEBUG
1901 CFIL_LOG(LOG_ERR, "CFIL: VERDICT RECEIVED: <so %llx sockID %llu> <IN peek:%llu pass:%llu, OUT peek:%llu pass:%llu>",
1902 (uint64_t)VM_KERNEL_ADDRPERM(so),
1903 cfil_info->cfi_sock_id,
1904 action_msg->cfa_in_peek_offset, action_msg->cfa_in_pass_offset,
1905 action_msg->cfa_out_peek_offset, action_msg->cfa_out_pass_offset);
1906 #endif
1907 /*
1908 * Received verdict, at this point we know this
1909 * socket connection is allowed. Unblock thread
1910 * immediately before proceeding to process the verdict.
1911 */
1912 cfil_sock_received_verdict(so);
1913
1914 if (action_msg->cfa_out_peek_offset != 0 ||
1915 action_msg->cfa_out_pass_offset != 0) {
1916 error = cfil_action_data_pass(so, cfil_info, kcunit, 1,
1917 action_msg->cfa_out_pass_offset,
1918 action_msg->cfa_out_peek_offset);
1919 }
1920 if (error == EJUSTRETURN) {
1921 error = 0;
1922 }
1923 if (error != 0) {
1924 break;
1925 }
1926 if (action_msg->cfa_in_peek_offset != 0 ||
1927 action_msg->cfa_in_pass_offset != 0) {
1928 error = cfil_action_data_pass(so, cfil_info, kcunit, 0,
1929 action_msg->cfa_in_pass_offset,
1930 action_msg->cfa_in_peek_offset);
1931 }
1932 if (error == EJUSTRETURN) {
1933 error = 0;
1934 }
1935
1936 // Toggle stats reporting according to received verdict.
1937 cfil_rw_lock_exclusive(&cfil_lck_rw);
1938 cfil_info_stats_toggle(cfil_info, entry, action_msg->cfa_stats_frequency);
1939 cfil_rw_unlock_exclusive(&cfil_lck_rw);
1940
1941 break;
1942
1943 case CFM_OP_DROP:
1944 #if VERDICT_DEBUG
1945 CFIL_LOG(LOG_ERR, "CFIL: VERDICT DROP RECEIVED: <so %llx sockID %llu> <IN peek:%llu pass:%llu, OUT peek:%llu pass:%llu>",
1946 (uint64_t)VM_KERNEL_ADDRPERM(so),
1947 cfil_info->cfi_sock_id,
1948 action_msg->cfa_in_peek_offset, action_msg->cfa_in_pass_offset,
1949 action_msg->cfa_out_peek_offset, action_msg->cfa_out_pass_offset);
1950 #endif
1951 error = cfil_action_drop(so, cfil_info, kcunit);
1952 cfil_sock_received_verdict(so);
1953 break;
1954
1955 default:
1956 error = EINVAL;
1957 break;
1958 }
1959 unlock:
1960 socket_unlock(so, 1);
1961 done:
1962 mbuf_freem(m);
1963
1964 if (error == 0) {
1965 OSIncrementAtomic(&cfil_stats.cfs_ctl_send_ok);
1966 } else {
1967 OSIncrementAtomic(&cfil_stats.cfs_ctl_send_bad);
1968 }
1969
1970 return error;
1971 }
1972
1973 static errno_t
1974 cfil_ctl_getopt(kern_ctl_ref kctlref, u_int32_t kcunit, void *unitinfo,
1975 int opt, void *data, size_t *len)
1976 {
1977 #pragma unused(kctlref, opt)
1978 struct cfil_info *cfil_info = NULL;
1979 errno_t error = 0;
1980 struct content_filter *cfc = (struct content_filter *)unitinfo;
1981
1982 CFIL_LOG(LOG_NOTICE, "");
1983
1984 cfil_rw_lock_shared(&cfil_lck_rw);
1985
1986 if (content_filters == NULL) {
1987 CFIL_LOG(LOG_ERR, "no content filter");
1988 error = EINVAL;
1989 goto done;
1990 }
1991 if (kcunit > MAX_CONTENT_FILTER) {
1992 CFIL_LOG(LOG_ERR, "kcunit %u > MAX_CONTENT_FILTER (%d)",
1993 kcunit, MAX_CONTENT_FILTER);
1994 error = EINVAL;
1995 goto done;
1996 }
1997 if (cfc != (void *)content_filters[kcunit - 1]) {
1998 CFIL_LOG(LOG_ERR, "unitinfo does not match for kcunit %u",
1999 kcunit);
2000 error = EINVAL;
2001 goto done;
2002 }
2003 switch (opt) {
2004 case CFIL_OPT_NECP_CONTROL_UNIT:
2005 if (*len < sizeof(uint32_t)) {
2006 CFIL_LOG(LOG_ERR, "len too small %lu", *len);
2007 error = EINVAL;
2008 goto done;
2009 }
2010 if (data != NULL) {
2011 *(uint32_t *)data = cfc->cf_necp_control_unit;
2012 }
2013 break;
2014 case CFIL_OPT_GET_SOCKET_INFO:
2015 if (*len != sizeof(struct cfil_opt_sock_info)) {
2016 CFIL_LOG(LOG_ERR, "len does not match %lu", *len);
2017 error = EINVAL;
2018 goto done;
2019 }
2020 if (data == NULL) {
2021 CFIL_LOG(LOG_ERR, "data not passed");
2022 error = EINVAL;
2023 goto done;
2024 }
2025
2026 struct cfil_opt_sock_info *sock_info =
2027 (struct cfil_opt_sock_info *) data;
2028
2029 // Unlock here so that we never hold both cfil_lck_rw and the
2030 // socket_lock at the same time. Otherwise, this can deadlock
2031 // because soclose() takes the socket_lock and then exclusive
2032 // cfil_lck_rw and we require the opposite order.
2033
2034 // WARNING: Be sure to never use anything protected
2035 // by cfil_lck_rw beyond this point.
2036 // WARNING: Do not fall through to the done: label from this
2037 // branch; exit only via goto return_already_unlocked.
2038 cfil_rw_unlock_shared(&cfil_lck_rw);
2039
2040 // Search (TCP+UDP) and lock socket
2041 struct socket *sock =
2042 cfil_socket_from_sock_id(sock_info->cfs_sock_id, false);
2043 if (sock == NULL) {
2044 #if LIFECYCLE_DEBUG
2045 CFIL_LOG(LOG_ERR, "CFIL: GET_SOCKET_INFO failed: bad sock_id %llu",
2046 sock_info->cfs_sock_id);
2047 #endif
2048 error = ENOENT;
2049 goto return_already_unlocked;
2050 }
2051
2052 cfil_info = (sock->so_cfil_db != NULL) ?
2053 cfil_db_get_cfil_info(sock->so_cfil_db, sock_info->cfs_sock_id) : sock->so_cfil;
2054
2055 if (cfil_info == NULL) {
2056 #if LIFECYCLE_DEBUG
2057 CFIL_LOG(LOG_ERR, "CFIL: GET_SOCKET_INFO failed: so %llx not attached, cannot fetch info",
2058 (uint64_t)VM_KERNEL_ADDRPERM(sock));
2059 #endif
2060 error = EINVAL;
2061 socket_unlock(sock, 1);
2062 goto return_already_unlocked;
2063 }
2064
2065 // Fill out family, type, and protocol
2066 sock_info->cfs_sock_family = sock->so_proto->pr_domain->dom_family;
2067 sock_info->cfs_sock_type = sock->so_proto->pr_type;
2068 sock_info->cfs_sock_protocol = sock->so_proto->pr_protocol;
2069
2070 // Source and destination addresses
2071 struct inpcb *inp = sotoinpcb(sock);
2072 if (inp->inp_vflag & INP_IPV6) {
2073 struct in6_addr *laddr = NULL, *faddr = NULL;
2074 u_int16_t lport = 0, fport = 0;
2075
2076 cfil_get_flow_address_v6(cfil_info->cfi_hash_entry, inp,
2077 &laddr, &faddr, &lport, &fport);
2078 fill_ip6_sockaddr_4_6(&sock_info->cfs_local, laddr, lport);
2079 fill_ip6_sockaddr_4_6(&sock_info->cfs_remote, faddr, fport);
2080 } else if (inp->inp_vflag & INP_IPV4) {
2081 struct in_addr laddr = {.s_addr = 0}, faddr = {.s_addr = 0};
2082 u_int16_t lport = 0, fport = 0;
2083
2084 cfil_get_flow_address(cfil_info->cfi_hash_entry, inp,
2085 &laddr, &faddr, &lport, &fport);
2086 fill_ip_sockaddr_4_6(&sock_info->cfs_local, laddr, lport);
2087 fill_ip_sockaddr_4_6(&sock_info->cfs_remote, faddr, fport);
2088 }
2089
2090 // Set the pid info
2091 sock_info->cfs_pid = sock->last_pid;
2092 memcpy(sock_info->cfs_uuid, sock->last_uuid, sizeof(uuid_t));
2093
2094 if (sock->so_flags & SOF_DELEGATED) {
2095 sock_info->cfs_e_pid = sock->e_pid;
2096 memcpy(sock_info->cfs_e_uuid, sock->e_uuid, sizeof(uuid_t));
2097 } else {
2098 sock_info->cfs_e_pid = sock->last_pid;
2099 memcpy(sock_info->cfs_e_uuid, sock->last_uuid, sizeof(uuid_t));
2100 }
2101
2102 socket_unlock(sock, 1);
2103
2104 goto return_already_unlocked;
2105 default:
2106 error = ENOPROTOOPT;
2107 break;
2108 }
2109 done:
2110 cfil_rw_unlock_shared(&cfil_lck_rw);
2111
2112 return error;
2113
2114 return_already_unlocked:
2115
2116 return error;
2117 }
2118
2119 static errno_t
2120 cfil_ctl_setopt(kern_ctl_ref kctlref, u_int32_t kcunit, void *unitinfo,
2121 int opt, void *data, size_t len)
2122 {
2123 #pragma unused(kctlref, opt)
2124 errno_t error = 0;
2125 struct content_filter *cfc = (struct content_filter *)unitinfo;
2126
2127 CFIL_LOG(LOG_NOTICE, "");
2128
2129 cfil_rw_lock_exclusive(&cfil_lck_rw);
2130
2131 if (content_filters == NULL) {
2132 CFIL_LOG(LOG_ERR, "no content filter");
2133 error = EINVAL;
2134 goto done;
2135 }
2136 if (kcunit > MAX_CONTENT_FILTER) {
2137 CFIL_LOG(LOG_ERR, "kcunit %u > MAX_CONTENT_FILTER (%d)",
2138 kcunit, MAX_CONTENT_FILTER);
2139 error = EINVAL;
2140 goto done;
2141 }
2142 if (cfc != (void *)content_filters[kcunit - 1]) {
2143 CFIL_LOG(LOG_ERR, "unitinfo does not match for kcunit %u",
2144 kcunit);
2145 error = EINVAL;
2146 goto done;
2147 }
2148 switch (opt) {
2149 case CFIL_OPT_NECP_CONTROL_UNIT:
2150 if (len < sizeof(uint32_t)) {
2151 CFIL_LOG(LOG_ERR, "CFIL_OPT_NECP_CONTROL_UNIT "
2152 "len too small %lu", len);
2153 error = EINVAL;
2154 goto done;
2155 }
2156 if (cfc->cf_necp_control_unit != 0) {
2157 CFIL_LOG(LOG_ERR, "CFIL_OPT_NECP_CONTROL_UNIT "
2158 "already set %u",
2159 cfc->cf_necp_control_unit);
2160 error = EINVAL;
2161 goto done;
2162 }
2163 cfc->cf_necp_control_unit = *(uint32_t *)data;
2164 break;
2165 default:
2166 error = ENOPROTOOPT;
2167 break;
2168 }
2169 done:
2170 cfil_rw_unlock_exclusive(&cfil_lck_rw);
2171
2172 return error;
2173 }
2174
2175
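/*
 * cfil_ctl_rcvd
 *
 * Kernel control "data received" callback: invoked when the user space filter
 * agent has drained data from its kernel control socket. Clears the filter's
 * CFF_FLOW_CONTROLLED flag and, while it stays clear, services the control
 * queues of entries that were flow controlled so pending data can be
 * delivered again.
 */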
2176 static void
2177 cfil_ctl_rcvd(kern_ctl_ref kctlref, u_int32_t kcunit, void *unitinfo, int flags)
2178 {
2179 #pragma unused(kctlref, flags)
2180 struct content_filter *cfc = (struct content_filter *)unitinfo;
2181 struct socket *so = NULL;
2182 int error;
2183 struct cfil_entry *entry;
2184 struct cfil_info *cfil_info = NULL;
2185
2186 CFIL_LOG(LOG_INFO, "");
2187
2188 if (content_filters == NULL) {
2189 CFIL_LOG(LOG_ERR, "no content filter");
2190 OSIncrementAtomic(&cfil_stats.cfs_ctl_rcvd_bad);
2191 return;
2192 }
2193 if (kcunit > MAX_CONTENT_FILTER) {
2194 CFIL_LOG(LOG_ERR, "kcunit %u > MAX_CONTENT_FILTER (%d)",
2195 kcunit, MAX_CONTENT_FILTER);
2196 OSIncrementAtomic(&cfil_stats.cfs_ctl_rcvd_bad);
2197 return;
2198 }
2199 cfil_rw_lock_shared(&cfil_lck_rw);
2200 if (cfc != (void *)content_filters[kcunit - 1]) {
2201 CFIL_LOG(LOG_ERR, "unitinfo does not match for kcunit %u",
2202 kcunit);
2203 OSIncrementAtomic(&cfil_stats.cfs_ctl_rcvd_bad);
2204 goto done;
2205 }
2206 /* Let's assume the flow control is lifted */
2207 if (cfc->cf_flags & CFF_FLOW_CONTROLLED) {
2208 if (!cfil_rw_lock_shared_to_exclusive(&cfil_lck_rw)) {
2209 cfil_rw_lock_exclusive(&cfil_lck_rw);
2210 }
2211
2212 cfc->cf_flags &= ~CFF_FLOW_CONTROLLED;
2213
2214 cfil_rw_lock_exclusive_to_shared(&cfil_lck_rw);
2215 LCK_RW_ASSERT(&cfil_lck_rw, LCK_RW_ASSERT_SHARED);
2216 }
2217 /*
2218 * Flow control will be raised again as soon as an entry cannot enqueue
2219 * to the kernel control socket
2220 */
2221 while ((cfc->cf_flags & CFF_FLOW_CONTROLLED) == 0) {
2222 verify_content_filter(cfc);
2223
2224 cfil_rw_lock_assert_held(&cfil_lck_rw, 0);
2225
2226 /* Find an entry that is flow controlled */
2227 TAILQ_FOREACH(entry, &cfc->cf_sock_entries, cfe_link) {
2228 if (entry->cfe_cfil_info == NULL ||
2229 entry->cfe_cfil_info->cfi_so == NULL) {
2230 continue;
2231 }
2232 if ((entry->cfe_flags & CFEF_FLOW_CONTROLLED) == 0) {
2233 continue;
2234 }
break;
2235 }
2236 if (entry == NULL) {
2237 break;
2238 }
2239
2240 OSIncrementAtomic(&cfil_stats.cfs_ctl_rcvd_flow_lift);
2241
2242 cfil_info = entry->cfe_cfil_info;
2243 so = cfil_info->cfi_so;
2244
2245 cfil_rw_unlock_shared(&cfil_lck_rw);
2246 socket_lock(so, 1);
2247
2248 do {
2249 error = cfil_acquire_sockbuf(so, cfil_info, 1);
2250 if (error == 0) {
2251 error = cfil_data_service_ctl_q(so, cfil_info, kcunit, 1);
2252 }
2253 cfil_release_sockbuf(so, 1);
2254 if (error != 0) {
2255 break;
2256 }
2257
2258 error = cfil_acquire_sockbuf(so, cfil_info, 0);
2259 if (error == 0) {
2260 error = cfil_data_service_ctl_q(so, cfil_info, kcunit, 0);
2261 }
2262 cfil_release_sockbuf(so, 0);
2263 } while (0);
2264
2265 socket_lock_assert_owned(so);
2266 socket_unlock(so, 1);
2267
2268 cfil_rw_lock_shared(&cfil_lck_rw);
2269 }
2270 done:
2271 cfil_rw_unlock_shared(&cfil_lck_rw);
2272 }
2273
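/*
 * cfil_init
 *
 * One-time initialization of the content filter subsystem: compile-time and
 * runtime sanity checks, creation of the zones for the filter, cfil_info,
 * hash entry and db structures, lock setup, registration of the content
 * filter kernel control, and creation of the UDP garbage collection and
 * statistics reporting threads.
 */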
2274 void
2275 cfil_init(void)
2276 {
2277 struct kern_ctl_reg kern_ctl;
2278 errno_t error = 0;
2279 vm_size_t content_filter_size = 0; /* size of content_filter */
2280 vm_size_t cfil_info_size = 0; /* size of cfil_info */
2281 vm_size_t cfil_hash_entry_size = 0; /* size of cfil_hash_entry */
2282 vm_size_t cfil_db_size = 0; /* size of cfil_db */
2283 unsigned int mbuf_limit = 0;
2284
2285 CFIL_LOG(LOG_NOTICE, "");
2286
2287 /*
2288 * Compile time verifications
2289 */
2290 _CASSERT(CFIL_MAX_FILTER_COUNT == MAX_CONTENT_FILTER);
2291 _CASSERT(sizeof(struct cfil_filter_stat) % sizeof(uint32_t) == 0);
2292 _CASSERT(sizeof(struct cfil_entry_stat) % sizeof(uint32_t) == 0);
2293 _CASSERT(sizeof(struct cfil_sock_stat) % sizeof(uint32_t) == 0);
2294
2295 /*
2296 * Runtime verifications
2297 */
2298 VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_ctl_q_in_enqueued,
2299 sizeof(uint32_t)));
2300 VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_ctl_q_out_enqueued,
2301 sizeof(uint32_t)));
2302 VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_ctl_q_in_peeked,
2303 sizeof(uint32_t)));
2304 VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_ctl_q_out_peeked,
2305 sizeof(uint32_t)));
2306
2307 VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_pending_q_in_enqueued,
2308 sizeof(uint32_t)));
2309 VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_pending_q_out_enqueued,
2310 sizeof(uint32_t)));
2311
2312 VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_inject_q_in_enqueued,
2313 sizeof(uint32_t)));
2314 VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_inject_q_out_enqueued,
2315 sizeof(uint32_t)));
2316 VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_inject_q_in_passed,
2317 sizeof(uint32_t)));
2318 VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_inject_q_out_passed,
2319 sizeof(uint32_t)));
2320
2321 /*
2322 * Zone for content filter kernel control socket state
2323 */
2324 content_filter_size = sizeof(struct content_filter);
2325 content_filter_zone = zinit(content_filter_size,
2326 CONTENT_FILTER_ZONE_MAX * content_filter_size,
2327 0,
2328 CONTENT_FILTER_ZONE_NAME);
2329 if (content_filter_zone == NULL) {
2330 panic("%s: zinit(%s) failed", __func__,
2331 CONTENT_FILTER_ZONE_NAME);
2332 /* NOTREACHED */
2333 }
2334 zone_change(content_filter_zone, Z_CALLERACCT, FALSE);
2335 zone_change(content_filter_zone, Z_EXPAND, TRUE);
2336
2337 /*
2338 * Zone for per-socket content filter state
2339 */
2340 cfil_info_size = sizeof(struct cfil_info);
2341 cfil_info_zone = zinit(cfil_info_size,
2342 CFIL_INFO_ZONE_MAX * cfil_info_size,
2343 0,
2344 CFIL_INFO_ZONE_NAME);
2345 if (cfil_info_zone == NULL) {
2346 panic("%s: zinit(%s) failed", __func__, CFIL_INFO_ZONE_NAME);
2347 /* NOTREACHED */
2348 }
2349 zone_change(cfil_info_zone, Z_CALLERACCT, FALSE);
2350 zone_change(cfil_info_zone, Z_EXPAND, TRUE);
2351
2352 /*
2353 * Zone for content filter hash entries and databases
2354 */
2355 cfil_hash_entry_size = sizeof(struct cfil_hash_entry);
2356 cfil_hash_entry_zone = zinit(cfil_hash_entry_size,
2357 CFIL_HASH_ENTRY_ZONE_MAX * cfil_hash_entry_size,
2358 0,
2359 CFIL_HASH_ENTRY_ZONE_NAME);
2360 if (cfil_hash_entry_zone == NULL) {
2361 panic("%s: zinit(%s) failed", __func__, CFIL_HASH_ENTRY_ZONE_NAME);
2362 /* NOTREACHED */
2363 }
2364 zone_change(cfil_hash_entry_zone, Z_CALLERACCT, FALSE);
2365 zone_change(cfil_hash_entry_zone, Z_EXPAND, TRUE);
2366
2367 cfil_db_size = sizeof(struct cfil_db);
2368 cfil_db_zone = zinit(cfil_db_size,
2369 CFIL_DB_ZONE_MAX * cfil_db_size,
2370 0,
2371 CFIL_DB_ZONE_NAME);
2372 if (cfil_db_zone == NULL) {
2373 panic("%s: zinit(%s) failed", __func__, CFIL_DB_ZONE_NAME);
2374 /* NOTREACHED */
2375 }
2376 zone_change(cfil_db_zone, Z_CALLERACCT, FALSE);
2377 zone_change(cfil_db_zone, Z_EXPAND, TRUE);
2378
2379 /*
2380 * Allocate locks
2381 */
2382 cfil_lck_grp_attr = lck_grp_attr_alloc_init();
2383 if (cfil_lck_grp_attr == NULL) {
2384 panic("%s: lck_grp_attr_alloc_init failed", __func__);
2385 /* NOTREACHED */
2386 }
2387 cfil_lck_grp = lck_grp_alloc_init("content filter",
2388 cfil_lck_grp_attr);
2389 if (cfil_lck_grp == NULL) {
2390 panic("%s: lck_grp_alloc_init failed", __func__);
2391 /* NOTREACHED */
2392 }
2393 cfil_lck_attr = lck_attr_alloc_init();
2394 if (cfil_lck_attr == NULL) {
2395 panic("%s: lck_attr_alloc_init failed", __func__);
2396 /* NOTREACHED */
2397 }
2398 lck_rw_init(&cfil_lck_rw, cfil_lck_grp, cfil_lck_attr);
2399
2400 TAILQ_INIT(&cfil_sock_head);
2401 TAILQ_INIT(&cfil_sock_head_stats);
2402
2403 /*
2404 * Register kernel control
2405 */
2406 bzero(&kern_ctl, sizeof(kern_ctl));
2407 strlcpy(kern_ctl.ctl_name, CONTENT_FILTER_CONTROL_NAME,
2408 sizeof(kern_ctl.ctl_name));
2409 kern_ctl.ctl_flags = CTL_FLAG_PRIVILEGED | CTL_FLAG_REG_EXTENDED;
2410 kern_ctl.ctl_sendsize = 512 * 1024; /* enough? */
2411 kern_ctl.ctl_recvsize = 512 * 1024; /* enough? */
2412 kern_ctl.ctl_connect = cfil_ctl_connect;
2413 kern_ctl.ctl_disconnect = cfil_ctl_disconnect;
2414 kern_ctl.ctl_send = cfil_ctl_send;
2415 kern_ctl.ctl_getopt = cfil_ctl_getopt;
2416 kern_ctl.ctl_setopt = cfil_ctl_setopt;
2417 kern_ctl.ctl_rcvd = cfil_ctl_rcvd;
2418 error = ctl_register(&kern_ctl, &cfil_kctlref);
2419 if (error != 0) {
2420 CFIL_LOG(LOG_ERR, "ctl_register failed: %d", error);
2421 return;
2422 }
2423
2424 // Spawn thread for garbage collection
2425 if (kernel_thread_start(cfil_udp_gc_thread_func, NULL,
2426 &cfil_udp_gc_thread) != KERN_SUCCESS) {
2427 panic_plain("%s: Can't create UDP GC thread", __func__);
2428 /* NOTREACHED */
2429 }
2430 /* this must not fail */
2431 VERIFY(cfil_udp_gc_thread != NULL);
2432
2433 // Spawn thread for statistics reporting
2434 if (kernel_thread_start(cfil_stats_report_thread_func, NULL,
2435 &cfil_stats_report_thread) != KERN_SUCCESS) {
2436 panic_plain("%s: Can't create statistics report thread", __func__);
2437 /* NOTREACHED */
2438 }
2439 /* this must not fail */
2440 VERIFY(cfil_stats_report_thread != NULL);
2441
2442 // Set UDP per-flow mbuf thresholds to 1/32 of platform max
2443 mbuf_limit = MAX(UDP_FLOW_GC_MBUF_CNT_MAX, (nmbclusters << MCLSHIFT) >> UDP_FLOW_GC_MBUF_SHIFT);
2444 cfil_udp_gc_mbuf_num_max = (mbuf_limit >> MCLSHIFT);
2445 cfil_udp_gc_mbuf_cnt_max = mbuf_limit;
2446
2447 memset(&global_cfil_stats_report_buffers, 0, sizeof(global_cfil_stats_report_buffers));
2448 }
2449
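/*
 * cfil_info_alloc
 *
 * Allocate and initialize the per-flow content filter state. For TCP the
 * cfil_info hangs directly off the socket (so_cfil); for UDP it is attached
 * to the per-flow hash entry. In both cases cfi_sock_id is built from the
 * socket generation count (upper 32 bits) and the flow hash (lower 32 bits).
 */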
2450 struct cfil_info *
2451 cfil_info_alloc(struct socket *so, struct cfil_hash_entry *hash_entry)
2452 {
2453 int kcunit;
2454 struct cfil_info *cfil_info = NULL;
2455 struct inpcb *inp = sotoinpcb(so);
2456
2457 CFIL_LOG(LOG_INFO, "");
2458
2459 socket_lock_assert_owned(so);
2460
2461 cfil_info = zalloc(cfil_info_zone);
2462 if (cfil_info == NULL) {
2463 goto done;
2464 }
2465 bzero(cfil_info, sizeof(struct cfil_info));
2466
2467 cfil_queue_init(&cfil_info->cfi_snd.cfi_inject_q);
2468 cfil_queue_init(&cfil_info->cfi_rcv.cfi_inject_q);
2469
2470 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
2471 struct cfil_entry *entry;
2472
2473 entry = &cfil_info->cfi_entries[kcunit - 1];
2474 entry->cfe_cfil_info = cfil_info;
2475
2476 /* Initialize the filter entry */
2477 entry->cfe_filter = NULL;
2478 entry->cfe_flags = 0;
2479 entry->cfe_necp_control_unit = 0;
2480 entry->cfe_snd.cfe_pass_offset = 0;
2481 entry->cfe_snd.cfe_peek_offset = 0;
2482 entry->cfe_snd.cfe_peeked = 0;
2483 entry->cfe_rcv.cfe_pass_offset = 0;
2484 entry->cfe_rcv.cfe_peek_offset = 0;
2485 entry->cfe_rcv.cfe_peeked = 0;
2486 /*
2487 * Timestamp the last action to avoid prematurely
2488 * triggering garbage collection
2489 */
2490 microuptime(&entry->cfe_last_action);
2491
2492 cfil_queue_init(&entry->cfe_snd.cfe_pending_q);
2493 cfil_queue_init(&entry->cfe_rcv.cfe_pending_q);
2494 cfil_queue_init(&entry->cfe_snd.cfe_ctl_q);
2495 cfil_queue_init(&entry->cfe_rcv.cfe_ctl_q);
2496 }
2497
2498 cfil_rw_lock_exclusive(&cfil_lck_rw);
2499
2500 /*
2501 * Create a cfi_sock_id that's not the socket pointer!
2502 */
2503
2504 if (hash_entry == NULL) {
2505 // This is the TCP case, cfil_info is tracked per socket
2506 if (inp->inp_flowhash == 0) {
2507 inp->inp_flowhash = inp_calc_flowhash(inp);
2508 }
2509
2510 so->so_cfil = cfil_info;
2511 cfil_info->cfi_so = so;
2512 cfil_info->cfi_sock_id =
2513 ((so->so_gencnt << 32) | inp->inp_flowhash);
2514 } else {
2515 // This is the UDP case, cfil_info is tracked in per-socket hash
2516 cfil_info->cfi_so = so;
2517 hash_entry->cfentry_cfil = cfil_info;
2518 cfil_info->cfi_hash_entry = hash_entry;
2519 cfil_info->cfi_sock_id = ((so->so_gencnt << 32) | (hash_entry->cfentry_flowhash & 0xffffffff));
2520 CFIL_LOG(LOG_DEBUG, "CFIL: UDP inp_flowhash %x so_gencnt %llx entry flowhash %x sockID %llx",
2521 inp->inp_flowhash, so->so_gencnt, hash_entry->cfentry_flowhash, cfil_info->cfi_sock_id);
2522
2523 // Wake up the GC thread if this is the first flow added
2524 if (cfil_sock_udp_attached_count == 0) {
2525 thread_wakeup((caddr_t)&cfil_sock_udp_attached_count);
2526 }
2527
2528 cfil_sock_udp_attached_count++;
2529 }
2530
2531 TAILQ_INSERT_TAIL(&cfil_sock_head, cfil_info, cfi_link);
2532 SLIST_INIT(&cfil_info->cfi_ordered_entries);
2533
2534 cfil_sock_attached_count++;
2535
2536 cfil_rw_unlock_exclusive(&cfil_lck_rw);
2537
2538 done:
2539 if (cfil_info != NULL) {
2540 OSIncrementAtomic(&cfil_stats.cfs_cfi_alloc_ok);
2541 } else {
2542 OSIncrementAtomic(&cfil_stats.cfs_cfi_alloc_fail);
2543 }
2544
2545 return cfil_info;
2546 }
2547
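/*
 * cfil_info_attach_unit
 *
 * Attach the socket's cfil_info to every registered content filter whose
 * NECP control unit matches (bitwise AND) filter_control_unit. Each matching
 * filter gets a cfil_entry linked into its cf_sock_entries list, and the
 * entries are kept in cfi_ordered_entries sorted by control unit. Returns
 * non-zero if at least one filter was attached.
 */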
2548 int
2549 cfil_info_attach_unit(struct socket *so, uint32_t filter_control_unit, struct cfil_info *cfil_info)
2550 {
2551 int kcunit;
2552 int attached = 0;
2553
2554 CFIL_LOG(LOG_INFO, "");
2555
2556 socket_lock_assert_owned(so);
2557
2558 cfil_rw_lock_exclusive(&cfil_lck_rw);
2559
2560 for (kcunit = 1;
2561 content_filters != NULL && kcunit <= MAX_CONTENT_FILTER;
2562 kcunit++) {
2563 struct content_filter *cfc = content_filters[kcunit - 1];
2564 struct cfil_entry *entry;
2565 struct cfil_entry *iter_entry;
2566 struct cfil_entry *iter_prev;
2567
2568 if (cfc == NULL) {
2569 continue;
2570 }
2571 if (!(cfc->cf_necp_control_unit & filter_control_unit)) {
2572 continue;
2573 }
2574
2575 entry = &cfil_info->cfi_entries[kcunit - 1];
2576
2577 entry->cfe_filter = cfc;
2578 entry->cfe_necp_control_unit = cfc->cf_necp_control_unit;
2579 TAILQ_INSERT_TAIL(&cfc->cf_sock_entries, entry, cfe_link);
2580 cfc->cf_sock_count++;
2581
2582 /* Insert the entry into the list ordered by control unit */
2583 iter_prev = NULL;
2584 SLIST_FOREACH(iter_entry, &cfil_info->cfi_ordered_entries, cfe_order_link) {
2585 if (entry->cfe_necp_control_unit < iter_entry->cfe_necp_control_unit) {
2586 break;
2587 }
2588 iter_prev = iter_entry;
2589 }
2590
2591 if (iter_prev == NULL) {
2592 SLIST_INSERT_HEAD(&cfil_info->cfi_ordered_entries, entry, cfe_order_link);
2593 } else {
2594 SLIST_INSERT_AFTER(iter_prev, entry, cfe_order_link);
2595 }
2596
2597 verify_content_filter(cfc);
2598 attached = 1;
2599 entry->cfe_flags |= CFEF_CFIL_ATTACHED;
2600 }
2601
2602 cfil_rw_unlock_exclusive(&cfil_lck_rw);
2603
2604 return attached;
2605 }
2606
2607 static void
2608 cfil_info_free(struct cfil_info *cfil_info)
2609 {
2610 int kcunit;
2611 uint64_t in_drain = 0;
2612 uint64_t out_drained = 0;
2613
2614 if (cfil_info == NULL) {
2615 return;
2616 }
2617
2618 CFIL_LOG(LOG_INFO, "");
2619
2620 cfil_rw_lock_exclusive(&cfil_lck_rw);
2621
2622 for (kcunit = 1;
2623 content_filters != NULL && kcunit <= MAX_CONTENT_FILTER;
2624 kcunit++) {
2625 struct cfil_entry *entry;
2626 struct content_filter *cfc;
2627
2628 entry = &cfil_info->cfi_entries[kcunit - 1];
2629
2630 /* Don't be silly and try to detach twice */
2631 if (entry->cfe_filter == NULL) {
2632 continue;
2633 }
2634
2635 cfc = content_filters[kcunit - 1];
2636
2637 VERIFY(cfc == entry->cfe_filter);
2638
2639 entry->cfe_filter = NULL;
2640 entry->cfe_necp_control_unit = 0;
2641 TAILQ_REMOVE(&cfc->cf_sock_entries, entry, cfe_link);
2642 cfc->cf_sock_count--;
2643
2644 verify_content_filter(cfc);
2645 }
2646 if (cfil_info->cfi_hash_entry != NULL) {
2647 cfil_sock_udp_attached_count--;
2648 }
2649 cfil_sock_attached_count--;
2650 TAILQ_REMOVE(&cfil_sock_head, cfil_info, cfi_link);
2651
2652 // Turn off stats reporting for cfil_info.
2653 cfil_info_stats_toggle(cfil_info, NULL, 0);
2654
2655 out_drained += cfil_queue_drain(&cfil_info->cfi_snd.cfi_inject_q);
2656 in_drain += cfil_queue_drain(&cfil_info->cfi_rcv.cfi_inject_q);
2657
2658 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
2659 struct cfil_entry *entry;
2660
2661 entry = &cfil_info->cfi_entries[kcunit - 1];
2662 out_drained += cfil_queue_drain(&entry->cfe_snd.cfe_pending_q);
2663 in_drain += cfil_queue_drain(&entry->cfe_rcv.cfe_pending_q);
2664 out_drained += cfil_queue_drain(&entry->cfe_snd.cfe_ctl_q);
2665 in_drain += cfil_queue_drain(&entry->cfe_rcv.cfe_ctl_q);
2666 }
2667 cfil_rw_unlock_exclusive(&cfil_lck_rw);
2668
2669 if (out_drained) {
2670 OSIncrementAtomic(&cfil_stats.cfs_flush_out_free);
2671 }
2672 if (in_drain) {
2673 OSIncrementAtomic(&cfil_stats.cfs_flush_in_free);
2674 }
2675
2676 zfree(cfil_info_zone, cfil_info);
2677 }
2678
2679 /*
2680 * Received a verdict from userspace for a socket.
2681 * Perform any delayed operation if needed.
2682 */
2683 static void
2684 cfil_sock_received_verdict(struct socket *so)
2685 {
2686 if (so == NULL || so->so_cfil == NULL) {
2687 return;
2688 }
2689
2690 so->so_cfil->cfi_flags |= CFIF_INITIAL_VERDICT;
2691
2692 /*
2693 * If socket has already been connected, trigger
2694 * soisconnected now.
2695 */
2696 if (so->so_cfil->cfi_flags & CFIF_SOCKET_CONNECTED) {
2697 so->so_cfil->cfi_flags &= ~CFIF_SOCKET_CONNECTED;
2698 soisconnected(so);
2699 return;
2700 }
2701 }
2702
2703 /*
2704 * Entry point from Sockets layer
2705 * The socket is locked.
2706 *
2707 * Checks whether a connected socket is subject to filtering and
2708 * still pending the initial verdict.
2709 */
2710 boolean_t
2711 cfil_sock_connected_pending_verdict(struct socket *so)
2712 {
2713 if (so == NULL || so->so_cfil == NULL) {
2714 return false;
2715 }
2716
2717 if (so->so_cfil->cfi_flags & CFIF_INITIAL_VERDICT) {
2718 return false;
2719 } else {
2720 /*
2721 * Remember that this socket is already connected, so
2722 * we will trigger soisconnected() upon receipt of the
2723 * initial verdict later.
2724 */
2725 so->so_cfil->cfi_flags |= CFIF_SOCKET_CONNECTED;
2726 return true;
2727 }
2728 }
2729
2730 boolean_t
2731 cfil_filter_present(void)
2732 {
2733 return cfil_active_count > 0;
2734 }
2735
2736 /*
2737 * Entry point from Sockets layer
2738 * The socket is locked.
2739 */
2740 errno_t
2741 cfil_sock_attach(struct socket *so, struct sockaddr *local, struct sockaddr *remote, int dir)
2742 {
2743 errno_t error = 0;
2744 uint32_t filter_control_unit;
2745
2746 socket_lock_assert_owned(so);
2747
2748 /* Limit ourselves to TCP sockets that are not MPTCP subflows */
2749 if ((so->so_proto->pr_domain->dom_family != PF_INET &&
2750 so->so_proto->pr_domain->dom_family != PF_INET6) ||
2751 so->so_proto->pr_type != SOCK_STREAM ||
2752 so->so_proto->pr_protocol != IPPROTO_TCP ||
2753 (so->so_flags & SOF_MP_SUBFLOW) != 0 ||
2754 (so->so_flags1 & SOF1_CONTENT_FILTER_SKIP) != 0) {
2755 goto done;
2756 }
2757
2758 filter_control_unit = necp_socket_get_content_filter_control_unit(so);
2759 if (filter_control_unit == 0) {
2760 goto done;
2761 }
2762
2763 if (filter_control_unit == NECP_FILTER_UNIT_NO_FILTER) {
2764 goto done;
2765 }
2766 if ((filter_control_unit & NECP_MASK_USERSPACE_ONLY) != 0) {
2767 OSIncrementAtomic(&cfil_stats.cfs_sock_userspace_only);
2768 goto done;
2769 }
2770 if (cfil_active_count == 0) {
2771 OSIncrementAtomic(&cfil_stats.cfs_sock_attach_in_vain);
2772 goto done;
2773 }
2774 if (so->so_cfil != NULL) {
2775 OSIncrementAtomic(&cfil_stats.cfs_sock_attach_already);
2776 CFIL_LOG(LOG_ERR, "already attached");
2777 } else {
2778 cfil_info_alloc(so, NULL);
2779 if (so->so_cfil == NULL) {
2780 error = ENOMEM;
2781 OSIncrementAtomic(&cfil_stats.cfs_sock_attach_no_mem);
2782 goto done;
2783 }
2784 so->so_cfil->cfi_dir = dir;
2785 }
2786 if (cfil_info_attach_unit(so, filter_control_unit, so->so_cfil) == 0) {
2787 CFIL_LOG(LOG_ERR, "cfil_info_attach_unit(%u) failed",
2788 filter_control_unit);
2789 OSIncrementAtomic(&cfil_stats.cfs_sock_attach_failed);
2790 goto done;
2791 }
2792 CFIL_LOG(LOG_INFO, "so %llx filter_control_unit %u sockID %llx",
2793 (uint64_t)VM_KERNEL_ADDRPERM(so),
2794 filter_control_unit, so->so_cfil->cfi_sock_id);
2795
2796 so->so_flags |= SOF_CONTENT_FILTER;
2797 OSIncrementAtomic(&cfil_stats.cfs_sock_attached);
2798
2799 /* Hold a reference on the socket */
2800 so->so_usecount++;
2801
2802 /*
2803 * Save the passed addresses for the attach event msg (in case a
2804 * resend is needed).
2805 */
2806 if (remote != NULL) {
2807 memcpy(&so->so_cfil->cfi_so_attach_faddr, remote, remote->sa_len);
2808 }
2809 if (local != NULL) {
2810 memcpy(&so->so_cfil->cfi_so_attach_laddr, local, local->sa_len);
2811 }
2812
2813 error = cfil_dispatch_attach_event(so, so->so_cfil, 0, dir);
2814 /* We can recover from flow control or out of memory errors */
2815 if (error == ENOBUFS || error == ENOMEM) {
2816 error = 0;
2817 } else if (error != 0) {
2818 goto done;
2819 }
2820
2821 CFIL_INFO_VERIFY(so->so_cfil);
2822 done:
2823 return error;
2824 }
2825
2826 /*
2827 * Entry point from Sockets layer
2828 * The socket is locked.
2829 */
2830 errno_t
2831 cfil_sock_detach(struct socket *so)
2832 {
2833 if (IS_UDP(so)) {
2834 cfil_db_free(so);
2835 return 0;
2836 }
2837
2838 if (so->so_cfil) {
2839 if (so->so_flags & SOF_CONTENT_FILTER) {
2840 so->so_flags &= ~SOF_CONTENT_FILTER;
2841 VERIFY(so->so_usecount > 0);
2842 so->so_usecount--;
2843 }
2844 cfil_info_free(so->so_cfil);
2845 so->so_cfil = NULL;
2846 OSIncrementAtomic(&cfil_stats.cfs_sock_detached);
2847 }
2848 return 0;
2849 }
2850
2851 /*
2852 * Fill in the address info of an event message from either
2853 * the socket or passed in address info.
2854 */
2855 static void
2856 cfil_fill_event_msg_addresses(struct cfil_hash_entry *entry, struct inpcb *inp,
2857 union sockaddr_in_4_6 *sin_src, union sockaddr_in_4_6 *sin_dst,
2858 boolean_t isIPv4, boolean_t outgoing)
2859 {
2860 if (isIPv4) {
2861 struct in_addr laddr = {0}, faddr = {0};
2862 u_int16_t lport = 0, fport = 0;
2863
2864 cfil_get_flow_address(entry, inp, &laddr, &faddr, &lport, &fport);
2865
2866 if (outgoing) {
2867 fill_ip_sockaddr_4_6(sin_src, laddr, lport);
2868 fill_ip_sockaddr_4_6(sin_dst, faddr, fport);
2869 } else {
2870 fill_ip_sockaddr_4_6(sin_src, faddr, fport);
2871 fill_ip_sockaddr_4_6(sin_dst, laddr, lport);
2872 }
2873 } else {
2874 struct in6_addr *laddr = NULL, *faddr = NULL;
2875 u_int16_t lport = 0, fport = 0;
2876
2877 cfil_get_flow_address_v6(entry, inp, &laddr, &faddr, &lport, &fport);
2878 if (outgoing) {
2879 fill_ip6_sockaddr_4_6(sin_src, laddr, lport);
2880 fill_ip6_sockaddr_4_6(sin_dst, faddr, fport);
2881 } else {
2882 fill_ip6_sockaddr_4_6(sin_src, faddr, fport);
2883 fill_ip6_sockaddr_4_6(sin_dst, laddr, lport);
2884 }
2885 }
2886 }
2887
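/*
 * cfil_dispatch_attach_event_sign
 *
 * Sign the attach event with the filter's crypto state. The signed payload
 * covers the sock id, direction, pid/uuid info, protocol and the local and
 * remote addresses taken from the message. If the local address is already
 * known at attach time, later data messages can reuse this signature.
 */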
2888 static boolean_t
2889 cfil_dispatch_attach_event_sign(cfil_crypto_state_t crypto_state,
2890 struct cfil_info *cfil_info,
2891 struct cfil_msg_sock_attached *msg)
2892 {
2893 struct cfil_crypto_data data = {};
2894
2895 if (crypto_state == NULL || msg == NULL || cfil_info == NULL) {
2896 return false;
2897 }
2898
2899 data.sock_id = msg->cfs_msghdr.cfm_sock_id;
2900 data.direction = msg->cfs_conn_dir;
2901
2902 data.pid = msg->cfs_pid;
2903 data.effective_pid = msg->cfs_e_pid;
2904 uuid_copy(data.uuid, msg->cfs_uuid);
2905 uuid_copy(data.effective_uuid, msg->cfs_e_uuid);
2906 data.socketProtocol = msg->cfs_sock_protocol;
2907 if (data.direction == CFS_CONNECTION_DIR_OUT) {
2908 data.remote.sin6 = msg->cfs_dst.sin6;
2909 data.local.sin6 = msg->cfs_src.sin6;
2910 } else {
2911 data.remote.sin6 = msg->cfs_src.sin6;
2912 data.local.sin6 = msg->cfs_dst.sin6;
2913 }
2914
2915 // At attach, if the local address is already present, subsequent data messages do not need to be re-signed.
2916 if (!NULLADDRESS(data.local)) {
2917 cfil_info->cfi_isSignatureLatest = true;
2918 }
2919
2920 msg->cfs_signature_length = sizeof(cfil_crypto_signature);
2921 if (cfil_crypto_sign_data(crypto_state, &data, msg->cfs_signature, &msg->cfs_signature_length) != 0) {
2922 msg->cfs_signature_length = 0;
2923 CFIL_LOG(LOG_ERR, "CFIL: Failed to sign attached msg <sockID %llu>",
2924 msg->cfs_msghdr.cfm_sock_id);
2925 return false;
2926 }
2927
2928 return true;
2929 }
2930
2931 static boolean_t
2932 cfil_dispatch_data_event_sign(cfil_crypto_state_t crypto_state,
2933 struct socket *so, struct cfil_info *cfil_info,
2934 struct cfil_msg_data_event *msg)
2935 {
2936 struct cfil_crypto_data data = {};
2937
2938 if (crypto_state == NULL || msg == NULL ||
2939 so == NULL || cfil_info == NULL) {
2940 return false;
2941 }
2942
2943 data.sock_id = cfil_info->cfi_sock_id;
2944 data.direction = cfil_info->cfi_dir;
2945 data.pid = so->last_pid;
2946 memcpy(data.uuid, so->last_uuid, sizeof(uuid_t));
2947 if (so->so_flags & SOF_DELEGATED) {
2948 data.effective_pid = so->e_pid;
2949 memcpy(data.effective_uuid, so->e_uuid, sizeof(uuid_t));
2950 } else {
2951 data.effective_pid = so->last_pid;
2952 memcpy(data.effective_uuid, so->last_uuid, sizeof(uuid_t));
2953 }
2954 data.socketProtocol = so->so_proto->pr_protocol;
2955
2956 if (data.direction == CFS_CONNECTION_DIR_OUT) {
2957 data.remote.sin6 = msg->cfc_dst.sin6;
2958 data.local.sin6 = msg->cfc_src.sin6;
2959 } else {
2960 data.remote.sin6 = msg->cfc_src.sin6;
2961 data.local.sin6 = msg->cfc_dst.sin6;
2962 }
2963
2964 // At first data, the local address may show up for the first time; update the address cache
2965 // so that subsequent data messages no longer need to be re-signed.
2966 if (!NULLADDRESS(data.local)) {
2967 memcpy(&cfil_info->cfi_so_attach_laddr, &data.local, data.local.sa.sa_len);
2968 cfil_info->cfi_isSignatureLatest = true;
2969 }
2970
2971 msg->cfd_signature_length = sizeof(cfil_crypto_signature);
2972 if (cfil_crypto_sign_data(crypto_state, &data, msg->cfd_signature, &msg->cfd_signature_length) != 0) {
2973 msg->cfd_signature_length = 0;
2974 CFIL_LOG(LOG_ERR, "CFIL: Failed to sign data msg <sockID %llu>",
2975 msg->cfd_msghdr.cfm_sock_id);
2976 return false;
2977 }
2978
2979 return true;
2980 }
2981
2982 static boolean_t
2983 cfil_dispatch_closed_event_sign(cfil_crypto_state_t crypto_state,
2984 struct socket *so, struct cfil_info *cfil_info,
2985 struct cfil_msg_sock_closed *msg)
2986 {
2987 struct cfil_crypto_data data = {};
2988 struct cfil_hash_entry hash_entry = {};
2989 struct cfil_hash_entry *hash_entry_ptr = NULL;
2990 struct inpcb *inp = (struct inpcb *)so->so_pcb;
2991
2992 if (crypto_state == NULL || msg == NULL ||
2993 so == NULL || inp == NULL || cfil_info == NULL) {
2994 return false;
2995 }
2996
2997 data.sock_id = cfil_info->cfi_sock_id;
2998 data.direction = cfil_info->cfi_dir;
2999
3000 data.pid = so->last_pid;
3001 memcpy(data.uuid, so->last_uuid, sizeof(uuid_t));
3002 if (so->so_flags & SOF_DELEGATED) {
3003 data.effective_pid = so->e_pid;
3004 memcpy(data.effective_uuid, so->e_uuid, sizeof(uuid_t));
3005 } else {
3006 data.effective_pid = so->last_pid;
3007 memcpy(data.effective_uuid, so->last_uuid, sizeof(uuid_t));
3008 }
3009 data.socketProtocol = so->so_proto->pr_protocol;
3010
3011 /*
3012 * Fill in address info:
3013 * For UDP, use the cfil_info hash entry directly.
3014 * For TCP, compose a hash entry from the saved addresses.
3015 */
3016 if (cfil_info->cfi_hash_entry != NULL) {
3017 hash_entry_ptr = cfil_info->cfi_hash_entry;
3018 } else if (cfil_info->cfi_so_attach_faddr.sa.sa_len > 0 ||
3019 cfil_info->cfi_so_attach_laddr.sa.sa_len > 0) {
3020 fill_cfil_hash_entry_from_address(&hash_entry, TRUE, &cfil_info->cfi_so_attach_laddr.sa);
3021 fill_cfil_hash_entry_from_address(&hash_entry, FALSE, &cfil_info->cfi_so_attach_faddr.sa);
3022 hash_entry_ptr = &hash_entry;
3023 }
3024 if (hash_entry_ptr != NULL) {
3025 boolean_t outgoing = (cfil_info->cfi_dir == CFS_CONNECTION_DIR_OUT);
3026 union sockaddr_in_4_6 *src = outgoing ? &data.local : &data.remote;
3027 union sockaddr_in_4_6 *dst = outgoing ? &data.remote : &data.local;
3028 cfil_fill_event_msg_addresses(hash_entry_ptr, inp, src, dst, inp->inp_vflag & INP_IPV4, outgoing);
3029 }
3030
3031 data.byte_count_in = cfil_info->cfi_byte_inbound_count;
3032 data.byte_count_out = cfil_info->cfi_byte_outbound_count;
3033
3034 msg->cfc_signature_length = sizeof(cfil_crypto_signature);
3035 if (cfil_crypto_sign_data(crypto_state, &data, msg->cfc_signature, &msg->cfc_signature_length) != 0) {
3036 msg->cfc_signature_length = 0;
3037 CFIL_LOG(LOG_ERR, "CFIL: Failed to sign closed msg <sockID %llu>",
3038 msg->cfc_msghdr.cfm_sock_id);
3039 return false;
3040 }
3041
3042 return true;
3043 }
3044
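/*
 * cfil_dispatch_attach_event
 *
 * Build and enqueue a CFM_OP_SOCKET_ATTACHED event on the filter's kernel
 * control socket, carrying socket family/type/protocol, addresses, the owning
 * and effective process info with its audit token, and a crypto signature.
 * A kcunit of 0 targets the first entry in cfi_ordered_entries. ENOBUFS marks
 * the filter flow controlled so the event can be resent later.
 */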
3045 static int
3046 cfil_dispatch_attach_event(struct socket *so, struct cfil_info *cfil_info,
3047 uint32_t kcunit, int conn_dir)
3048 {
3049 errno_t error = 0;
3050 struct cfil_entry *entry = NULL;
3051 struct cfil_msg_sock_attached msg_attached;
3052 struct content_filter *cfc = NULL;
3053 struct inpcb *inp = (struct inpcb *)so->so_pcb;
3054 struct cfil_hash_entry *hash_entry_ptr = NULL;
3055 struct cfil_hash_entry hash_entry;
3056
3057 memset(&hash_entry, 0, sizeof(struct cfil_hash_entry));
3058 proc_t p = PROC_NULL;
3059 task_t t = TASK_NULL;
3060
3061 socket_lock_assert_owned(so);
3062
3063 cfil_rw_lock_shared(&cfil_lck_rw);
3064
3065 if (so->so_proto == NULL || so->so_proto->pr_domain == NULL) {
3066 error = EINVAL;
3067 goto done;
3068 }
3069
3070 if (kcunit == 0) {
3071 entry = SLIST_FIRST(&cfil_info->cfi_ordered_entries);
3072 } else {
3073 entry = &cfil_info->cfi_entries[kcunit - 1];
3074 }
3075
3076 if (entry == NULL) {
3077 goto done;
3078 }
3079
3080 cfc = entry->cfe_filter;
3081 if (cfc == NULL) {
3082 goto done;
3083 }
3084
3085 if ((entry->cfe_flags & CFEF_SENT_SOCK_ATTACHED)) {
3086 goto done;
3087 }
3088
3089 if (kcunit == 0) {
3090 kcunit = CFI_ENTRY_KCUNIT(cfil_info, entry);
3091 }
3092
3093 CFIL_LOG(LOG_INFO, "so %llx filter_control_unit %u kcunit %u",
3094 (uint64_t)VM_KERNEL_ADDRPERM(so), entry->cfe_necp_control_unit, kcunit);
3095
3096 /* Would be wasteful to try when flow controlled */
3097 if (cfc->cf_flags & CFF_FLOW_CONTROLLED) {
3098 error = ENOBUFS;
3099 goto done;
3100 }
3101
3102 bzero(&msg_attached, sizeof(struct cfil_msg_sock_attached));
3103 msg_attached.cfs_msghdr.cfm_len = sizeof(struct cfil_msg_sock_attached);
3104 msg_attached.cfs_msghdr.cfm_version = CFM_VERSION_CURRENT;
3105 msg_attached.cfs_msghdr.cfm_type = CFM_TYPE_EVENT;
3106 msg_attached.cfs_msghdr.cfm_op = CFM_OP_SOCKET_ATTACHED;
3107 msg_attached.cfs_msghdr.cfm_sock_id = entry->cfe_cfil_info->cfi_sock_id;
3108
3109 msg_attached.cfs_sock_family = so->so_proto->pr_domain->dom_family;
3110 msg_attached.cfs_sock_type = so->so_proto->pr_type;
3111 msg_attached.cfs_sock_protocol = so->so_proto->pr_protocol;
3112 msg_attached.cfs_pid = so->last_pid;
3113 memcpy(msg_attached.cfs_uuid, so->last_uuid, sizeof(uuid_t));
3114 if (so->so_flags & SOF_DELEGATED) {
3115 msg_attached.cfs_e_pid = so->e_pid;
3116 memcpy(msg_attached.cfs_e_uuid, so->e_uuid, sizeof(uuid_t));
3117 } else {
3118 msg_attached.cfs_e_pid = so->last_pid;
3119 memcpy(msg_attached.cfs_e_uuid, so->last_uuid, sizeof(uuid_t));
3120 }
3121
3122 /*
3123 * Fill in address info:
3124 * For UDP, use the cfil_info hash entry directly.
3125 * For TCP, compose a hash entry from the saved addresses.
3126 */
3127 if (cfil_info->cfi_hash_entry != NULL) {
3128 hash_entry_ptr = cfil_info->cfi_hash_entry;
3129 } else if (cfil_info->cfi_so_attach_faddr.sa.sa_len > 0 ||
3130 cfil_info->cfi_so_attach_laddr.sa.sa_len > 0) {
3131 fill_cfil_hash_entry_from_address(&hash_entry, TRUE, &cfil_info->cfi_so_attach_laddr.sa);
3132 fill_cfil_hash_entry_from_address(&hash_entry, FALSE, &cfil_info->cfi_so_attach_faddr.sa);
3133 hash_entry_ptr = &hash_entry;
3134 }
3135 if (hash_entry_ptr != NULL) {
3136 cfil_fill_event_msg_addresses(hash_entry_ptr, inp,
3137 &msg_attached.cfs_src, &msg_attached.cfs_dst,
3138 inp->inp_vflag & INP_IPV4, conn_dir == CFS_CONNECTION_DIR_OUT);
3139 }
3140 msg_attached.cfs_conn_dir = conn_dir;
3141
3142 if (msg_attached.cfs_e_pid != 0) {
3143 p = proc_find(msg_attached.cfs_e_pid);
3144 if (p != PROC_NULL) {
3145 t = proc_task(p);
3146 if (t != TASK_NULL) {
3147 audit_token_t audit_token;
3148 mach_msg_type_number_t count = TASK_AUDIT_TOKEN_COUNT;
3149 if (task_info(t, TASK_AUDIT_TOKEN, (task_info_t)&audit_token, &count) == KERN_SUCCESS) {
3150 memcpy(&msg_attached.cfs_audit_token, &audit_token, sizeof(msg_attached.cfs_audit_token));
3151 } else {
3152 CFIL_LOG(LOG_ERR, "CFIL: Failed to get process audit token <sockID %llu> ",
3153 entry->cfe_cfil_info->cfi_sock_id);
3154 }
3155 }
3156 proc_rele(p);
3157 }
3158 }
3159
3160 cfil_dispatch_attach_event_sign(entry->cfe_filter->cf_crypto_state, cfil_info, &msg_attached);
3161
3162 #if LIFECYCLE_DEBUG
3163 CFIL_LOG(LOG_DEBUG, "CFIL: LIFECYCLE: SENDING ATTACH UP <sockID %llu> ",
3164 entry->cfe_cfil_info->cfi_sock_id);
3165 #endif
3166
3167 error = ctl_enqueuedata(entry->cfe_filter->cf_kcref,
3168 entry->cfe_filter->cf_kcunit,
3169 &msg_attached,
3170 sizeof(struct cfil_msg_sock_attached),
3171 CTL_DATA_EOR);
3172 if (error != 0) {
3173 CFIL_LOG(LOG_ERR, "ctl_enqueuedata() failed: %d", error);
3174 goto done;
3175 }
3176 microuptime(&entry->cfe_last_event);
3177 cfil_info->cfi_first_event.tv_sec = entry->cfe_last_event.tv_sec;
3178 cfil_info->cfi_first_event.tv_usec = entry->cfe_last_event.tv_usec;
3179
3180 entry->cfe_flags |= CFEF_SENT_SOCK_ATTACHED;
3181 OSIncrementAtomic(&cfil_stats.cfs_attach_event_ok);
3182 done:
3183
3184 /* We can recover from flow control */
3185 if (error == ENOBUFS) {
3186 entry->cfe_flags |= CFEF_FLOW_CONTROLLED;
3187 OSIncrementAtomic(&cfil_stats.cfs_attach_event_flow_control);
3188
3189 if (!cfil_rw_lock_shared_to_exclusive(&cfil_lck_rw)) {
3190 cfil_rw_lock_exclusive(&cfil_lck_rw);
3191 }
3192
3193 cfc->cf_flags |= CFF_FLOW_CONTROLLED;
3194
3195 cfil_rw_unlock_exclusive(&cfil_lck_rw);
3196 } else {
3197 if (error != 0) {
3198 OSIncrementAtomic(&cfil_stats.cfs_attach_event_fail);
3199 }
3200
3201 cfil_rw_unlock_shared(&cfil_lck_rw);
3202 }
3203 return error;
3204 }
3205
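/*
 * cfil_dispatch_disconnect_event
 *
 * Send a CFM_OP_DISCONNECT_OUT or CFM_OP_DISCONNECT_IN event to the filter,
 * at most once per direction. For the outgoing direction the event is held
 * back (EBUSY) until the control queue has been drained. ENOBUFS marks the
 * filter flow controlled.
 */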
3206 static int
3207 cfil_dispatch_disconnect_event(struct socket *so, struct cfil_info *cfil_info, uint32_t kcunit, int outgoing)
3208 {
3209 errno_t error = 0;
3210 struct mbuf *msg = NULL;
3211 struct cfil_entry *entry;
3212 struct cfe_buf *entrybuf;
3213 struct cfil_msg_hdr msg_disconnected;
3214 struct content_filter *cfc;
3215
3216 socket_lock_assert_owned(so);
3217
3218 cfil_rw_lock_shared(&cfil_lck_rw);
3219
3220 entry = &cfil_info->cfi_entries[kcunit - 1];
3221 if (outgoing) {
3222 entrybuf = &entry->cfe_snd;
3223 } else {
3224 entrybuf = &entry->cfe_rcv;
3225 }
3226
3227 cfc = entry->cfe_filter;
3228 if (cfc == NULL) {
3229 goto done;
3230 }
3231
3232 CFIL_LOG(LOG_INFO, "so %llx kcunit %u outgoing %d",
3233 (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit, outgoing);
3234
3235 /*
3236 * Send the disconnection event once
3237 */
3238 if ((outgoing && (entry->cfe_flags & CFEF_SENT_DISCONNECT_OUT)) ||
3239 (!outgoing && (entry->cfe_flags & CFEF_SENT_DISCONNECT_IN))) {
3240 CFIL_LOG(LOG_INFO, "so %llx disconnect already sent",
3241 (uint64_t)VM_KERNEL_ADDRPERM(so));
3242 goto done;
3243 }
3244
3245 /*
3246 * We're not disconnected as long as some data is waiting
3247 * to be delivered to the filter
3248 */
3249 if (outgoing && cfil_queue_empty(&entrybuf->cfe_ctl_q) == 0) {
3250 CFIL_LOG(LOG_INFO, "so %llx control queue not empty",
3251 (uint64_t)VM_KERNEL_ADDRPERM(so));
3252 error = EBUSY;
3253 goto done;
3254 }
3255 /* Would be wasteful to try when flow controlled */
3256 if (cfc->cf_flags & CFF_FLOW_CONTROLLED) {
3257 error = ENOBUFS;
3258 goto done;
3259 }
3260
3261 #if LIFECYCLE_DEBUG
3262 cfil_info_log(LOG_ERR, cfil_info, outgoing ?
3263 "CFIL: LIFECYCLE: OUT - SENDING DISCONNECT UP":
3264 "CFIL: LIFECYCLE: IN - SENDING DISCONNECT UP");
3265 #endif
3266
3267 bzero(&msg_disconnected, sizeof(struct cfil_msg_hdr));
3268 msg_disconnected.cfm_len = sizeof(struct cfil_msg_hdr);
3269 msg_disconnected.cfm_version = CFM_VERSION_CURRENT;
3270 msg_disconnected.cfm_type = CFM_TYPE_EVENT;
3271 msg_disconnected.cfm_op = outgoing ? CFM_OP_DISCONNECT_OUT :
3272 CFM_OP_DISCONNECT_IN;
3273 msg_disconnected.cfm_sock_id = entry->cfe_cfil_info->cfi_sock_id;
3274 error = ctl_enqueuedata(entry->cfe_filter->cf_kcref,
3275 entry->cfe_filter->cf_kcunit,
3276 &msg_disconnected,
3277 sizeof(struct cfil_msg_hdr),
3278 CTL_DATA_EOR);
3279 if (error != 0) {
3280 CFIL_LOG(LOG_ERR, "ctl_enqueuedata() failed: %d", error);
3281 mbuf_freem(msg);
3282 goto done;
3283 }
3284 microuptime(&entry->cfe_last_event);
3285 CFI_ADD_TIME_LOG(cfil_info, &entry->cfe_last_event, &cfil_info->cfi_first_event, msg_disconnected.cfm_op);
3286
3287 /* Remember we have sent the disconnection message */
3288 if (outgoing) {
3289 entry->cfe_flags |= CFEF_SENT_DISCONNECT_OUT;
3290 OSIncrementAtomic(&cfil_stats.cfs_disconnect_out_event_ok);
3291 } else {
3292 entry->cfe_flags |= CFEF_SENT_DISCONNECT_IN;
3293 OSIncrementAtomic(&cfil_stats.cfs_disconnect_in_event_ok);
3294 }
3295 done:
3296 if (error == ENOBUFS) {
3297 entry->cfe_flags |= CFEF_FLOW_CONTROLLED;
3298 OSIncrementAtomic(
3299 &cfil_stats.cfs_disconnect_event_flow_control);
3300
3301 if (!cfil_rw_lock_shared_to_exclusive(&cfil_lck_rw)) {
3302 cfil_rw_lock_exclusive(&cfil_lck_rw);
3303 }
3304
3305 cfc->cf_flags |= CFF_FLOW_CONTROLLED;
3306
3307 cfil_rw_unlock_exclusive(&cfil_lck_rw);
3308 } else {
3309 if (error != 0) {
3310 OSIncrementAtomic(
3311 &cfil_stats.cfs_disconnect_event_fail);
3312 }
3313
3314 cfil_rw_unlock_shared(&cfil_lck_rw);
3315 }
3316 return error;
3317 }
3318
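/*
 * cfil_dispatch_closed_event
 *
 * Send a single CFM_OP_SOCKET_CLOSED event per filter, and only after an
 * attach event has been delivered. The event carries the per-flow byte
 * counts, the operation time log and a crypto signature. ENOBUFS marks the
 * filter flow controlled.
 */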
3319 int
3320 cfil_dispatch_closed_event(struct socket *so, struct cfil_info *cfil_info, int kcunit)
3321 {
3322 struct cfil_entry *entry;
3323 struct cfil_msg_sock_closed msg_closed;
3324 errno_t error = 0;
3325 struct content_filter *cfc;
3326
3327 socket_lock_assert_owned(so);
3328
3329 cfil_rw_lock_shared(&cfil_lck_rw);
3330
3331 entry = &cfil_info->cfi_entries[kcunit - 1];
3332 cfc = entry->cfe_filter;
3333 if (cfc == NULL) {
3334 goto done;
3335 }
3336
3337 CFIL_LOG(LOG_INFO, "so %llx kcunit %d",
3338 (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit);
3339
3340 /* Would be wasteful to try when flow controlled */
3341 if (cfc->cf_flags & CFF_FLOW_CONTROLLED) {
3342 error = ENOBUFS;
3343 goto done;
3344 }
3345 /*
3346 * Send a single closed message per filter
3347 */
3348 if ((entry->cfe_flags & CFEF_SENT_SOCK_CLOSED) != 0) {
3349 goto done;
3350 }
3351 if ((entry->cfe_flags & CFEF_SENT_SOCK_ATTACHED) == 0) {
3352 goto done;
3353 }
3354
3355 microuptime(&entry->cfe_last_event);
3356 CFI_ADD_TIME_LOG(cfil_info, &entry->cfe_last_event, &cfil_info->cfi_first_event, CFM_OP_SOCKET_CLOSED);
3357
3358 bzero(&msg_closed, sizeof(struct cfil_msg_sock_closed));
3359 msg_closed.cfc_msghdr.cfm_len = sizeof(struct cfil_msg_sock_closed);
3360 msg_closed.cfc_msghdr.cfm_version = CFM_VERSION_CURRENT;
3361 msg_closed.cfc_msghdr.cfm_type = CFM_TYPE_EVENT;
3362 msg_closed.cfc_msghdr.cfm_op = CFM_OP_SOCKET_CLOSED;
3363 msg_closed.cfc_msghdr.cfm_sock_id = entry->cfe_cfil_info->cfi_sock_id;
3364 msg_closed.cfc_first_event.tv_sec = cfil_info->cfi_first_event.tv_sec;
3365 msg_closed.cfc_first_event.tv_usec = cfil_info->cfi_first_event.tv_usec;
3366 memcpy(msg_closed.cfc_op_time, cfil_info->cfi_op_time, sizeof(uint32_t) * CFI_MAX_TIME_LOG_ENTRY);
3367 memcpy(msg_closed.cfc_op_list, cfil_info->cfi_op_list, sizeof(unsigned char) * CFI_MAX_TIME_LOG_ENTRY);
3368 msg_closed.cfc_op_list_ctr = cfil_info->cfi_op_list_ctr;
3369 msg_closed.cfc_byte_inbound_count = cfil_info->cfi_byte_inbound_count;
3370 msg_closed.cfc_byte_outbound_count = cfil_info->cfi_byte_outbound_count;
3371
3372 cfil_dispatch_closed_event_sign(entry->cfe_filter->cf_crypto_state, so, cfil_info, &msg_closed);
3373
3374 #if LIFECYCLE_DEBUG
3375 CFIL_LOG(LOG_ERR, "CFIL: LIFECYCLE: SENDING CLOSED UP: <sock id %llu> op ctr %d, start time %llu.%llu", msg_closed.cfc_msghdr.cfm_sock_id, cfil_info->cfi_op_list_ctr, cfil_info->cfi_first_event.tv_sec, cfil_info->cfi_first_event.tv_usec);
3376 #endif
3377 /* for debugging
3378 * if (msg_closed.cfc_op_list_ctr > CFI_MAX_TIME_LOG_ENTRY) {
3379 * msg_closed.cfc_op_list_ctr = CFI_MAX_TIME_LOG_ENTRY; // just in case
3380 * }
3381 * for (unsigned int i = 0; i < msg_closed.cfc_op_list_ctr ; i++) {
3382 * CFIL_LOG(LOG_ERR, "MD: socket %llu event %2u, time + %u msec", msg_closed.cfc_msghdr.cfm_sock_id, (unsigned short)msg_closed.cfc_op_list[i], msg_closed.cfc_op_time[i]);
3383 * }
3384 */
3385
3386 error = ctl_enqueuedata(entry->cfe_filter->cf_kcref,
3387 entry->cfe_filter->cf_kcunit,
3388 &msg_closed,
3389 sizeof(struct cfil_msg_sock_closed),
3390 CTL_DATA_EOR);
3391 if (error != 0) {
3392 CFIL_LOG(LOG_ERR, "ctl_enqueuedata() failed: %d",
3393 error);
3394 goto done;
3395 }
3396
3397 entry->cfe_flags |= CFEF_SENT_SOCK_CLOSED;
3398 OSIncrementAtomic(&cfil_stats.cfs_closed_event_ok);
3399 done:
3400 /* We can recover from flow control */
3401 if (error == ENOBUFS) {
3402 entry->cfe_flags |= CFEF_FLOW_CONTROLLED;
3403 OSIncrementAtomic(&cfil_stats.cfs_closed_event_flow_control);
3404
3405 if (!cfil_rw_lock_shared_to_exclusive(&cfil_lck_rw)) {
3406 cfil_rw_lock_exclusive(&cfil_lck_rw);
3407 }
3408
3409 cfc->cf_flags |= CFF_FLOW_CONTROLLED;
3410
3411 cfil_rw_unlock_exclusive(&cfil_lck_rw);
3412 } else {
3413 if (error != 0) {
3414 OSIncrementAtomic(&cfil_stats.cfs_closed_event_fail);
3415 }
3416
3417 cfil_rw_unlock_shared(&cfil_lck_rw);
3418 }
3419
3420 return error;
3421 }
3422
3423 static void
3424 fill_ip6_sockaddr_4_6(union sockaddr_in_4_6 *sin46,
3425 struct in6_addr *ip6, u_int16_t port)
3426 {
3427 if (sin46 == NULL) {
3428 return;
3429 }
3430
3431 struct sockaddr_in6 *sin6 = &sin46->sin6;
3432
3433 sin6->sin6_family = AF_INET6;
3434 sin6->sin6_len = sizeof(*sin6);
3435 sin6->sin6_port = port;
3436 sin6->sin6_addr = *ip6;
3437 if (IN6_IS_SCOPE_EMBED(&sin6->sin6_addr)) {
3438 sin6->sin6_scope_id = ntohs(sin6->sin6_addr.s6_addr16[1]);
3439 sin6->sin6_addr.s6_addr16[1] = 0;
3440 }
3441 }
3442
3443 static void
3444 fill_ip_sockaddr_4_6(union sockaddr_in_4_6 *sin46,
3445 struct in_addr ip, u_int16_t port)
3446 {
3447 if (sin46 == NULL) {
3448 return;
3449 }
3450
3451 struct sockaddr_in *sin = &sin46->sin;
3452
3453 sin->sin_family = AF_INET;
3454 sin->sin_len = sizeof(*sin);
3455 sin->sin_port = port;
3456 sin->sin_addr.s_addr = ip.s_addr;
3457 }
3458
3459 static void
3460 cfil_get_flow_address_v6(struct cfil_hash_entry *entry, struct inpcb *inp,
3461 struct in6_addr **laddr, struct in6_addr **faddr,
3462 u_int16_t *lport, u_int16_t *fport)
3463 {
3464 if (entry != NULL) {
3465 *laddr = &entry->cfentry_laddr.addr6;
3466 *faddr = &entry->cfentry_faddr.addr6;
3467 *lport = entry->cfentry_lport;
3468 *fport = entry->cfentry_fport;
3469 } else {
3470 *laddr = &inp->in6p_laddr;
3471 *faddr = &inp->in6p_faddr;
3472 *lport = inp->inp_lport;
3473 *fport = inp->inp_fport;
3474 }
3475 }
3476
3477 static void
3478 cfil_get_flow_address(struct cfil_hash_entry *entry, struct inpcb *inp,
3479 struct in_addr *laddr, struct in_addr *faddr,
3480 u_int16_t *lport, u_int16_t *fport)
3481 {
3482 if (entry != NULL) {
3483 *laddr = entry->cfentry_laddr.addr46.ia46_addr4;
3484 *faddr = entry->cfentry_faddr.addr46.ia46_addr4;
3485 *lport = entry->cfentry_lport;
3486 *fport = entry->cfentry_fport;
3487 } else {
3488 *laddr = inp->inp_laddr;
3489 *faddr = inp->inp_faddr;
3490 *lport = inp->inp_lport;
3491 *fport = inp->inp_fport;
3492 }
3493 }
3494
3495 static int
3496 cfil_dispatch_data_event(struct socket *so, struct cfil_info *cfil_info, uint32_t kcunit, int outgoing,
3497 struct mbuf *data, unsigned int copyoffset, unsigned int copylen)
3498 {
3499 errno_t error = 0;
3500 struct mbuf *copy = NULL;
3501 struct mbuf *msg = NULL;
3502 unsigned int one = 1;
3503 struct cfil_msg_data_event *data_req;
3504 size_t hdrsize;
3505 struct inpcb *inp = (struct inpcb *)so->so_pcb;
3506 struct cfil_entry *entry;
3507 struct cfe_buf *entrybuf;
3508 struct content_filter *cfc;
3509 struct timeval tv;
3510
3511 cfil_rw_lock_shared(&cfil_lck_rw);
3512
3513 entry = &cfil_info->cfi_entries[kcunit - 1];
3514 if (outgoing) {
3515 entrybuf = &entry->cfe_snd;
3516 } else {
3517 entrybuf = &entry->cfe_rcv;
3518 }
3519
3520 cfc = entry->cfe_filter;
3521 if (cfc == NULL) {
3522 goto done;
3523 }
3524
3525 data = cfil_data_start(data);
3526 if (data == NULL || (data->m_flags & M_PKTHDR) == 0) {
3527 CFIL_LOG(LOG_ERR, "NOT PKTHDR");
3528 goto done;
3529 }
3530
3531 CFIL_LOG(LOG_INFO, "so %llx kcunit %u outgoing %d",
3532 (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit, outgoing);
3533
3534 socket_lock_assert_owned(so);
3535
3536 /* Would be wasteful to try */
3537 if (cfc->cf_flags & CFF_FLOW_CONTROLLED) {
3538 error = ENOBUFS;
3539 goto done;
3540 }
3541
3542 /* Make a copy of the data to pass to kernel control socket */
3543 copy = m_copym_mode(data, copyoffset, copylen, M_DONTWAIT,
3544 M_COPYM_NOOP_HDR);
3545 if (copy == NULL) {
3546 CFIL_LOG(LOG_ERR, "m_copym_mode() failed");
3547 error = ENOMEM;
3548 goto done;
3549 }
3550
3551 /* We need an mbuf packet for the message header */
3552 hdrsize = sizeof(struct cfil_msg_data_event);
3553 error = mbuf_allocpacket(MBUF_DONTWAIT, hdrsize, &one, &msg);
3554 if (error != 0) {
3555 CFIL_LOG(LOG_ERR, "mbuf_allocpacket() failed");
3556 m_freem(copy);
3557 /*
3558 * ENOBUFS is reserved to indicate flow control, so report ENOMEM here instead
3559 */
3560 error = ENOMEM;
3561 goto done;
3562 }
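/*
 * The event is sent as a single packet: the leading mbuf holds the
 * cfil_msg_data_event header and the copied payload is chained right
 * behind it, so the packet header length covers header plus data.
 */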
3563 mbuf_setlen(msg, hdrsize);
3564 mbuf_pkthdr_setlen(msg, hdrsize + copylen);
3565 msg->m_next = copy;
3566 data_req = (struct cfil_msg_data_event *)mbuf_data(msg);
3567 bzero(data_req, hdrsize);
3568 data_req->cfd_msghdr.cfm_len = hdrsize + copylen;
3569 data_req->cfd_msghdr.cfm_version = 1;
3570 data_req->cfd_msghdr.cfm_type = CFM_TYPE_EVENT;
3571 data_req->cfd_msghdr.cfm_op =
3572 outgoing ? CFM_OP_DATA_OUT : CFM_OP_DATA_IN;
3573 data_req->cfd_msghdr.cfm_sock_id =
3574 entry->cfe_cfil_info->cfi_sock_id;
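/*
 * cfd_start_offset/cfd_end_offset are absolute byte offsets from the
 * start of the flow in this direction; the pass and peek offsets the
 * agent sends back are interpreted in the same coordinate space.
 */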
3575 data_req->cfd_start_offset = entrybuf->cfe_peeked;
3576 data_req->cfd_end_offset = entrybuf->cfe_peeked + copylen;
3577
3578 /*
3579 * Copy address/port into event msg.
3580 * For non-connected sockets we need to copy the addresses from the
3581 * passed parameters
3582 */
3583 cfil_fill_event_msg_addresses(cfil_info->cfi_hash_entry, inp,
3584 &data_req->cfc_src, &data_req->cfc_dst,
3585 inp->inp_vflag & INP_IPV4, outgoing);
3586
3587 if (cfil_info->cfi_isSignatureLatest == false) {
3588 cfil_dispatch_data_event_sign(entry->cfe_filter->cf_crypto_state, so, cfil_info, data_req);
3589 }
3590
3591 microuptime(&tv);
3592 CFI_ADD_TIME_LOG(cfil_info, &tv, &cfil_info->cfi_first_event, data_req->cfd_msghdr.cfm_op);
3593
3594 /* Pass the message to the content filter */
3595 error = ctl_enqueuembuf(entry->cfe_filter->cf_kcref,
3596 entry->cfe_filter->cf_kcunit,
3597 msg, CTL_DATA_EOR);
3598 if (error != 0) {
3599 CFIL_LOG(LOG_ERR, "ctl_enqueuembuf() failed: %d", error);
3600 mbuf_freem(msg);
3601 goto done;
3602 }
3603 entry->cfe_flags &= ~CFEF_FLOW_CONTROLLED;
3604 OSIncrementAtomic(&cfil_stats.cfs_data_event_ok);
3605
3606 #if VERDICT_DEBUG
3607 CFIL_LOG(LOG_ERR, "CFIL: VERDICT ACTION: so %llx sockID %llu outgoing %d: mbuf %llx copyoffset %u copylen %u",
3608 (uint64_t)VM_KERNEL_ADDRPERM(so), cfil_info->cfi_sock_id, outgoing, (uint64_t)VM_KERNEL_ADDRPERM(data), copyoffset, copylen);
3609 #endif
3610
3611 done:
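/*
 * ENOBUFS means the kernel control socket is flow controlled: mark the
 * entry and its filter so delivery can be retried later, once the agent
 * drains its control socket.
 */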
3612 if (error == ENOBUFS) {
3613 entry->cfe_flags |= CFEF_FLOW_CONTROLLED;
3614 OSIncrementAtomic(
3615 &cfil_stats.cfs_data_event_flow_control);
3616
3617 if (!cfil_rw_lock_shared_to_exclusive(&cfil_lck_rw)) {
3618 cfil_rw_lock_exclusive(&cfil_lck_rw);
3619 }
3620
3621 cfc->cf_flags |= CFF_FLOW_CONTROLLED;
3622
3623 cfil_rw_unlock_exclusive(&cfil_lck_rw);
3624 } else {
3625 if (error != 0) {
3626 OSIncrementAtomic(&cfil_stats.cfs_data_event_fail);
3627 }
3628
3629 cfil_rw_unlock_shared(&cfil_lck_rw);
3630 }
3631 return error;
3632 }
3633
3634 /*
3635 * Process the queue of data waiting to be delivered to the content filter
3636 */
3637 static int
3638 cfil_data_service_ctl_q(struct socket *so, struct cfil_info *cfil_info, uint32_t kcunit, int outgoing)
3639 {
3640 errno_t error = 0;
3641 struct mbuf *data, *tmp = NULL;
3642 unsigned int datalen = 0, copylen = 0, copyoffset = 0;
3643 struct cfil_entry *entry;
3644 struct cfe_buf *entrybuf;
3645 uint64_t currentoffset = 0;
3646
3647 if (cfil_info == NULL) {
3648 return 0;
3649 }
3650
3651 CFIL_LOG(LOG_INFO, "so %llx kcunit %u outgoing %d",
3652 (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit, outgoing);
3653
3654 socket_lock_assert_owned(so);
3655
3656 entry = &cfil_info->cfi_entries[kcunit - 1];
3657 if (outgoing) {
3658 entrybuf = &entry->cfe_snd;
3659 } else {
3660 entrybuf = &entry->cfe_rcv;
3661 }
3662
3663 /* Send attached message if not yet done */
3664 if ((entry->cfe_flags & CFEF_SENT_SOCK_ATTACHED) == 0) {
3665 error = cfil_dispatch_attach_event(so, cfil_info, CFI_ENTRY_KCUNIT(cfil_info, entry),
3666 outgoing ? CFS_CONNECTION_DIR_OUT : CFS_CONNECTION_DIR_IN);
3667 if (error != 0) {
3668 /* We can recover from flow control */
3669 if (error == ENOBUFS || error == ENOMEM) {
3670 error = 0;
3671 }
3672 goto done;
3673 }
3674 } else if ((entry->cfe_flags & CFEF_DATA_START) == 0) {
3675 OSIncrementAtomic(&cfil_stats.cfs_ctl_q_not_started);
3676 goto done;
3677 }
3678
3679 #if DATA_DEBUG
3680 CFIL_LOG(LOG_DEBUG, "CFIL: SERVICE CTL-Q: pass_offset %llu peeked %llu peek_offset %llu",
3681 entrybuf->cfe_pass_offset,
3682 entrybuf->cfe_peeked,
3683 entrybuf->cfe_peek_offset);
3684 #endif
3685
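/*
 * Example: with q_start == 0 and cfe_pass_offset == 100, a first mbuf of
 * 150 bytes is only a partial pass; cfe_peeked advances to 100 but the
 * mbuf stays on the control queue until the pass offset moves past it.
 */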
3686 /* Move all data that can pass */
3687 while ((data = cfil_queue_first(&entrybuf->cfe_ctl_q)) != NULL &&
3688 entrybuf->cfe_ctl_q.q_start < entrybuf->cfe_pass_offset) {
3689 datalen = cfil_data_length(data, NULL, NULL);
3690 tmp = data;
3691
3692 if (entrybuf->cfe_ctl_q.q_start + datalen <=
3693 entrybuf->cfe_pass_offset) {
3694 /*
3695 * The first mbuf can fully pass
3696 */
3697 copylen = datalen;
3698 } else {
3699 /*
3700 * The first mbuf can partially pass
3701 */
3702 copylen = entrybuf->cfe_pass_offset -
3703 entrybuf->cfe_ctl_q.q_start;
3704 }
3705 VERIFY(copylen <= datalen);
3706
3707 #if DATA_DEBUG
3708 CFIL_LOG(LOG_DEBUG,
3709 "CFIL: SERVICE CTL-Q PASSING: %llx first %llu peeked %llu pass %llu peek %llu"
3710 "datalen %u copylen %u",
3711 (uint64_t)VM_KERNEL_ADDRPERM(tmp),
3712 entrybuf->cfe_ctl_q.q_start,
3713 entrybuf->cfe_peeked,
3714 entrybuf->cfe_pass_offset,
3715 entrybuf->cfe_peek_offset,
3716 datalen, copylen);
3717 #endif
3718
3719 /*
3720 * Data that passes has been peeked at explicitly or
3721 * implicitly
3722 */
3723 if (entrybuf->cfe_ctl_q.q_start + copylen >
3724 entrybuf->cfe_peeked) {
3725 entrybuf->cfe_peeked =
3726 entrybuf->cfe_ctl_q.q_start + copylen;
3727 }
3728 /*
3729 * Stop on partial pass
3730 */
3731 if (copylen < datalen) {
3732 break;
3733 }
3734
3735 /* All good, move full data from ctl queue to pending queue */
3736 cfil_queue_remove(&entrybuf->cfe_ctl_q, data, datalen);
3737
3738 cfil_queue_enqueue(&entrybuf->cfe_pending_q, data, datalen);
3739 if (outgoing) {
3740 OSAddAtomic64(datalen,
3741 &cfil_stats.cfs_pending_q_out_enqueued);
3742 } else {
3743 OSAddAtomic64(datalen,
3744 &cfil_stats.cfs_pending_q_in_enqueued);
3745 }
3746 }
3747 CFIL_INFO_VERIFY(cfil_info);
3748 if (tmp != NULL) {
3749 CFIL_LOG(LOG_DEBUG,
3750 "%llx first %llu peeked %llu pass %llu peek %llu"
3751 "datalen %u copylen %u",
3752 (uint64_t)VM_KERNEL_ADDRPERM(tmp),
3753 entrybuf->cfe_ctl_q.q_start,
3754 entrybuf->cfe_peeked,
3755 entrybuf->cfe_pass_offset,
3756 entrybuf->cfe_peek_offset,
3757 datalen, copylen);
3758 }
3759 tmp = NULL;
3760
3761 /* Now deal with remaining data the filter wants to peek at */
3762 for (data = cfil_queue_first(&entrybuf->cfe_ctl_q),
3763 currentoffset = entrybuf->cfe_ctl_q.q_start;
3764 data != NULL && currentoffset < entrybuf->cfe_peek_offset;
3765 data = cfil_queue_next(&entrybuf->cfe_ctl_q, data),
3766 currentoffset += datalen) {
3767 datalen = cfil_data_length(data, NULL, NULL);
3768 tmp = data;
3769
3770 /* We've already peeked at this mbuf */
3771 if (currentoffset + datalen <= entrybuf->cfe_peeked) {
3772 continue;
3773 }
3774 /*
3775 * The data in the first mbuf may have been
3776 * partially peeked at
3777 */
3778 copyoffset = entrybuf->cfe_peeked - currentoffset;
3779 VERIFY(copyoffset < datalen);
3780 copylen = datalen - copyoffset;
3781 VERIFY(copylen <= datalen);
3782 /*
3783 * Do not copy more than needed
3784 */
3785 if (currentoffset + copyoffset + copylen >
3786 entrybuf->cfe_peek_offset) {
3787 copylen = entrybuf->cfe_peek_offset -
3788 (currentoffset + copyoffset);
3789 }
3790
3791 #if DATA_DEBUG
3792 CFIL_LOG(LOG_DEBUG,
3793 "CFIL: SERVICE CTL-Q PEEKING: %llx current %llu peeked %llu pass %llu peek %llu "
3794 "datalen %u copylen %u copyoffset %u",
3795 (uint64_t)VM_KERNEL_ADDRPERM(tmp),
3796 currentoffset,
3797 entrybuf->cfe_peeked,
3798 entrybuf->cfe_pass_offset,
3799 entrybuf->cfe_peek_offset,
3800 datalen, copylen, copyoffset);
3801 #endif
3802
3803 /*
3804 * Stop if there is nothing more to peek at
3805 */
3806 if (copylen == 0) {
3807 break;
3808 }
3809 /*
3810 * Let the filter get a peek at this span of data
3811 */
3812 error = cfil_dispatch_data_event(so, cfil_info, kcunit,
3813 outgoing, data, copyoffset, copylen);
3814 if (error != 0) {
3815 /* On error, leave data in ctl_q */
3816 break;
3817 }
3818 entrybuf->cfe_peeked += copylen;
3819 if (outgoing) {
3820 OSAddAtomic64(copylen,
3821 &cfil_stats.cfs_ctl_q_out_peeked);
3822 } else {
3823 OSAddAtomic64(copylen,
3824 &cfil_stats.cfs_ctl_q_in_peeked);
3825 }
3826
3827 /* Stop when data could not be fully peeked at */
3828 if (copylen + copyoffset < datalen) {
3829 break;
3830 }
3831 }
3832 CFIL_INFO_VERIFY(cfil_info);
3833 if (tmp != NULL) {
3834 CFIL_LOG(LOG_DEBUG,
3835 "%llx first %llu peeked %llu pass %llu peek %llu"
3836 "datalen %u copylen %u copyoffset %u",
3837 (uint64_t)VM_KERNEL_ADDRPERM(tmp),
3838 currentoffset,
3839 entrybuf->cfe_peeked,
3840 entrybuf->cfe_pass_offset,
3841 entrybuf->cfe_peek_offset,
3842 datalen, copylen, copyoffset);
3843 }
3844
3845 /*
3846 * Process data that has passed the filter
3847 */
3848 error = cfil_service_pending_queue(so, cfil_info, kcunit, outgoing);
3849 if (error != 0) {
3850 CFIL_LOG(LOG_ERR, "cfil_service_pending_queue() error %d",
3851 error);
3852 goto done;
3853 }
3854
3855 /*
3856 * Dispatch disconnect events that could not be sent
3857 */
3858 if (cfil_info == NULL) {
3859 goto done;
3860 } else if (outgoing) {
3861 if ((cfil_info->cfi_flags & CFIF_SHUT_WR) &&
3862 !(entry->cfe_flags & CFEF_SENT_DISCONNECT_OUT)) {
3863 cfil_dispatch_disconnect_event(so, cfil_info, kcunit, 1);
3864 }
3865 } else {
3866 if ((cfil_info->cfi_flags & CFIF_SHUT_RD) &&
3867 !(entry->cfe_flags & CFEF_SENT_DISCONNECT_IN)) {
3868 cfil_dispatch_disconnect_event(so, cfil_info, kcunit, 0);
3869 }
3870 }
3871
3872 done:
3873 CFIL_LOG(LOG_DEBUG,
3874 "first %llu peeked %llu pass %llu peek %llu",
3875 entrybuf->cfe_ctl_q.q_start,
3876 entrybuf->cfe_peeked,
3877 entrybuf->cfe_pass_offset,
3878 entrybuf->cfe_peek_offset);
3879
3880 CFIL_INFO_VERIFY(cfil_info);
3881 return error;
3882 }
3883
3884 /*
3885 * cfil_data_filter()
3886 *
3887 * Process data for a content filter installed on a socket
3888 */
3889 int
3890 cfil_data_filter(struct socket *so, struct cfil_info *cfil_info, uint32_t kcunit, int outgoing,
3891 struct mbuf *data, uint64_t datalen)
3892 {
3893 errno_t error = 0;
3894 struct cfil_entry *entry;
3895 struct cfe_buf *entrybuf;
3896
3897 CFIL_LOG(LOG_INFO, "so %llx kcunit %u outgoing %d",
3898 (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit, outgoing);
3899
3900 socket_lock_assert_owned(so);
3901
3902 entry = &cfil_info->cfi_entries[kcunit - 1];
3903 if (outgoing) {
3904 entrybuf = &entry->cfe_snd;
3905 } else {
3906 entrybuf = &entry->cfe_rcv;
3907 }
3908
3909 /* Are we attached to the filter? */
3910 if (entry->cfe_filter == NULL) {
3911 error = 0;
3912 goto done;
3913 }
3914
3915 /* Dispatch to filters */
3916 cfil_queue_enqueue(&entrybuf->cfe_ctl_q, data, datalen);
3917 if (outgoing) {
3918 OSAddAtomic64(datalen,
3919 &cfil_stats.cfs_ctl_q_out_enqueued);
3920 } else {
3921 OSAddAtomic64(datalen,
3922 &cfil_stats.cfs_ctl_q_in_enqueued);
3923 }
3924
3925 error = cfil_data_service_ctl_q(so, cfil_info, kcunit, outgoing);
3926 if (error != 0) {
3927 CFIL_LOG(LOG_ERR, "cfil_data_service_ctl_q() error %d",
3928 error);
3929 }
3930 /*
3931 * We have to return EJUSTRETURN in all cases to avoid double free
3932 * by socket layer
3933 */
3934 error = EJUSTRETURN;
3935 done:
3936 CFIL_INFO_VERIFY(cfil_info);
3937
3938 CFIL_LOG(LOG_INFO, "return %d", error);
3939 return error;
3940 }
3941
3942 /*
3943 * cfil_service_inject_queue() re-injects data that passed the
3944 * content filters
3945 */
3946 static int
3947 cfil_service_inject_queue(struct socket *so, struct cfil_info *cfil_info, int outgoing)
3948 {
3949 mbuf_t data;
3950 unsigned int datalen;
3951 int mbcnt = 0;
3952 int mbnum = 0;
3953 errno_t error = 0;
3954 struct cfi_buf *cfi_buf;
3955 struct cfil_queue *inject_q;
3956 int need_rwakeup = 0;
3957 int count = 0;
3958
3959 if (cfil_info == NULL) {
3960 return 0;
3961 }
3962
3963 socket_lock_assert_owned(so);
3964
3965 if (outgoing) {
3966 cfi_buf = &cfil_info->cfi_snd;
3967 cfil_info->cfi_flags &= ~CFIF_RETRY_INJECT_OUT;
3968 } else {
3969 cfi_buf = &cfil_info->cfi_rcv;
3970 cfil_info->cfi_flags &= ~CFIF_RETRY_INJECT_IN;
3971 }
3972 inject_q = &cfi_buf->cfi_inject_q;
3973
3974 if (cfil_queue_empty(inject_q)) {
3975 return 0;
3976 }
3977
3978 #if DATA_DEBUG | VERDICT_DEBUG
3979 CFIL_LOG(LOG_ERR, "CFIL: SERVICE INJECT-Q: <so %llx> outgoing %d queue len %llu",
3980 (uint64_t)VM_KERNEL_ADDRPERM(so), outgoing, cfil_queue_len(inject_q));
3981 #endif
3982
3983 while ((data = cfil_queue_first(inject_q)) != NULL) {
3984 datalen = cfil_data_length(data, &mbcnt, &mbnum);
3985
3986 #if DATA_DEBUG
3987 CFIL_LOG(LOG_DEBUG, "CFIL: SERVICE INJECT-Q: <so %llx> data %llx datalen %u (mbcnt %u)",
3988 (uint64_t)VM_KERNEL_ADDRPERM(so),
3989 (uint64_t)VM_KERNEL_ADDRPERM(data), datalen, mbcnt);
3990 #endif
3991
3992 /* Remove data from queue and adjust stats */
3993 cfil_queue_remove(inject_q, data, datalen);
3994 cfi_buf->cfi_pending_first += datalen;
3995 cfi_buf->cfi_pending_mbcnt -= mbcnt;
3996 cfi_buf->cfi_pending_mbnum -= mbnum;
3997 cfil_info_buf_verify(cfi_buf);
3998
3999 if (outgoing) {
4000 error = sosend_reinject(so, NULL, data, NULL, 0);
4001 if (error != 0) {
4002 #if DATA_DEBUG
4003 cfil_info_log(LOG_ERR, cfil_info, "CFIL: Error: sosend_reinject() failed");
4004 CFIL_LOG(LOG_ERR, "### sosend() failed %d", error);
4005 #endif
4006 break;
4007 }
4008 // At least one injection succeeded, need to wake up pending threads.
4009 need_rwakeup = 1;
4010 } else {
4011 data->m_flags |= M_SKIPCFIL;
4012
4013 /*
4014 * NOTE: We currently only support TCP and UDP.
4015 * For RAWIP, MPTCP and message TCP we'll
4016 * need to call the appropriate sbappendxxx()
4017 * or fix sock_inject_data_in()
4018 */
4019 if (IS_UDP(so) == TRUE) {
4020 if (sbappendchain(&so->so_rcv, data, 0)) {
4021 need_rwakeup = 1;
4022 }
4023 } else {
4024 if (sbappendstream(&so->so_rcv, data)) {
4025 need_rwakeup = 1;
4026 }
4027 }
4028 }
4029
4030 if (outgoing) {
4031 OSAddAtomic64(datalen,
4032 &cfil_stats.cfs_inject_q_out_passed);
4033 } else {
4034 OSAddAtomic64(datalen,
4035 &cfil_stats.cfs_inject_q_in_passed);
4036 }
4037
4038 count++;
4039 }
4040
4041 #if DATA_DEBUG | VERDICT_DEBUG
4042 CFIL_LOG(LOG_ERR, "CFIL: SERVICE INJECT-Q: <so %llx> injected %d",
4043 (uint64_t)VM_KERNEL_ADDRPERM(so), count);
4044 #endif
4045
4046 /* A single wakeup for several packets is more efficient */
4047 if (need_rwakeup) {
4048 if (outgoing == TRUE) {
4049 sowwakeup(so);
4050 } else {
4051 sorwakeup(so);
4052 }
4053 }
4054
4055 if (error != 0 && cfil_info) {
4056 if (error == ENOBUFS) {
4057 OSIncrementAtomic(&cfil_stats.cfs_inject_q_nobufs);
4058 }
4059 if (error == ENOMEM) {
4060 OSIncrementAtomic(&cfil_stats.cfs_inject_q_nomem);
4061 }
4062
4063 if (outgoing) {
4064 cfil_info->cfi_flags |= CFIF_RETRY_INJECT_OUT;
4065 OSIncrementAtomic(&cfil_stats.cfs_inject_q_out_fail);
4066 } else {
4067 cfil_info->cfi_flags |= CFIF_RETRY_INJECT_IN;
4068 OSIncrementAtomic(&cfil_stats.cfs_inject_q_in_fail);
4069 }
4070 }
4071
4072 /*
4073 * Notify shutdown and close-wait waiters now that re-injection is done
4074 */
4075 if (cfil_info && (cfil_info->cfi_flags & CFIF_SHUT_WR)) {
4076 cfil_sock_notify_shutdown(so, SHUT_WR);
4077 if (cfil_sock_data_pending(&so->so_snd) == 0) {
4078 soshutdownlock_final(so, SHUT_WR);
4079 }
4080 }
4081 if (cfil_info && (cfil_info->cfi_flags & CFIF_CLOSE_WAIT)) {
4082 if (cfil_filters_attached(so) == 0) {
4083 CFIL_LOG(LOG_INFO, "so %llx waking",
4084 (uint64_t)VM_KERNEL_ADDRPERM(so));
4085 wakeup((caddr_t)cfil_info);
4086 }
4087 }
4088
4089 CFIL_INFO_VERIFY(cfil_info);
4090
4091 return error;
4092 }
4093
4094 static int
4095 cfil_service_pending_queue(struct socket *so, struct cfil_info *cfil_info, uint32_t kcunit, int outgoing)
4096 {
4097 uint64_t passlen, curlen;
4098 mbuf_t data;
4099 unsigned int datalen;
4100 errno_t error = 0;
4101 struct cfil_entry *entry;
4102 struct cfe_buf *entrybuf;
4103 struct cfil_queue *pending_q;
4104
4105 CFIL_LOG(LOG_INFO, "so %llx kcunit %u outgoing %d",
4106 (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit, outgoing);
4107
4108 socket_lock_assert_owned(so);
4109
4110 entry = &cfil_info->cfi_entries[kcunit - 1];
4111 if (outgoing) {
4112 entrybuf = &entry->cfe_snd;
4113 } else {
4114 entrybuf = &entry->cfe_rcv;
4115 }
4116
4117 pending_q = &entrybuf->cfe_pending_q;
4118
4119 passlen = entrybuf->cfe_pass_offset - pending_q->q_start;
4120
4121 /*
4122 * Locate the chunks of data that we can pass to the next filter
4123 * A data chunk must be on mbuf boundaries
4124 */
4125 curlen = 0;
4126 while ((data = cfil_queue_first(pending_q)) != NULL) {
4127 struct cfil_entry *iter_entry;
4128 datalen = cfil_data_length(data, NULL, NULL);
4129
4130 #if DATA_DEBUG
4131 CFIL_LOG(LOG_DEBUG,
4132 "CFIL: SERVICE PENDING-Q: data %llx datalen %u passlen %llu curlen %llu",
4133 (uint64_t)VM_KERNEL_ADDRPERM(data), datalen,
4134 passlen, curlen);
4135 #endif
4136
4137 if (curlen + datalen > passlen) {
4138 break;
4139 }
4140
4141 cfil_queue_remove(pending_q, data, datalen);
4142
4143 curlen += datalen;
4144
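/*
 * Hand the chunk to the filters that come after this entry in the
 * ordered list; each later filter gets its own chance to hold the data
 * before it is re-injected.
 */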
4145 for (iter_entry = SLIST_NEXT(entry, cfe_order_link);
4146 iter_entry != NULL;
4147 iter_entry = SLIST_NEXT(iter_entry, cfe_order_link)) {
4148 error = cfil_data_filter(so, cfil_info, CFI_ENTRY_KCUNIT(cfil_info, iter_entry), outgoing,
4149 data, datalen);
4150 /* 0 means passed so we can continue */
4151 if (error != 0) {
4152 break;
4153 }
4154 }
4155 /* When data has passed all filters, re-inject */
4156 if (error == 0) {
4157 if (outgoing) {
4158 cfil_queue_enqueue(
4159 &cfil_info->cfi_snd.cfi_inject_q,
4160 data, datalen);
4161 OSAddAtomic64(datalen,
4162 &cfil_stats.cfs_inject_q_out_enqueued);
4163 } else {
4164 cfil_queue_enqueue(
4165 &cfil_info->cfi_rcv.cfi_inject_q,
4166 data, datalen);
4167 OSAddAtomic64(datalen,
4168 &cfil_stats.cfs_inject_q_in_enqueued);
4169 }
4170 }
4171 }
4172
4173 CFIL_INFO_VERIFY(cfil_info);
4174
4175 return error;
4176 }
4177
4178 int
4179 cfil_update_data_offsets(struct socket *so, struct cfil_info *cfil_info, uint32_t kcunit, int outgoing,
4180 uint64_t pass_offset, uint64_t peek_offset)
4181 {
4182 errno_t error = 0;
4183 struct cfil_entry *entry = NULL;
4184 struct cfe_buf *entrybuf;
4185 int updated = 0;
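/*
 * Offset semantics, for example: a pass offset of 100 with a peek offset
 * of 500 releases bytes 0..99 towards the socket and lets the agent peek
 * at bytes 100..499 before it issues a further verdict.
 */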
4186
4187 CFIL_LOG(LOG_INFO, "pass %llu peek %llu", pass_offset, peek_offset);
4188
4189 socket_lock_assert_owned(so);
4190
4191 if (cfil_info == NULL) {
4192 CFIL_LOG(LOG_ERR, "so %llx cfil detached",
4193 (uint64_t)VM_KERNEL_ADDRPERM(so));
4194 error = 0;
4195 goto done;
4196 } else if (cfil_info->cfi_flags & CFIF_DROP) {
4197 CFIL_LOG(LOG_ERR, "so %llx drop set",
4198 (uint64_t)VM_KERNEL_ADDRPERM(so));
4199 error = EPIPE;
4200 goto done;
4201 }
4202
4203 entry = &cfil_info->cfi_entries[kcunit - 1];
4204 if (outgoing) {
4205 entrybuf = &entry->cfe_snd;
4206 } else {
4207 entrybuf = &entry->cfe_rcv;
4208 }
4209
4210 /* Record updated offsets for this content filter */
4211 if (pass_offset > entrybuf->cfe_pass_offset) {
4212 entrybuf->cfe_pass_offset = pass_offset;
4213
4214 if (entrybuf->cfe_peek_offset < entrybuf->cfe_pass_offset) {
4215 entrybuf->cfe_peek_offset = entrybuf->cfe_pass_offset;
4216 }
4217 updated = 1;
4218 } else {
4219 CFIL_LOG(LOG_INFO, "pass_offset %llu <= cfe_pass_offset %llu",
4220 pass_offset, entrybuf->cfe_pass_offset);
4221 }
4222 /* Filter does not want or need to see data that's allowed to pass */
4223 if (peek_offset > entrybuf->cfe_pass_offset &&
4224 peek_offset > entrybuf->cfe_peek_offset) {
4225 entrybuf->cfe_peek_offset = peek_offset;
4226 updated = 1;
4227 }
4228 /* Nothing to do */
4229 if (updated == 0) {
4230 goto done;
4231 }
4232
4233 /* Move data held in control queue to pending queue if needed */
4234 error = cfil_data_service_ctl_q(so, cfil_info, kcunit, outgoing);
4235 if (error != 0) {
4236 CFIL_LOG(LOG_ERR, "cfil_data_service_ctl_q() error %d",
4237 error);
4238 goto done;
4239 }
4240 error = EJUSTRETURN;
4241
4242 done:
4243 /*
4244 * The filter is effectively detached when pass all from both sides
4245 * or when the socket is closed and no more data is waiting
4246 * to be delivered to the filter
4247 */
4248 if (entry != NULL &&
4249 ((entry->cfe_snd.cfe_pass_offset == CFM_MAX_OFFSET &&
4250 entry->cfe_rcv.cfe_pass_offset == CFM_MAX_OFFSET) ||
4251 ((cfil_info->cfi_flags & CFIF_CLOSE_WAIT) &&
4252 cfil_queue_empty(&entry->cfe_snd.cfe_ctl_q) &&
4253 cfil_queue_empty(&entry->cfe_rcv.cfe_ctl_q)))) {
4254 entry->cfe_flags |= CFEF_CFIL_DETACHED;
4255 #if LIFECYCLE_DEBUG
4256 cfil_info_log(LOG_ERR, cfil_info, outgoing ?
4257 "CFIL: LIFECYCLE: OUT - PASSED ALL - DETACH":
4258 "CFIL: LIFECYCLE: IN - PASSED ALL - DETACH");
4259 #endif
4260 CFIL_LOG(LOG_INFO, "so %llx detached %u",
4261 (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit);
4262 if ((cfil_info->cfi_flags & CFIF_CLOSE_WAIT) &&
4263 cfil_filters_attached(so) == 0) {
4264 #if LIFECYCLE_DEBUG
4265 cfil_info_log(LOG_ERR, cfil_info, "CFIL: LIFECYCLE: WAKING");
4266 #endif
4267 CFIL_LOG(LOG_INFO, "so %llx waking",
4268 (uint64_t)VM_KERNEL_ADDRPERM(so));
4269 wakeup((caddr_t)cfil_info);
4270 }
4271 }
4272 CFIL_INFO_VERIFY(cfil_info);
4273 CFIL_LOG(LOG_INFO, "return %d", error);
4274 return error;
4275 }
4276
4277 /*
4278 * Update the socket pass offset to the lowest pass offset among the attached filters when no data is pending
4279 */
4280 static int
4281 cfil_set_socket_pass_offset(struct socket *so, struct cfil_info *cfil_info, int outgoing)
4282 {
4283 struct cfi_buf *cfi_buf;
4284 struct cfil_entry *entry;
4285 struct cfe_buf *entrybuf;
4286 uint32_t kcunit;
4287 uint64_t pass_offset = 0;
4288
4289 if (cfil_info == NULL) {
4290 return 0;
4291 }
4292
4293 CFIL_LOG(LOG_INFO, "so %llx outgoing %d",
4294 (uint64_t)VM_KERNEL_ADDRPERM(so), outgoing);
4295
4296 socket_lock_assert_owned(so);
4297
4298 if (outgoing) {
4299 cfi_buf = &cfil_info->cfi_snd;
4300 } else {
4301 cfi_buf = &cfil_info->cfi_rcv;
4302 }
4303
4304 CFIL_LOG(LOG_DEBUG, "CFIL: <so %llx, sockID %llu> outgoing %d cfi_pending_first %llu cfi_pending_last %llu",
4305 (uint64_t)VM_KERNEL_ADDRPERM(so), cfil_info->cfi_sock_id, outgoing,
4306 cfi_buf->cfi_pending_first, cfi_buf->cfi_pending_last);
4307
4308 if (cfi_buf->cfi_pending_last - cfi_buf->cfi_pending_first == 0) {
4309 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
4310 entry = &cfil_info->cfi_entries[kcunit - 1];
4311
4312 /* Are we attached to a filter? */
4313 if (entry->cfe_filter == NULL) {
4314 continue;
4315 }
4316
4317 if (outgoing) {
4318 entrybuf = &entry->cfe_snd;
4319 } else {
4320 entrybuf = &entry->cfe_rcv;
4321 }
4322
4323 if (pass_offset == 0 ||
4324 entrybuf->cfe_pass_offset < pass_offset) {
4325 pass_offset = entrybuf->cfe_pass_offset;
4326 }
4327 }
4328 cfi_buf->cfi_pass_offset = pass_offset;
4329 }
4330
4331 CFIL_LOG(LOG_DEBUG, "CFIL: <so %llx, sockID %llu>, cfi_pass_offset %llu",
4332 (uint64_t)VM_KERNEL_ADDRPERM(so), cfil_info->cfi_sock_id, cfi_buf->cfi_pass_offset);
4333
4334 return 0;
4335 }
4336
4337 int
4338 cfil_action_data_pass(struct socket *so, struct cfil_info *cfil_info, uint32_t kcunit, int outgoing,
4339 uint64_t pass_offset, uint64_t peek_offset)
4340 {
4341 errno_t error = 0;
4342
4343 CFIL_LOG(LOG_INFO, "");
4344
4345 socket_lock_assert_owned(so);
4346
4347 error = cfil_acquire_sockbuf(so, cfil_info, outgoing);
4348 if (error != 0) {
4349 CFIL_LOG(LOG_INFO, "so %llx %s dropped",
4350 (uint64_t)VM_KERNEL_ADDRPERM(so),
4351 outgoing ? "out" : "in");
4352 goto release;
4353 }
4354
4355 error = cfil_update_data_offsets(so, cfil_info, kcunit, outgoing,
4356 pass_offset, peek_offset);
4357
4358 cfil_service_inject_queue(so, cfil_info, outgoing);
4359
4360 cfil_set_socket_pass_offset(so, cfil_info, outgoing);
4361 release:
4362 CFIL_INFO_VERIFY(cfil_info);
4363 cfil_release_sockbuf(so, outgoing);
4364
4365 return error;
4366 }
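/*
 * For reference, a minimal user space sketch (assuming the cfil_msg_action
 * layout from cfil.h; sock_id, out_end and kctl_fd are placeholders) of the
 * CFM_OP_DATA_UPDATE message that drives this path, letting all data seen
 * so far pass and asking to peek at the next 4 KB outgoing:
 *
 *     struct cfil_msg_action action = { 0 };
 *     action.cfa_msghdr.cfm_len = sizeof(action);
 *     action.cfa_msghdr.cfm_version = CFM_VERSION_CURRENT;
 *     action.cfa_msghdr.cfm_type = CFM_TYPE_ACTION;
 *     action.cfa_msghdr.cfm_op = CFM_OP_DATA_UPDATE;
 *     action.cfa_msghdr.cfm_sock_id = sock_id;
 *     action.cfa_out_pass_offset = out_end;
 *     action.cfa_out_peek_offset = out_end + 4096;
 *     (void) send(kctl_fd, &action, sizeof(action), 0);
 */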
4367
4368
4369 static void
4370 cfil_flush_queues(struct socket *so, struct cfil_info *cfil_info)
4371 {
4372 struct cfil_entry *entry;
4373 int kcunit;
4374 uint64_t drained;
4375
4376 if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || cfil_info == NULL) {
4377 goto done;
4378 }
4379
4380 socket_lock_assert_owned(so);
4381
4382 /*
4383 * Flush the output queues and ignore errors as long as
4384 * we are attached
4385 */
4386 (void) cfil_acquire_sockbuf(so, cfil_info, 1);
4387 if (cfil_info != NULL) {
4388 drained = 0;
4389 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
4390 entry = &cfil_info->cfi_entries[kcunit - 1];
4391
4392 drained += cfil_queue_drain(&entry->cfe_snd.cfe_ctl_q);
4393 drained += cfil_queue_drain(&entry->cfe_snd.cfe_pending_q);
4394 }
4395 drained += cfil_queue_drain(&cfil_info->cfi_snd.cfi_inject_q);
4396
4397 if (drained) {
4398 if (cfil_info->cfi_flags & CFIF_DROP) {
4399 OSIncrementAtomic(
4400 &cfil_stats.cfs_flush_out_drop);
4401 } else {
4402 OSIncrementAtomic(
4403 &cfil_stats.cfs_flush_out_close);
4404 }
4405 }
4406 }
4407 cfil_release_sockbuf(so, 1);
4408
4409 /*
4410 * Flush the input queues
4411 */
4412 (void) cfil_acquire_sockbuf(so, cfil_info, 0);
4413 if (cfil_info != NULL) {
4414 drained = 0;
4415 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
4416 entry = &cfil_info->cfi_entries[kcunit - 1];
4417
4418 drained += cfil_queue_drain(
4419 &entry->cfe_rcv.cfe_ctl_q);
4420 drained += cfil_queue_drain(
4421 &entry->cfe_rcv.cfe_pending_q);
4422 }
4423 drained += cfil_queue_drain(&cfil_info->cfi_rcv.cfi_inject_q);
4424
4425 if (drained) {
4426 if (cfil_info->cfi_flags & CFIF_DROP) {
4427 OSIncrementAtomic(
4428 &cfil_stats.cfs_flush_in_drop);
4429 } else {
4430 OSIncrementAtomic(
4431 &cfil_stats.cfs_flush_in_close);
4432 }
4433 }
4434 }
4435 cfil_release_sockbuf(so, 0);
4436 done:
4437 CFIL_INFO_VERIFY(cfil_info);
4438 }
4439
4440 int
4441 cfil_action_drop(struct socket *so, struct cfil_info *cfil_info, uint32_t kcunit)
4442 {
4443 errno_t error = 0;
4444 struct cfil_entry *entry;
4445 struct proc *p;
4446
4447 if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || cfil_info == NULL) {
4448 goto done;
4449 }
4450
4451 socket_lock_assert_owned(so);
4452
4453 entry = &cfil_info->cfi_entries[kcunit - 1];
4454
4455 /* Are we attached to the filter? */
4456 if (entry->cfe_filter == NULL) {
4457 goto done;
4458 }
4459
4460 cfil_info->cfi_flags |= CFIF_DROP;
4461
4462 p = current_proc();
4463
4464 /*
4465 * Force the socket to be marked defunct
4466 * (forcing fixed along with rdar://19391339)
4467 */
4468 if (so->so_cfil_db == NULL) {
4469 error = sosetdefunct(p, so,
4470 SHUTDOWN_SOCKET_LEVEL_CONTENT_FILTER | SHUTDOWN_SOCKET_LEVEL_DISCONNECT_ALL,
4471 FALSE);
4472
4473 /* Flush the socket buffer and disconnect */
4474 if (error == 0) {
4475 error = sodefunct(p, so,
4476 SHUTDOWN_SOCKET_LEVEL_CONTENT_FILTER | SHUTDOWN_SOCKET_LEVEL_DISCONNECT_ALL);
4477 }
4478 }
4479
4480 /* The filter is done, mark as detached */
4481 entry->cfe_flags |= CFEF_CFIL_DETACHED;
4482 #if LIFECYCLE_DEBUG
4483 cfil_info_log(LOG_ERR, cfil_info, "CFIL: LIFECYCLE: DROP - DETACH");
4484 #endif
4485 CFIL_LOG(LOG_INFO, "so %llx detached %u",
4486 (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit);
4487
4488 /* Pending data needs to go */
4489 cfil_flush_queues(so, cfil_info);
4490
4491 if (cfil_info && (cfil_info->cfi_flags & CFIF_CLOSE_WAIT)) {
4492 if (cfil_filters_attached(so) == 0) {
4493 CFIL_LOG(LOG_INFO, "so %llx waking",
4494 (uint64_t)VM_KERNEL_ADDRPERM(so));
4495 wakeup((caddr_t)cfil_info);
4496 }
4497 }
4498 done:
4499 return error;
4500 }
4501
4502 int
4503 cfil_action_bless_client(uint32_t kcunit, struct cfil_msg_hdr *msghdr)
4504 {
4505 errno_t error = 0;
4506 struct cfil_info *cfil_info = NULL;
4507
4508 bool cfil_attached = false;
4509 struct cfil_msg_bless_client *blessmsg = (struct cfil_msg_bless_client *)msghdr;
4510
4511 // Search and lock socket
4512 struct socket *so = cfil_socket_from_client_uuid(blessmsg->cfb_client_uuid, &cfil_attached);
4513 if (so == NULL) {
4514 error = ENOENT;
4515 } else {
4516 // The client gets a pass automatically
4517 cfil_info = (so->so_cfil_db != NULL) ?
4518 cfil_db_get_cfil_info(so->so_cfil_db, msghdr->cfm_sock_id) : so->so_cfil;
4519
4520 if (cfil_attached) {
4521 #if VERDICT_DEBUG
4522 if (cfil_info != NULL) {
4523 CFIL_LOG(LOG_ERR, "CFIL: VERDICT RECEIVED: BLESS %s <so %llx sockID %llu>",
4524 cfil_info->cfi_hash_entry ? "UDP" : "TCP",
4525 (uint64_t)VM_KERNEL_ADDRPERM(so),
4526 cfil_info->cfi_sock_id);
4527 }
4528 #endif
4529 cfil_sock_received_verdict(so);
4530 (void)cfil_action_data_pass(so, cfil_info, kcunit, 1, CFM_MAX_OFFSET, CFM_MAX_OFFSET);
4531 (void)cfil_action_data_pass(so, cfil_info, kcunit, 0, CFM_MAX_OFFSET, CFM_MAX_OFFSET);
4532 } else {
4533 so->so_flags1 |= SOF1_CONTENT_FILTER_SKIP;
4534 }
4535 socket_unlock(so, 1);
4536 }
4537
4538 return error;
4539 }
4540
4541 int
4542 cfil_action_set_crypto_key(uint32_t kcunit, struct cfil_msg_hdr *msghdr)
4543 {
4544 struct content_filter *cfc = NULL;
4545 cfil_crypto_state_t crypto_state = NULL;
4546 struct cfil_msg_set_crypto_key *keymsg = (struct cfil_msg_set_crypto_key *)msghdr;
4547
4548 CFIL_LOG(LOG_NOTICE, "");
4549
4550 if (content_filters == NULL) {
4551 CFIL_LOG(LOG_ERR, "no content filter");
4552 return EINVAL;
4553 }
4554 if (kcunit > MAX_CONTENT_FILTER) {
4555 CFIL_LOG(LOG_ERR, "kcunit %u > MAX_CONTENT_FILTER (%d)",
4556 kcunit, MAX_CONTENT_FILTER);
4557 return EINVAL;
4558 }
4559 crypto_state = cfil_crypto_init_client((uint8_t *)keymsg->crypto_key);
4560 if (crypto_state == NULL) {
4561 CFIL_LOG(LOG_ERR, "failed to initialize crypto state for unit %u)",
4562 kcunit);
4563 return EINVAL;
4564 }
4565
4566 cfil_rw_lock_exclusive(&cfil_lck_rw);
4567
4568 cfc = content_filters[kcunit - 1];
4569 if (cfc == NULL || cfc->cf_kcunit != kcunit) {
4570 CFIL_LOG(LOG_ERR, "bad unit info %u)",
4571 kcunit);
4572 cfil_rw_unlock_exclusive(&cfil_lck_rw);
4573 cfil_crypto_cleanup_state(crypto_state);
4574 return EINVAL;
4575 }
4576 if (cfc->cf_crypto_state != NULL) {
4577 cfil_crypto_cleanup_state(cfc->cf_crypto_state);
4578 cfc->cf_crypto_state = NULL;
4579 }
4580 cfc->cf_crypto_state = crypto_state;
4581
4582 cfil_rw_unlock_exclusive(&cfil_lck_rw);
4583 return 0;
4584 }
4585
4586 static int
4587 cfil_update_entry_offsets(struct socket *so, struct cfil_info *cfil_info, int outgoing, unsigned int datalen)
4588 {
4589 struct cfil_entry *entry;
4590 struct cfe_buf *entrybuf;
4591 uint32_t kcunit;
4592
4593 CFIL_LOG(LOG_INFO, "so %llx outgoing %d datalen %u",
4594 (uint64_t)VM_KERNEL_ADDRPERM(so), outgoing, datalen);
4595
4596 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
4597 entry = &cfil_info->cfi_entries[kcunit - 1];
4598
4599 /* Are we attached to the filter? */
4600 if (entry->cfe_filter == NULL) {
4601 continue;
4602 }
4603
4604 if (outgoing) {
4605 entrybuf = &entry->cfe_snd;
4606 } else {
4607 entrybuf = &entry->cfe_rcv;
4608 }
4609
4610 entrybuf->cfe_ctl_q.q_start += datalen;
4611 entrybuf->cfe_pass_offset = entrybuf->cfe_ctl_q.q_start;
4612 entrybuf->cfe_peeked = entrybuf->cfe_ctl_q.q_start;
4613 if (entrybuf->cfe_peek_offset < entrybuf->cfe_pass_offset) {
4614 entrybuf->cfe_peek_offset = entrybuf->cfe_pass_offset;
4615 }
4616
4617 entrybuf->cfe_ctl_q.q_end += datalen;
4618
4619 entrybuf->cfe_pending_q.q_start += datalen;
4620 entrybuf->cfe_pending_q.q_end += datalen;
4621 }
4622 CFIL_INFO_VERIFY(cfil_info);
4623 return 0;
4624 }
4625
4626 int
4627 cfil_data_common(struct socket *so, struct cfil_info *cfil_info, int outgoing, struct sockaddr *to,
4628 struct mbuf *data, struct mbuf *control, uint32_t flags)
4629 {
4630 #pragma unused(to, control, flags)
4631 errno_t error = 0;
4632 unsigned int datalen;
4633 int mbcnt = 0;
4634 int mbnum = 0;
4635 int kcunit;
4636 struct cfi_buf *cfi_buf;
4637 struct mbuf *chain = NULL;
4638
4639 if (cfil_info == NULL) {
4640 CFIL_LOG(LOG_ERR, "so %llx cfil detached",
4641 (uint64_t)VM_KERNEL_ADDRPERM(so));
4642 error = 0;
4643 goto done;
4644 } else if (cfil_info->cfi_flags & CFIF_DROP) {
4645 CFIL_LOG(LOG_ERR, "so %llx drop set",
4646 (uint64_t)VM_KERNEL_ADDRPERM(so));
4647 error = EPIPE;
4648 goto done;
4649 }
4650
4651 datalen = cfil_data_length(data, &mbcnt, &mbnum);
4652
4653 if (outgoing) {
4654 cfi_buf = &cfil_info->cfi_snd;
4655 cfil_info->cfi_byte_outbound_count += datalen;
4656 } else {
4657 cfi_buf = &cfil_info->cfi_rcv;
4658 cfil_info->cfi_byte_inbound_count += datalen;
4659 }
4660
4661 cfi_buf->cfi_pending_last += datalen;
4662 cfi_buf->cfi_pending_mbcnt += mbcnt;
4663 cfi_buf->cfi_pending_mbnum += mbnum;
4664
4665 if (IS_UDP(so)) {
4666 if (cfi_buf->cfi_pending_mbnum > cfil_udp_gc_mbuf_num_max ||
4667 cfi_buf->cfi_pending_mbcnt > cfil_udp_gc_mbuf_cnt_max) {
4668 cfi_buf->cfi_tail_drop_cnt++;
4669 cfi_buf->cfi_pending_mbcnt -= mbcnt;
4670 cfi_buf->cfi_pending_mbnum -= mbnum;
4671 return EPIPE;
4672 }
4673 }
4674
4675 cfil_info_buf_verify(cfi_buf);
4676
4677 #if DATA_DEBUG
4678 CFIL_LOG(LOG_DEBUG, "CFIL: QUEUEING DATA: <so %llx> %s: data %llx len %u flags 0x%x nextpkt %llx - cfi_pending_last %llu cfi_pending_mbcnt %u cfi_pass_offset %llu",
4679 (uint64_t)VM_KERNEL_ADDRPERM(so),
4680 outgoing ? "OUT" : "IN",
4681 (uint64_t)VM_KERNEL_ADDRPERM(data), datalen, data->m_flags,
4682 (uint64_t)VM_KERNEL_ADDRPERM(data->m_nextpkt),
4683 cfi_buf->cfi_pending_last,
4684 cfi_buf->cfi_pending_mbcnt,
4685 cfi_buf->cfi_pass_offset);
4686 #endif
4687
4688 /* Fast path when below pass offset: no filter needs to see this data, just advance the entry offsets */
4689 if (cfi_buf->cfi_pending_last <= cfi_buf->cfi_pass_offset) {
4690 cfil_update_entry_offsets(so, cfil_info, outgoing, datalen);
4691 #if DATA_DEBUG
4692 CFIL_LOG(LOG_DEBUG, "CFIL: QUEUEING DATA: FAST PATH");
4693 #endif
4694 } else {
4695 struct cfil_entry *iter_entry;
4696 SLIST_FOREACH(iter_entry, &cfil_info->cfi_ordered_entries, cfe_order_link) {
4697 // Is cfil attached to this filter?
4698 kcunit = CFI_ENTRY_KCUNIT(cfil_info, iter_entry);
4699 if (IS_ENTRY_ATTACHED(cfil_info, kcunit)) {
4700 if (IS_UDP(so) && chain == NULL) {
4701 /* UDP only:
4702 * Chain addr (incoming only TDB), control (optional) and data into one chain.
4703 * This full chain will be reinjected into the socket after receiving the verdict.
4704 */
4705 (void) cfil_udp_save_socket_state(cfil_info, data);
4706 chain = sbconcat_mbufs(NULL, outgoing ? NULL : to, data, control);
4707 if (chain == NULL) {
4708 return ENOBUFS;
4709 }
4710 data = chain;
4711 }
4712 error = cfil_data_filter(so, cfil_info, kcunit, outgoing, data,
4713 datalen);
4714 }
4715 /* 0 means passed so continue with next filter */
4716 if (error != 0) {
4717 break;
4718 }
4719 }
4720 }
4721
4722 /* Move cursor if no filter claimed the data */
4723 if (error == 0) {
4724 cfi_buf->cfi_pending_first += datalen;
4725 cfi_buf->cfi_pending_mbcnt -= mbcnt;
4726 cfi_buf->cfi_pending_mbnum -= mbnum;
4727 cfil_info_buf_verify(cfi_buf);
4728 }
4729 done:
4730 CFIL_INFO_VERIFY(cfil_info);
4731
4732 return error;
4733 }
4734
4735 /*
4736 * Callback from socket layer sosendxxx()
4737 */
4738 int
4739 cfil_sock_data_out(struct socket *so, struct sockaddr *to,
4740 struct mbuf *data, struct mbuf *control, uint32_t flags)
4741 {
4742 int error = 0;
4743
4744 if (IS_UDP(so)) {
4745 return cfil_sock_udp_handle_data(TRUE, so, NULL, to, data, control, flags);
4746 }
4747
4748 if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL) {
4749 return 0;
4750 }
4751
4752 /*
4753 * Pass initial data for TFO.
4754 */
4755 if (IS_INITIAL_TFO_DATA(so)) {
4756 return 0;
4757 }
4758
4759 socket_lock_assert_owned(so);
4760
4761 if (so->so_cfil->cfi_flags & CFIF_DROP) {
4762 CFIL_LOG(LOG_ERR, "so %llx drop set",
4763 (uint64_t)VM_KERNEL_ADDRPERM(so));
4764 return EPIPE;
4765 }
4766 if (control != NULL) {
4767 CFIL_LOG(LOG_ERR, "so %llx control",
4768 (uint64_t)VM_KERNEL_ADDRPERM(so));
4769 OSIncrementAtomic(&cfil_stats.cfs_data_out_control);
4770 }
4771 if ((flags & MSG_OOB)) {
4772 CFIL_LOG(LOG_ERR, "so %llx MSG_OOB",
4773 (uint64_t)VM_KERNEL_ADDRPERM(so));
4774 OSIncrementAtomic(&cfil_stats.cfs_data_out_oob);
4775 }
4776 if ((so->so_snd.sb_flags & SB_LOCK) == 0) {
4777 panic("so %p SB_LOCK not set", so);
4778 }
4779
4780 if (so->so_snd.sb_cfil_thread != NULL) {
4781 panic("%s sb_cfil_thread %p not NULL", __func__,
4782 so->so_snd.sb_cfil_thread);
4783 }
4784
4785 error = cfil_data_common(so, so->so_cfil, 1, to, data, control, flags);
4786
4787 return error;
4788 }
4789
4790 /*
4791 * Callback from socket layer sbappendxxx()
4792 */
4793 int
4794 cfil_sock_data_in(struct socket *so, struct sockaddr *from,
4795 struct mbuf *data, struct mbuf *control, uint32_t flags)
4796 {
4797 int error = 0;
4798
4799 if (IS_UDP(so)) {
4800 return cfil_sock_udp_handle_data(FALSE, so, NULL, from, data, control, flags);
4801 }
4802
4803 if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL) {
4804 return 0;
4805 }
4806
4807 /*
4808 * Pass initial data for TFO.
4809 */
4810 if (IS_INITIAL_TFO_DATA(so)) {
4811 return 0;
4812 }
4813
4814 socket_lock_assert_owned(so);
4815
4816 if (so->so_cfil->cfi_flags & CFIF_DROP) {
4817 CFIL_LOG(LOG_ERR, "so %llx drop set",
4818 (uint64_t)VM_KERNEL_ADDRPERM(so));
4819 return EPIPE;
4820 }
4821 if (control != NULL) {
4822 CFIL_LOG(LOG_ERR, "so %llx control",
4823 (uint64_t)VM_KERNEL_ADDRPERM(so));
4824 OSIncrementAtomic(&cfil_stats.cfs_data_in_control);
4825 }
4826 if (data->m_type == MT_OOBDATA) {
4827 CFIL_LOG(LOG_ERR, "so %llx MSG_OOB",
4828 (uint64_t)VM_KERNEL_ADDRPERM(so));
4829 OSIncrementAtomic(&cfil_stats.cfs_data_in_oob);
4830 }
4831 error = cfil_data_common(so, so->so_cfil, 0, from, data, control, flags);
4832
4833 return error;
4834 }
4835
4836 /*
4837 * Callback from socket layer soshutdownxxx()
4838 *
4839 * We may delay the shutdown write if there is outgoing data still being processed.
4840 *
4841 * There is no point in delaying the shutdown read because the process
4842 * indicated that it does not want to read any more data.
4843 */
4844 int
4845 cfil_sock_shutdown(struct socket *so, int *how)
4846 {
4847 int error = 0;
4848
4849 if (IS_UDP(so)) {
4850 return cfil_sock_udp_shutdown(so, how);
4851 }
4852
4853 if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL) {
4854 goto done;
4855 }
4856
4857 socket_lock_assert_owned(so);
4858
4859 CFIL_LOG(LOG_INFO, "so %llx how %d",
4860 (uint64_t)VM_KERNEL_ADDRPERM(so), *how);
4861
4862 /*
4863 * Check the state of the socket before the content filter
4864 */
4865 if (*how != SHUT_WR && (so->so_state & SS_CANTRCVMORE) != 0) {
4866 /* read already shut down */
4867 error = ENOTCONN;
4868 goto done;
4869 }
4870 if (*how != SHUT_RD && (so->so_state & SS_CANTSENDMORE) != 0) {
4871 /* write already shut down */
4872 error = ENOTCONN;
4873 goto done;
4874 }
4875
4876 if ((so->so_cfil->cfi_flags & CFIF_DROP) != 0) {
4877 CFIL_LOG(LOG_ERR, "so %llx drop set",
4878 (uint64_t)VM_KERNEL_ADDRPERM(so));
4879 goto done;
4880 }
4881
4882 /*
4883 * shutdown read: SHUT_RD or SHUT_RDWR
4884 */
4885 if (*how != SHUT_WR) {
4886 if (so->so_cfil->cfi_flags & CFIF_SHUT_RD) {
4887 error = ENOTCONN;
4888 goto done;
4889 }
4890 so->so_cfil->cfi_flags |= CFIF_SHUT_RD;
4891 cfil_sock_notify_shutdown(so, SHUT_RD);
4892 }
4893 /*
4894 * shutdown write: SHUT_WR or SHUT_RDWR
4895 */
4896 if (*how != SHUT_RD) {
4897 if (so->so_cfil->cfi_flags & CFIF_SHUT_WR) {
4898 error = ENOTCONN;
4899 goto done;
4900 }
4901 so->so_cfil->cfi_flags |= CFIF_SHUT_WR;
4902 cfil_sock_notify_shutdown(so, SHUT_WR);
4903 /*
4904 * When outgoing data is pending, we delay the shutdown at the
4905 * protocol level until the content filters give the final
4906 * verdict on the pending data.
4907 */
4908 if (cfil_sock_data_pending(&so->so_snd) != 0) {
4909 /*
4910 * When shutting down the read and write sides at once
4911 * we can proceed to the final shutdown of the read
4912 * side. Otherwise, we just return.
4913 */
4914 if (*how == SHUT_WR) {
4915 error = EJUSTRETURN;
4916 } else if (*how == SHUT_RDWR) {
4917 *how = SHUT_RD;
4918 }
4919 }
4920 }
4921 done:
4922 return error;
4923 }
4924
4925 /*
4926 * This is called when the socket is closed and there is no more
4927 * opportunity for filtering
4928 */
4929 void
4930 cfil_sock_is_closed(struct socket *so)
4931 {
4932 errno_t error = 0;
4933 int kcunit;
4934
4935 if (IS_UDP(so)) {
4936 cfil_sock_udp_is_closed(so);
4937 return;
4938 }
4939
4940 if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL) {
4941 return;
4942 }
4943
4944 CFIL_LOG(LOG_INFO, "so %llx", (uint64_t)VM_KERNEL_ADDRPERM(so));
4945
4946 socket_lock_assert_owned(so);
4947
4948 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
4949 /* Let the filters know of the closing */
4950 error = cfil_dispatch_closed_event(so, so->so_cfil, kcunit);
4951 }
4952
4953 /* Last chance to push passed data out */
4954 error = cfil_acquire_sockbuf(so, so->so_cfil, 1);
4955 if (error == 0) {
4956 cfil_service_inject_queue(so, so->so_cfil, 1);
4957 }
4958 cfil_release_sockbuf(so, 1);
4959
4960 so->so_cfil->cfi_flags |= CFIF_SOCK_CLOSED;
4961
4962 /* Pending data needs to go */
4963 cfil_flush_queues(so, so->so_cfil);
4964
4965 CFIL_INFO_VERIFY(so->so_cfil);
4966 }
4967
4968 /*
4969 * This is called when the socket is disconnected so let the filters
4970 * know about the disconnection and that no more data will come
4971 *
4972 * The how parameter has the same values as soshutdown()
4973 */
4974 void
4975 cfil_sock_notify_shutdown(struct socket *so, int how)
4976 {
4977 errno_t error = 0;
4978 int kcunit;
4979
4980 if (IS_UDP(so)) {
4981 cfil_sock_udp_notify_shutdown(so, how, 0, 0);
4982 return;
4983 }
4984
4985 if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL) {
4986 return;
4987 }
4988
4989 CFIL_LOG(LOG_INFO, "so %llx how %d",
4990 (uint64_t)VM_KERNEL_ADDRPERM(so), how);
4991
4992 socket_lock_assert_owned(so);
4993
4994 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
4995 /* Disconnect incoming side */
4996 if (how != SHUT_WR) {
4997 error = cfil_dispatch_disconnect_event(so, so->so_cfil, kcunit, 0);
4998 }
4999 /* Disconnect outgoing side */
5000 if (how != SHUT_RD) {
5001 error = cfil_dispatch_disconnect_event(so, so->so_cfil, kcunit, 1);
5002 }
5003 }
5004 }
5005
5006 static int
5007 cfil_filters_attached(struct socket *so)
5008 {
5009 struct cfil_entry *entry;
5010 uint32_t kcunit;
5011 int attached = 0;
5012
5013 if (IS_UDP(so)) {
5014 return cfil_filters_udp_attached(so, FALSE);
5015 }
5016
5017 if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL) {
5018 return 0;
5019 }
5020
5021 socket_lock_assert_owned(so);
5022
5023 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
5024 entry = &so->so_cfil->cfi_entries[kcunit - 1];
5025
5026 /* Are we attached to the filter? */
5027 if (entry->cfe_filter == NULL) {
5028 continue;
5029 }
5030 if ((entry->cfe_flags & CFEF_SENT_SOCK_ATTACHED) == 0) {
5031 continue;
5032 }
5033 if ((entry->cfe_flags & CFEF_CFIL_DETACHED) != 0) {
5034 continue;
5035 }
5036 attached = 1;
5037 break;
5038 }
5039
5040 return attached;
5041 }
5042
5043 /*
5044 * This is called when the socket is closed and we are waiting for
5045 * the filters to give the final pass or drop verdict
5046 */
5047 void
5048 cfil_sock_close_wait(struct socket *so)
5049 {
5050 lck_mtx_t *mutex_held;
5051 struct timespec ts;
5052 int error;
5053
5054 if (IS_UDP(so)) {
5055 cfil_sock_udp_close_wait(so);
5056 return;
5057 }
5058
5059 if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL) {
5060 return;
5061 }
5062
5063 CFIL_LOG(LOG_INFO, "so %llx", (uint64_t)VM_KERNEL_ADDRPERM(so));
5064
5065 if (so->so_proto->pr_getlock != NULL) {
5066 mutex_held = (*so->so_proto->pr_getlock)(so, PR_F_WILLUNLOCK);
5067 } else {
5068 mutex_held = so->so_proto->pr_domain->dom_mtx;
5069 }
5070 LCK_MTX_ASSERT(mutex_held, LCK_MTX_ASSERT_OWNED);
5071
5072 while (cfil_filters_attached(so)) {
5073 /*
5074 * Notify the filters we are going away so they can detach
5075 */
5076 cfil_sock_notify_shutdown(so, SHUT_RDWR);
5077
5078 /*
5079 * Check whether we still need to wait after the filters are notified
5080 * of the disconnection
5081 */
5082 if (cfil_filters_attached(so) == 0) {
5083 break;
5084 }
5085
5086 CFIL_LOG(LOG_INFO, "so %llx waiting",
5087 (uint64_t)VM_KERNEL_ADDRPERM(so));
5088
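/* cfil_close_wait_timeout is expressed in milliseconds */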
5089 ts.tv_sec = cfil_close_wait_timeout / 1000;
5090 ts.tv_nsec = (cfil_close_wait_timeout % 1000) *
5091 NSEC_PER_USEC * 1000;
5092
5093 OSIncrementAtomic(&cfil_stats.cfs_close_wait);
5094 so->so_cfil->cfi_flags |= CFIF_CLOSE_WAIT;
5095 error = msleep((caddr_t)so->so_cfil, mutex_held,
5096 PSOCK | PCATCH, "cfil_sock_close_wait", &ts);
5097 so->so_cfil->cfi_flags &= ~CFIF_CLOSE_WAIT;
5098
5099 CFIL_LOG(LOG_NOTICE, "so %llx timed out %d",
5100 (uint64_t)VM_KERNEL_ADDRPERM(so), (error != 0));
5101
5102 /*
5103 * Force close in case of timeout
5104 */
5105 if (error != 0) {
5106 OSIncrementAtomic(&cfil_stats.cfs_close_wait_timeout);
5107 break;
5108 }
5109 }
5110 }
5111
5112 /*
5113 * Returns the size of the data held by the content filter for the given socket buffer
5114 */
5115 int32_t
5116 cfil_sock_data_pending(struct sockbuf *sb)
5117 {
5118 struct socket *so = sb->sb_so;
5119 uint64_t pending = 0;
5120
5121 if (IS_UDP(so)) {
5122 return cfil_sock_udp_data_pending(sb, FALSE);
5123 }
5124
5125 if ((so->so_flags & SOF_CONTENT_FILTER) != 0 && so->so_cfil != NULL) {
5126 struct cfi_buf *cfi_buf;
5127
5128 socket_lock_assert_owned(so);
5129
5130 if ((sb->sb_flags & SB_RECV) == 0) {
5131 cfi_buf = &so->so_cfil->cfi_snd;
5132 } else {
5133 cfi_buf = &so->so_cfil->cfi_rcv;
5134 }
5135
5136 pending = cfi_buf->cfi_pending_last -
5137 cfi_buf->cfi_pending_first;
5138
5139 /*
5140 * If we are limited by the "chars of mbufs used" roughly
5141 * adjust so we won't overcommit
5142 */
5143 if (pending > (uint64_t)cfi_buf->cfi_pending_mbcnt) {
5144 pending = cfi_buf->cfi_pending_mbcnt;
5145 }
5146 }
5147
5148 VERIFY(pending < INT32_MAX);
5149
5150 return (int32_t)(pending);
5151 }
5152
5153 /*
5154 * Return the socket buffer space used by data being held by content filters
5155 * so processes won't clog the socket buffer
5156 */
5157 int32_t
5158 cfil_sock_data_space(struct sockbuf *sb)
5159 {
5160 struct socket *so = sb->sb_so;
5161 uint64_t pending = 0;
5162
5163 if (IS_UDP(so)) {
5164 return cfil_sock_udp_data_pending(sb, TRUE);
5165 }
5166
5167 if ((so->so_flags & SOF_CONTENT_FILTER) != 0 && so->so_cfil != NULL &&
5168 so->so_snd.sb_cfil_thread != current_thread()) {
5169 struct cfi_buf *cfi_buf;
5170
5171 socket_lock_assert_owned(so);
5172
5173 if ((sb->sb_flags & SB_RECV) == 0) {
5174 cfi_buf = &so->so_cfil->cfi_snd;
5175 } else {
5176 cfi_buf = &so->so_cfil->cfi_rcv;
5177 }
5178
5179 pending = cfi_buf->cfi_pending_last -
5180 cfi_buf->cfi_pending_first;
5181
5182 /*
5183 * If we are limited by the "chars of mbufs used" roughly
5184 * adjust so we won't overcommit
5185 */
5186 if ((uint64_t)cfi_buf->cfi_pending_mbcnt > pending) {
5187 pending = cfi_buf->cfi_pending_mbcnt;
5188 }
5189 }
5190
5191 VERIFY(pending < INT32_MAX);
5192
5193 return (int32_t)(pending);
5194 }
5195
5196 /*
5197 * A callback from the socket and protocol layer when data becomes
5198 * available in the socket buffer to give a chance for the content filter
5199 * to re-inject data that was held back
5200 */
5201 void
5202 cfil_sock_buf_update(struct sockbuf *sb)
5203 {
5204 int outgoing;
5205 int error;
5206 struct socket *so = sb->sb_so;
5207
5208 if (IS_UDP(so)) {
5209 cfil_sock_udp_buf_update(sb);
5210 return;
5211 }
5212
5213 if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL) {
5214 return;
5215 }
5216
5217 if (!cfil_sbtrim) {
5218 return;
5219 }
5220
5221 socket_lock_assert_owned(so);
5222
5223 if ((sb->sb_flags & SB_RECV) == 0) {
5224 if ((so->so_cfil->cfi_flags & CFIF_RETRY_INJECT_OUT) == 0) {
5225 return;
5226 }
5227 outgoing = 1;
5228 OSIncrementAtomic(&cfil_stats.cfs_inject_q_out_retry);
5229 } else {
5230 if ((so->so_cfil->cfi_flags & CFIF_RETRY_INJECT_IN) == 0) {
5231 return;
5232 }
5233 outgoing = 0;
5234 OSIncrementAtomic(&cfil_stats.cfs_inject_q_in_retry);
5235 }
5236
5237 CFIL_LOG(LOG_NOTICE, "so %llx outgoing %d",
5238 (uint64_t)VM_KERNEL_ADDRPERM(so), outgoing);
5239
5240 error = cfil_acquire_sockbuf(so, so->so_cfil, outgoing);
5241 if (error == 0) {
5242 cfil_service_inject_queue(so, so->so_cfil, outgoing);
5243 }
5244 cfil_release_sockbuf(so, outgoing);
5245 }
5246
5247 int
5248 sysctl_cfil_filter_list(struct sysctl_oid *oidp, void *arg1, int arg2,
5249 struct sysctl_req *req)
5250 {
5251 #pragma unused(oidp, arg1, arg2)
5252 int error = 0;
5253 size_t len = 0;
5254 u_int32_t i;
5255
5256 /* Read only */
5257 if (req->newptr != USER_ADDR_NULL) {
5258 return EPERM;
5259 }
5260
5261 cfil_rw_lock_shared(&cfil_lck_rw);
5262
5263 for (i = 0; content_filters != NULL && i < MAX_CONTENT_FILTER; i++) {
5264 struct cfil_filter_stat filter_stat;
5265 struct content_filter *cfc = content_filters[i];
5266
5267 if (cfc == NULL) {
5268 continue;
5269 }
5270
5271 /* If just asking for the size */
5272 if (req->oldptr == USER_ADDR_NULL) {
5273 len += sizeof(struct cfil_filter_stat);
5274 continue;
5275 }
5276
5277 bzero(&filter_stat, sizeof(struct cfil_filter_stat));
5278 filter_stat.cfs_len = sizeof(struct cfil_filter_stat);
5279 filter_stat.cfs_filter_id = cfc->cf_kcunit;
5280 filter_stat.cfs_flags = cfc->cf_flags;
5281 filter_stat.cfs_sock_count = cfc->cf_sock_count;
5282 filter_stat.cfs_necp_control_unit = cfc->cf_necp_control_unit;
5283
5284 error = SYSCTL_OUT(req, &filter_stat,
5285 sizeof(struct cfil_filter_stat));
5286 if (error != 0) {
5287 break;
5288 }
5289 }
5290 /* If just asking for the size */
5291 if (req->oldptr == USER_ADDR_NULL) {
5292 req->oldidx = len;
5293 }
5294
5295 cfil_rw_unlock_shared(&cfil_lck_rw);
5296
5297 #if SHOW_DEBUG
5298 if (req->oldptr != USER_ADDR_NULL) {
5299 for (i = 1; content_filters != NULL && i <= MAX_CONTENT_FILTER; i++) {
5300 cfil_filter_show(i);
5301 }
5302 }
5303 #endif
5304
5305 return error;
5306 }
5307
5308 static int
5309 sysctl_cfil_sock_list(struct sysctl_oid *oidp, void *arg1, int arg2,
5310 struct sysctl_req *req)
5311 {
5312 #pragma unused(oidp, arg1, arg2)
5313 int error = 0;
5314 u_int32_t i;
5315 struct cfil_info *cfi;
5316
5317 /* Read only */
5318 if (req->newptr != USER_ADDR_NULL) {
5319 return EPERM;
5320 }
5321
5322 cfil_rw_lock_shared(&cfil_lck_rw);
5323
5324 /*
5325 * If just asking for the size
5326 */
5327 if (req->oldptr == USER_ADDR_NULL) {
5328 req->oldidx = cfil_sock_attached_count *
5329 sizeof(struct cfil_sock_stat);
5330 /* Bump the length in case new sockets get attached */
5331 req->oldidx += req->oldidx >> 3;
5332 goto done;
5333 }
5334
5335 TAILQ_FOREACH(cfi, &cfil_sock_head, cfi_link) {
5336 struct cfil_entry *entry;
5337 struct cfil_sock_stat stat;
5338 struct socket *so = cfi->cfi_so;
5339
5340 bzero(&stat, sizeof(struct cfil_sock_stat));
5341 stat.cfs_len = sizeof(struct cfil_sock_stat);
5342 stat.cfs_sock_id = cfi->cfi_sock_id;
5343 stat.cfs_flags = cfi->cfi_flags;
5344
5345 if (so != NULL) {
5346 stat.cfs_pid = so->last_pid;
5347 memcpy(stat.cfs_uuid, so->last_uuid,
5348 sizeof(uuid_t));
5349 if (so->so_flags & SOF_DELEGATED) {
5350 stat.cfs_e_pid = so->e_pid;
5351 memcpy(stat.cfs_e_uuid, so->e_uuid,
5352 sizeof(uuid_t));
5353 } else {
5354 stat.cfs_e_pid = so->last_pid;
5355 memcpy(stat.cfs_e_uuid, so->last_uuid,
5356 sizeof(uuid_t));
5357 }
5358
5359 stat.cfs_sock_family = so->so_proto->pr_domain->dom_family;
5360 stat.cfs_sock_type = so->so_proto->pr_type;
5361 stat.cfs_sock_protocol = so->so_proto->pr_protocol;
5362 }
5363
5364 stat.cfs_snd.cbs_pending_first =
5365 cfi->cfi_snd.cfi_pending_first;
5366 stat.cfs_snd.cbs_pending_last =
5367 cfi->cfi_snd.cfi_pending_last;
5368 stat.cfs_snd.cbs_inject_q_len =
5369 cfil_queue_len(&cfi->cfi_snd.cfi_inject_q);
5370 stat.cfs_snd.cbs_pass_offset =
5371 cfi->cfi_snd.cfi_pass_offset;
5372
5373 stat.cfs_rcv.cbs_pending_first =
5374 cfi->cfi_rcv.cfi_pending_first;
5375 stat.cfs_rcv.cbs_pending_last =
5376 cfi->cfi_rcv.cfi_pending_last;
5377 stat.cfs_rcv.cbs_inject_q_len =
5378 cfil_queue_len(&cfi->cfi_rcv.cfi_inject_q);
5379 stat.cfs_rcv.cbs_pass_offset =
5380 cfi->cfi_rcv.cfi_pass_offset;
5381
5382 for (i = 0; i < MAX_CONTENT_FILTER; i++) {
5383 struct cfil_entry_stat *estat;
5384 struct cfe_buf *ebuf;
5385 struct cfe_buf_stat *sbuf;
5386
5387 entry = &cfi->cfi_entries[i];
5388
5389 estat = &stat.ces_entries[i];
5390
5391 estat->ces_len = sizeof(struct cfil_entry_stat);
5392 estat->ces_filter_id = entry->cfe_filter ?
5393 entry->cfe_filter->cf_kcunit : 0;
5394 estat->ces_flags = entry->cfe_flags;
5395 estat->ces_necp_control_unit =
5396 entry->cfe_necp_control_unit;
5397
5398 estat->ces_last_event.tv_sec =
5399 (int64_t)entry->cfe_last_event.tv_sec;
5400 estat->ces_last_event.tv_usec =
5401 (int64_t)entry->cfe_last_event.tv_usec;
5402
5403 estat->ces_last_action.tv_sec =
5404 (int64_t)entry->cfe_last_action.tv_sec;
5405 estat->ces_last_action.tv_usec =
5406 (int64_t)entry->cfe_last_action.tv_usec;
5407
5408 ebuf = &entry->cfe_snd;
5409 sbuf = &estat->ces_snd;
5410 sbuf->cbs_pending_first =
5411 cfil_queue_offset_first(&ebuf->cfe_pending_q);
5412 sbuf->cbs_pending_last =
5413 cfil_queue_offset_last(&ebuf->cfe_pending_q);
5414 sbuf->cbs_ctl_first =
5415 cfil_queue_offset_first(&ebuf->cfe_ctl_q);
5416 sbuf->cbs_ctl_last =
5417 cfil_queue_offset_last(&ebuf->cfe_ctl_q);
5418 sbuf->cbs_pass_offset = ebuf->cfe_pass_offset;
5419 sbuf->cbs_peek_offset = ebuf->cfe_peek_offset;
5420 sbuf->cbs_peeked = ebuf->cfe_peeked;
5421
5422 ebuf = &entry->cfe_rcv;
5423 sbuf = &estat->ces_rcv;
5424 sbuf->cbs_pending_first =
5425 cfil_queue_offset_first(&ebuf->cfe_pending_q);
5426 sbuf->cbs_pending_last =
5427 cfil_queue_offset_last(&ebuf->cfe_pending_q);
5428 sbuf->cbs_ctl_first =
5429 cfil_queue_offset_first(&ebuf->cfe_ctl_q);
5430 sbuf->cbs_ctl_last =
5431 cfil_queue_offset_last(&ebuf->cfe_ctl_q);
5432 sbuf->cbs_pass_offset = ebuf->cfe_pass_offset;
5433 sbuf->cbs_peek_offset = ebuf->cfe_peek_offset;
5434 sbuf->cbs_peeked = ebuf->cfe_peeked;
5435 }
5436 error = SYSCTL_OUT(req, &stat,
5437 sizeof(struct cfil_sock_stat));
5438 if (error != 0) {
5439 break;
5440 }
5441 }
5442 done:
5443 cfil_rw_unlock_shared(&cfil_lck_rw);
5444
5445 #if SHOW_DEBUG
5446 if (req->oldptr != USER_ADDR_NULL) {
5447 cfil_info_show();
5448 }
5449 #endif
5450
5451 return error;
5452 }
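/*
 * Illustrative user-space sketch (not part of this file): reading the
 * per-socket stats exported by the sysctl handler above.  It follows the
 * usual two-call pattern the handler supports: a NULL buffer sizes the
 * result (including the slack added above for newly attached sockets),
 * then a second call copies out one cfil_sock_stat per attached socket.
 * The OID name "net.cfil.sock_list" and the availability of
 * struct cfil_sock_stat to user space are assumptions here.
 */
#if 0 /* example only, not compiled */
#include <sys/sysctl.h>
#include <net/content_filter.h>
#include <stdio.h>
#include <stdlib.h>

static void
dump_cfil_sock_list(void)
{
	size_t len = 0;
	struct cfil_sock_stat *stats;

	/* First call: ask how much space is needed. */
	if (sysctlbyname("net.cfil.sock_list", NULL, &len, NULL, 0) != 0 || len == 0) {
		return;
	}
	stats = malloc(len);
	if (stats == NULL) {
		return;
	}
	/* Second call: copy the records out. */
	if (sysctlbyname("net.cfil.sock_list", stats, &len, NULL, 0) == 0) {
		size_t count = len / sizeof(struct cfil_sock_stat);
		for (size_t i = 0; i < count; i++) {
			printf("sock_id %llu pid %d flags 0x%llx\n",
			    (unsigned long long)stats[i].cfs_sock_id,
			    (int)stats[i].cfs_pid,
			    (unsigned long long)stats[i].cfs_flags);
		}
	}
	free(stats);
}
#endif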
5453
5454 /*
5455 * UDP Socket Support
5456 */
5457 static void
5458 cfil_hash_entry_log(int level, struct socket *so, struct cfil_hash_entry *entry, uint64_t sockId, const char* msg)
5459 {
5460 char local[MAX_IPv6_STR_LEN + 6];
5461 char remote[MAX_IPv6_STR_LEN + 6];
5462 const void *addr;
5463
5464 // No socket or no UDP hash entry, no-op
5465 if (so == NULL || entry == NULL) {
5466 return;
5467 }
5468
5469 local[0] = remote[0] = 0x0;
5470
5471 switch (entry->cfentry_family) {
5472 case AF_INET6:
5473 addr = &entry->cfentry_laddr.addr6;
5474 inet_ntop(AF_INET6, addr, local, sizeof(local));
5475 addr = &entry->cfentry_faddr.addr6;
5476 inet_ntop(AF_INET6, addr, remote, sizeof(remote));
5477 break;
5478 case AF_INET:
5479 addr = &entry->cfentry_laddr.addr46.ia46_addr4.s_addr;
5480 inet_ntop(AF_INET, addr, local, sizeof(local));
5481 addr = &entry->cfentry_faddr.addr46.ia46_addr4.s_addr;
5482 inet_ntop(AF_INET, addr, remote, sizeof(remote));
5483 break;
5484 default:
5485 return;
5486 }
5487
5488 CFIL_LOG(level, "<%s>: <UDP so %llx, entry %p, sockID %llu> lport %d fport %d laddr %s faddr %s",
5489 msg,
5490 (uint64_t)VM_KERNEL_ADDRPERM(so), entry, sockId,
5491 ntohs(entry->cfentry_lport), ntohs(entry->cfentry_fport), local, remote);
5492 }
5493
5494 static void
5495 cfil_inp_log(int level, struct socket *so, const char* msg)
5496 {
5497 struct inpcb *inp = NULL;
5498 char local[MAX_IPv6_STR_LEN + 6];
5499 char remote[MAX_IPv6_STR_LEN + 6];
5500 const void *addr;
5501
5502 if (so == NULL) {
5503 return;
5504 }
5505
5506 inp = sotoinpcb(so);
5507 if (inp == NULL) {
5508 return;
5509 }
5510
5511 local[0] = remote[0] = 0x0;
5512
5513 #if INET6
5514 if (inp->inp_vflag & INP_IPV6) {
5515 addr = &inp->in6p_laddr.s6_addr32;
5516 inet_ntop(AF_INET6, addr, local, sizeof(local));
5517 addr = &inp->in6p_faddr.s6_addr32;
5518 inet_ntop(AF_INET6, addr, remote, sizeof(remote));
5519 } else
5520 #endif /* INET6 */
5521 {
5522 addr = &inp->inp_laddr.s_addr;
5523 inet_ntop(AF_INET, addr, local, sizeof(local));
5524 addr = &inp->inp_faddr.s_addr;
5525 inet_ntop(AF_INET, addr, remote, sizeof(remote));
5526 }
5527
5528 if (so->so_cfil != NULL) {
5529 CFIL_LOG(level, "<%s>: <%s so %llx - flags 0x%x 0x%x, sockID %llu> lport %d fport %d laddr %s faddr %s",
5530 msg, IS_UDP(so) ? "UDP" : "TCP",
5531 (uint64_t)VM_KERNEL_ADDRPERM(so), inp->inp_flags, inp->inp_socket->so_flags, so->so_cfil->cfi_sock_id,
5532 ntohs(inp->inp_lport), ntohs(inp->inp_fport), local, remote);
5533 } else {
5534 CFIL_LOG(level, "<%s>: <%s so %llx - flags 0x%x 0x%x> lport %d fport %d laddr %s faddr %s",
5535 msg, IS_UDP(so) ? "UDP" : "TCP",
5536 (uint64_t)VM_KERNEL_ADDRPERM(so), inp->inp_flags, inp->inp_socket->so_flags,
5537 ntohs(inp->inp_lport), ntohs(inp->inp_fport), local, remote);
5538 }
5539 }
5540
5541 static void
5542 cfil_info_log(int level, struct cfil_info *cfil_info, const char* msg)
5543 {
5544 if (cfil_info == NULL) {
5545 return;
5546 }
5547
5548 if (cfil_info->cfi_hash_entry != NULL) {
5549 cfil_hash_entry_log(level, cfil_info->cfi_so, cfil_info->cfi_hash_entry, cfil_info->cfi_sock_id, msg);
5550 } else {
5551 cfil_inp_log(level, cfil_info->cfi_so, msg);
5552 }
5553 }
5554
5555 errno_t
5556 cfil_db_init(struct socket *so)
5557 {
5558 errno_t error = 0;
5559 struct cfil_db *db = NULL;
5560
5561 CFIL_LOG(LOG_INFO, "");
5562
5563 db = zalloc(cfil_db_zone);
5564 if (db == NULL) {
5565 error = ENOMEM;
5566 goto done;
5567 }
5568 bzero(db, sizeof(struct cfil_db));
5569 db->cfdb_so = so;
5570 db->cfdb_hashbase = hashinit(CFILHASHSIZE, M_CFIL, &db->cfdb_hashmask);
5571 if (db->cfdb_hashbase == NULL) {
5572 zfree(cfil_db_zone, db);
5573 db = NULL;
5574 error = ENOMEM;
5575 goto done;
5576 }
5577
5578 so->so_cfil_db = db;
5579
5580 done:
5581 return error;
5582 }
5583
5584 void
5585 cfil_db_free(struct socket *so)
5586 {
5587 struct cfil_hash_entry *entry = NULL;
5588 struct cfil_hash_entry *temp_entry = NULL;
5589 struct cfilhashhead *cfilhash = NULL;
5590 struct cfil_db *db = NULL;
5591
5592 CFIL_LOG(LOG_INFO, "");
5593
5594 if (so == NULL || so->so_cfil_db == NULL) {
5595 return;
5596 }
5597 db = so->so_cfil_db;
5598
5599 #if LIFECYCLE_DEBUG
5600 CFIL_LOG(LOG_ERR, "CFIL: LIFECYCLE: <so %llx, db %p> freeing db (count == %d)",
5601 (uint64_t)VM_KERNEL_ADDRPERM(so), db, db->cfdb_count);
5602 #endif
5603
5604 for (int i = 0; i < CFILHASHSIZE; i++) {
5605 cfilhash = &db->cfdb_hashbase[i];
5606 LIST_FOREACH_SAFE(entry, cfilhash, cfentry_link, temp_entry) {
5607 if (entry->cfentry_cfil != NULL) {
5608 #if LIFECYCLE_DEBUG
5609 cfil_info_log(LOG_ERR, entry->cfentry_cfil, "CFIL: LIFECYCLE: DB FREE CLEAN UP");
5610 #endif
5611 cfil_info_free(entry->cfentry_cfil);
5612 OSIncrementAtomic(&cfil_stats.cfs_sock_detached);
5613 entry->cfentry_cfil = NULL;
5614 }
5615
5616 cfil_db_delete_entry(db, entry);
5617 if (so->so_flags & SOF_CONTENT_FILTER) {
5618 if (db->cfdb_count == 0) {
5619 so->so_flags &= ~SOF_CONTENT_FILTER;
5620 }
5621 VERIFY(so->so_usecount > 0);
5622 so->so_usecount--;
5623 }
5624 }
5625 }
5626
5627 // Make sure all entries are cleaned up!
5628 VERIFY(db->cfdb_count == 0);
5629 #if LIFECYCLE_DEBUG
5630 CFIL_LOG(LOG_ERR, "CFIL: LIFECYCLE: so usecount %d", so->so_usecount);
5631 #endif
5632
5633 FREE(db->cfdb_hashbase, M_CFIL);
5634 zfree(cfil_db_zone, db);
5635 so->so_cfil_db = NULL;
5636 }
5637
5638 static bool
5639 fill_cfil_hash_entry_from_address(struct cfil_hash_entry *entry, bool isLocal, struct sockaddr *addr)
5640 {
5641 struct sockaddr_in *sin = NULL;
5642 struct sockaddr_in6 *sin6 = NULL;
5643
5644 if (entry == NULL || addr == NULL) {
5645 return FALSE;
5646 }
5647
5648 switch (addr->sa_family) {
5649 case AF_INET:
5650 sin = satosin(addr);
5651 if (sin->sin_len != sizeof(*sin)) {
5652 return FALSE;
5653 }
5654 if (isLocal == TRUE) {
5655 entry->cfentry_lport = sin->sin_port;
5656 entry->cfentry_laddr.addr46.ia46_addr4.s_addr = sin->sin_addr.s_addr;
5657 } else {
5658 entry->cfentry_fport = sin->sin_port;
5659 entry->cfentry_faddr.addr46.ia46_addr4.s_addr = sin->sin_addr.s_addr;
5660 }
5661 entry->cfentry_family = AF_INET;
5662 return TRUE;
5663 case AF_INET6:
5664 sin6 = satosin6(addr);
5665 if (sin6->sin6_len != sizeof(*sin6)) {
5666 return FALSE;
5667 }
5668 if (isLocal == TRUE) {
5669 entry->cfentry_lport = sin6->sin6_port;
5670 entry->cfentry_laddr.addr6 = sin6->sin6_addr;
5671 } else {
5672 entry->cfentry_fport = sin6->sin6_port;
5673 entry->cfentry_faddr.addr6 = sin6->sin6_addr;
5674 }
5675 entry->cfentry_family = AF_INET6;
5676 return TRUE;
5677 default:
5678 return FALSE;
5679 }
5680 }
5681
5682 static bool
5683 fill_cfil_hash_entry_from_inp(struct cfil_hash_entry *entry, bool isLocal, struct inpcb *inp)
5684 {
5685 if (entry == NULL || inp == NULL) {
5686 return FALSE;
5687 }
5688
5689 if (inp->inp_vflag & INP_IPV4) {
5690 if (isLocal == TRUE) {
5691 entry->cfentry_lport = inp->inp_lport;
5692 entry->cfentry_laddr.addr46.ia46_addr4.s_addr = inp->inp_laddr.s_addr;
5693 } else {
5694 entry->cfentry_fport = inp->inp_fport;
5695 entry->cfentry_faddr.addr46.ia46_addr4.s_addr = inp->inp_faddr.s_addr;
5696 }
5697 entry->cfentry_family = AF_INET;
5698 return TRUE;
5699 } else if (inp->inp_vflag & INP_IPV6) {
5700 if (isLocal == TRUE) {
5701 entry->cfentry_lport = inp->inp_lport;
5702 entry->cfentry_laddr.addr6 = inp->in6p_laddr;
5703 } else {
5704 entry->cfentry_fport = inp->inp_fport;
5705 entry->cfentry_faddr.addr6 = inp->in6p_faddr;
5706 }
5707 entry->cfentry_family = AF_INET6;
5708 return TRUE;
5709 }
5710 return FALSE;
5711 }
5712
5713 bool
5714 check_port(struct sockaddr *addr, u_short port)
5715 {
5716 struct sockaddr_in *sin = NULL;
5717 struct sockaddr_in6 *sin6 = NULL;
5718
5719 if (addr == NULL || port == 0) {
5720 return FALSE;
5721 }
5722
5723 switch (addr->sa_family) {
5724 case AF_INET:
5725 sin = satosin(addr);
5726 if (sin->sin_len != sizeof(*sin)) {
5727 return FALSE;
5728 }
5729 if (port == ntohs(sin->sin_port)) {
5730 return TRUE;
5731 }
5732 break;
5733 case AF_INET6:
5734 sin6 = satosin6(addr);
5735 if (sin6->sin6_len != sizeof(*sin6)) {
5736 return FALSE;
5737 }
5738 if (port == ntohs(sin6->sin6_port)) {
5739 return TRUE;
5740 }
5741 break;
5742 default:
5743 break;
5744 }
5745 return FALSE;
5746 }
5747
5748 struct cfil_hash_entry *
5749 cfil_db_lookup_entry_with_sockid(struct cfil_db *db, u_int64_t sock_id)
5750 {
5751 struct cfilhashhead *cfilhash = NULL;
5752 u_int32_t flowhash = (u_int32_t)(sock_id & 0x0ffffffff);
5753 struct cfil_hash_entry *nextentry;
5754
5755 if (db == NULL || db->cfdb_hashbase == NULL || sock_id == 0) {
5756 return NULL;
5757 }
5758
5759 flowhash &= db->cfdb_hashmask;
5760 cfilhash = &db->cfdb_hashbase[flowhash];
5761
5762 LIST_FOREACH(nextentry, cfilhash, cfentry_link) {
5763 if (nextentry->cfentry_cfil != NULL &&
5764 nextentry->cfentry_cfil->cfi_sock_id == sock_id) {
5765 CFIL_LOG(LOG_DEBUG, "CFIL: UDP <so %llx> matched <id %llu, hash %u>",
5766 (uint64_t)VM_KERNEL_ADDRPERM(db->cfdb_so), nextentry->cfentry_cfil->cfi_sock_id, flowhash);
5767 cfil_hash_entry_log(LOG_DEBUG, db->cfdb_so, nextentry, 0, "CFIL: UDP found entry");
5768 return nextentry;
5769 }
5770 }
5771
5772 CFIL_LOG(LOG_DEBUG, "CFIL: UDP <so %llx> NOT matched <id %llu, hash %u>",
5773 (uint64_t)VM_KERNEL_ADDRPERM(db->cfdb_so), sock_id, flowhash);
5774 return NULL;
5775 }
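/*
 * Minimal sketch (an assumption drawn from the lookup above): for UDP
 * flows the low 32 bits of the cfil sock id carry the flow hash, so the
 * hash bucket can be recovered from the id alone, without rebuilding the
 * address/port tuple.  The helper name is hypothetical.
 */
#if 0 /* example only, not compiled */
static u_int32_t
example_sock_id_to_bucket(u_int64_t sock_id, u_long hashmask)
{
	/* Same masking as cfil_db_lookup_entry_with_sockid() above. */
	u_int32_t flowhash = (u_int32_t)(sock_id & 0x0ffffffff);

	return flowhash & (u_int32_t)hashmask;
}
#endif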
5776
5777 struct cfil_hash_entry *
5778 cfil_db_lookup_entry(struct cfil_db *db, struct sockaddr *local, struct sockaddr *remote)
5779 {
5780 struct cfil_hash_entry matchentry;
5781 struct cfil_hash_entry *nextentry = NULL;
5782 struct inpcb *inp = sotoinpcb(db->cfdb_so);
5783 u_int32_t hashkey_faddr = 0, hashkey_laddr = 0;
5784 int inp_hash_element = 0;
5785 struct cfilhashhead *cfilhash = NULL;
5786
5787 CFIL_LOG(LOG_INFO, "");
5788
5789 if (inp == NULL) {
5790 goto done;
5791 }
5792
5793 if (local != NULL) {
5794 fill_cfil_hash_entry_from_address(&matchentry, TRUE, local);
5795 } else {
5796 fill_cfil_hash_entry_from_inp(&matchentry, TRUE, inp);
5797 }
5798 if (remote != NULL) {
5799 fill_cfil_hash_entry_from_address(&matchentry, FALSE, remote);
5800 } else {
5801 fill_cfil_hash_entry_from_inp(&matchentry, FALSE, inp);
5802 }
5803
5804 #if INET6
5805 if (inp->inp_vflag & INP_IPV6) {
5806 hashkey_faddr = matchentry.cfentry_faddr.addr6.s6_addr32[3];
5807 hashkey_laddr = matchentry.cfentry_laddr.addr6.s6_addr32[3];
5808 } else
5809 #endif /* INET6 */
5810 {
5811 hashkey_faddr = matchentry.cfentry_faddr.addr46.ia46_addr4.s_addr;
5812 hashkey_laddr = matchentry.cfentry_laddr.addr46.ia46_addr4.s_addr;
5813 }
5814
5815 inp_hash_element = CFIL_HASH(hashkey_laddr, hashkey_faddr,
5816 matchentry.cfentry_lport, matchentry.cfentry_fport);
5817 inp_hash_element &= db->cfdb_hashmask;
5818
5819 cfilhash = &db->cfdb_hashbase[inp_hash_element];
5820
5821 LIST_FOREACH(nextentry, cfilhash, cfentry_link) {
5822 #if INET6
5823 if ((inp->inp_vflag & INP_IPV6) &&
5824 nextentry->cfentry_lport == matchentry.cfentry_lport &&
5825 nextentry->cfentry_fport == matchentry.cfentry_fport &&
5826 IN6_ARE_ADDR_EQUAL(&nextentry->cfentry_laddr.addr6, &matchentry.cfentry_laddr.addr6) &&
5827 IN6_ARE_ADDR_EQUAL(&nextentry->cfentry_faddr.addr6, &matchentry.cfentry_faddr.addr6)) {
5828 #if DATA_DEBUG
5829 cfil_hash_entry_log(LOG_DEBUG, db->cfdb_so, &matchentry, 0, "CFIL LOOKUP ENTRY: UDP V6 found entry");
5830 #endif
5831 return nextentry;
5832 } else
5833 #endif /* INET6 */
5834 if (nextentry->cfentry_lport == matchentry.cfentry_lport &&
5835 nextentry->cfentry_fport == matchentry.cfentry_fport &&
5836 nextentry->cfentry_laddr.addr46.ia46_addr4.s_addr == matchentry.cfentry_laddr.addr46.ia46_addr4.s_addr &&
5837 nextentry->cfentry_faddr.addr46.ia46_addr4.s_addr == matchentry.cfentry_faddr.addr46.ia46_addr4.s_addr) {
5838 #if DATA_DEBUG
5839 cfil_hash_entry_log(LOG_DEBUG, db->cfdb_so, &matchentry, 0, "CFIL LOOKUP ENTRY: UDP V4 found entry");
5840 #endif
5841 return nextentry;
5842 }
5843 }
5844
5845 done:
5846 #if DATA_DEBUG
5847 cfil_hash_entry_log(LOG_DEBUG, db->cfdb_so, &matchentry, 0, "CFIL LOOKUP ENTRY: UDP no entry found");
5848 #endif
5849 return NULL;
5850 }
5851
5852 void
5853 cfil_db_delete_entry(struct cfil_db *db, struct cfil_hash_entry *hash_entry)
5854 {
5855 if (hash_entry == NULL) {
5856 return;
5857 }
5858 if (db == NULL || db->cfdb_count == 0) {
5859 return;
5860 }
5861 db->cfdb_count--;
5862 if (db->cfdb_only_entry == hash_entry) {
5863 db->cfdb_only_entry = NULL;
5864 }
5865 LIST_REMOVE(hash_entry, cfentry_link);
5866 zfree(cfil_hash_entry_zone, hash_entry);
5867 }
5868
5869 struct cfil_hash_entry *
5870 cfil_db_add_entry(struct cfil_db *db, struct sockaddr *local, struct sockaddr *remote)
5871 {
5872 struct cfil_hash_entry *entry = NULL;
5873 struct inpcb *inp = sotoinpcb(db->cfdb_so);
5874 u_int32_t hashkey_faddr = 0, hashkey_laddr = 0;
5875 int inp_hash_element = 0;
5876 struct cfilhashhead *cfilhash = NULL;
5877
5878 CFIL_LOG(LOG_INFO, "");
5879
5880 if (inp == NULL) {
5881 goto done;
5882 }
5883
5884 entry = zalloc(cfil_hash_entry_zone);
5885 if (entry == NULL) {
5886 goto done;
5887 }
5888 bzero(entry, sizeof(struct cfil_hash_entry));
5889
5890 if (local != NULL) {
5891 fill_cfil_hash_entry_from_address(entry, TRUE, local);
5892 } else {
5893 fill_cfil_hash_entry_from_inp(entry, TRUE, inp);
5894 }
5895 if (remote != NULL) {
5896 fill_cfil_hash_entry_from_address(entry, FALSE, remote);
5897 } else {
5898 fill_cfil_hash_entry_from_inp(entry, FALSE, inp);
5899 }
5900 entry->cfentry_lastused = net_uptime();
5901
5902 #if INET6
5903 if (inp->inp_vflag & INP_IPV6) {
5904 hashkey_faddr = entry->cfentry_faddr.addr6.s6_addr32[3];
5905 hashkey_laddr = entry->cfentry_laddr.addr6.s6_addr32[3];
5906 } else
5907 #endif /* INET6 */
5908 {
5909 hashkey_faddr = entry->cfentry_faddr.addr46.ia46_addr4.s_addr;
5910 hashkey_laddr = entry->cfentry_laddr.addr46.ia46_addr4.s_addr;
5911 }
5912 entry->cfentry_flowhash = CFIL_HASH(hashkey_laddr, hashkey_faddr,
5913 entry->cfentry_lport, entry->cfentry_fport);
5914 inp_hash_element = entry->cfentry_flowhash & db->cfdb_hashmask;
5915
5916 cfilhash = &db->cfdb_hashbase[inp_hash_element];
5917
5918 LIST_INSERT_HEAD(cfilhash, entry, cfentry_link);
5919 db->cfdb_count++;
5920 db->cfdb_only_entry = entry;
5921 cfil_hash_entry_log(LOG_DEBUG, db->cfdb_so, entry, 0, "CFIL: cfil_db_add_entry: ADDED");
5922
5923 done:
5924 CFIL_LOG(LOG_DEBUG, "CFIL: UDP <so %llx> total count %d", (uint64_t)VM_KERNEL_ADDRPERM(db->cfdb_so), db->cfdb_count);
5925 return entry;
5926 }
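/*
 * Illustrative sketch of the bucket selection shared by
 * cfil_db_lookup_entry() and cfil_db_add_entry() above: the flow hash is
 * computed over the local/foreign address and port tuple, with IPv6
 * addresses contributing only their last 32 bits, then folded into a
 * bucket index with the hash mask.  The helper name is hypothetical.
 */
#if 0 /* example only, not compiled */
static int
example_flow_bucket(struct cfil_db *db, struct cfil_hash_entry *entry, bool ipv6)
{
	u_int32_t laddr, faddr;

	if (ipv6) {
		laddr = entry->cfentry_laddr.addr6.s6_addr32[3];
		faddr = entry->cfentry_faddr.addr6.s6_addr32[3];
	} else {
		laddr = entry->cfentry_laddr.addr46.ia46_addr4.s_addr;
		faddr = entry->cfentry_faddr.addr46.ia46_addr4.s_addr;
	}
	return (int)(CFIL_HASH(laddr, faddr, entry->cfentry_lport,
	    entry->cfentry_fport) & db->cfdb_hashmask);
}
#endif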
5927
5928 struct cfil_info *
5929 cfil_db_get_cfil_info(struct cfil_db *db, cfil_sock_id_t id)
5930 {
5931 struct cfil_hash_entry *hash_entry = NULL;
5932
5933 CFIL_LOG(LOG_INFO, "");
5934
5935 if (db == NULL || id == 0) {
5936 CFIL_LOG(LOG_DEBUG, "CFIL: UDP <so %llx> NULL DB <id %llu>",
5937 db ? (uint64_t)VM_KERNEL_ADDRPERM(db->cfdb_so) : 0, id);
5938 return NULL;
5939 }
5940
5941 // This is an optimization for a connected UDP socket, which has only one flow.
5942 // No need to do the hash lookup.
5943 if (db->cfdb_count == 1) {
5944 if (db->cfdb_only_entry && db->cfdb_only_entry->cfentry_cfil &&
5945 db->cfdb_only_entry->cfentry_cfil->cfi_sock_id == id) {
5946 return db->cfdb_only_entry->cfentry_cfil;
5947 }
5948 }
5949
5950 hash_entry = cfil_db_lookup_entry_with_sockid(db, id);
5951 return hash_entry != NULL ? hash_entry->cfentry_cfil : NULL;
5952 }
5953
5954 struct cfil_hash_entry *
5955 cfil_sock_udp_get_flow(struct socket *so, uint32_t filter_control_unit, bool outgoing, struct sockaddr *local, struct sockaddr *remote)
5956 {
5957 struct cfil_hash_entry *hash_entry = NULL;
5958
5959 errno_t error = 0;
5960 socket_lock_assert_owned(so);
5961
5962 // If new socket, allocate cfil db
5963 if (so->so_cfil_db == NULL) {
5964 if (cfil_db_init(so) != 0) {
5965 return NULL;
5966 }
5967 }
5968
5969 // See if flow already exists.
5970 hash_entry = cfil_db_lookup_entry(so->so_cfil_db, local, remote);
5971 if (hash_entry != NULL) {
5972 return hash_entry;
5973 }
5974
5975 hash_entry = cfil_db_add_entry(so->so_cfil_db, local, remote);
5976 if (hash_entry == NULL) {
5977 OSIncrementAtomic(&cfil_stats.cfs_sock_attach_no_mem);
5978 CFIL_LOG(LOG_ERR, "CFIL: UDP failed to add entry");
5979 return NULL;
5980 }
5981
5982 if (cfil_info_alloc(so, hash_entry) == NULL ||
5983 hash_entry->cfentry_cfil == NULL) {
5984 cfil_db_delete_entry(so->so_cfil_db, hash_entry);
5985 CFIL_LOG(LOG_ERR, "CFIL: UDP failed to alloc cfil_info");
5986 OSIncrementAtomic(&cfil_stats.cfs_sock_attach_no_mem);
5987 return NULL;
5988 }
5989 hash_entry->cfentry_cfil->cfi_dir = outgoing ? CFS_CONNECTION_DIR_OUT : CFS_CONNECTION_DIR_IN;
5990
5991 #if LIFECYCLE_DEBUG
5992 cfil_info_log(LOG_ERR, hash_entry->cfentry_cfil, "CFIL: LIFECYCLE: ADDED");
5993 #endif
5994
5995 if (cfil_info_attach_unit(so, filter_control_unit, hash_entry->cfentry_cfil) == 0) {
5996 cfil_info_free(hash_entry->cfentry_cfil);
5997 cfil_db_delete_entry(so->so_cfil_db, hash_entry);
5998 CFIL_LOG(LOG_ERR, "CFIL: UDP cfil_info_attach_unit(%u) failed",
5999 filter_control_unit);
6000 OSIncrementAtomic(&cfil_stats.cfs_sock_attach_failed);
6001 return NULL;
6002 }
6003 CFIL_LOG(LOG_DEBUG, "CFIL: UDP <so %llx> filter_control_unit %u sockID %llu attached",
6004 (uint64_t)VM_KERNEL_ADDRPERM(so),
6005 filter_control_unit, hash_entry->cfentry_cfil->cfi_sock_id);
6006
6007 so->so_flags |= SOF_CONTENT_FILTER;
6008 OSIncrementAtomic(&cfil_stats.cfs_sock_attached);
6009
6010 /* Hold a reference on the socket for each flow */
6011 so->so_usecount++;
6012
6013 error = cfil_dispatch_attach_event(so, hash_entry->cfentry_cfil, 0,
6014 outgoing ? CFS_CONNECTION_DIR_OUT : CFS_CONNECTION_DIR_IN);
6015 /* We can recover from flow control or out of memory errors */
6016 if (error != 0 && error != ENOBUFS && error != ENOMEM) {
6017 return NULL;
6018 }
6019
6020 CFIL_INFO_VERIFY(hash_entry->cfentry_cfil);
6021 return hash_entry;
6022 }
6023
6024 errno_t
6025 cfil_sock_udp_handle_data(bool outgoing, struct socket *so,
6026 struct sockaddr *local, struct sockaddr *remote,
6027 struct mbuf *data, struct mbuf *control, uint32_t flags)
6028 {
6029 #pragma unused(outgoing, so, local, remote, data, control, flags)
6030 errno_t error = 0;
6031 uint32_t filter_control_unit;
6032 struct cfil_hash_entry *hash_entry = NULL;
6033 struct cfil_info *cfil_info = NULL;
6034
6035 socket_lock_assert_owned(so);
6036
6037 if (cfil_active_count == 0) {
6038 CFIL_LOG(LOG_DEBUG, "CFIL: UDP no active filter");
6039 OSIncrementAtomic(&cfil_stats.cfs_sock_attach_in_vain);
6040 return error;
6041 }
6042
6043 // Socket has been blessed, skip content filtering
6044 if ((so->so_flags1 & SOF1_CONTENT_FILTER_SKIP) != 0) {
6045 return error;
6046 }
6047
6048 filter_control_unit = necp_socket_get_content_filter_control_unit(so);
6049 if (filter_control_unit == 0) {
6050 CFIL_LOG(LOG_DEBUG, "CFIL: UDP failed to get control unit");
6051 return error;
6052 }
6053
6054 if (filter_control_unit == NECP_FILTER_UNIT_NO_FILTER) {
6055 return error;
6056 }
6057
6058 if ((filter_control_unit & NECP_MASK_USERSPACE_ONLY) != 0) {
6059 CFIL_LOG(LOG_DEBUG, "CFIL: UDP user space only");
6060 OSIncrementAtomic(&cfil_stats.cfs_sock_userspace_only);
6061 return error;
6062 }
6063
6064 hash_entry = cfil_sock_udp_get_flow(so, filter_control_unit, outgoing, local, remote);
6065 if (hash_entry == NULL || hash_entry->cfentry_cfil == NULL) {
6066 CFIL_LOG(LOG_ERR, "CFIL: Failed to create UDP flow");
6067 return EPIPE;
6068 }
6069 // Update last-used timestamp; this is used for the flow idle timeout
6070 hash_entry->cfentry_lastused = net_uptime();
6071 cfil_info = hash_entry->cfentry_cfil;
6072
6073 if (cfil_info->cfi_flags & CFIF_DROP) {
6074 #if DATA_DEBUG
6075 cfil_hash_entry_log(LOG_DEBUG, so, hash_entry, 0, "CFIL: UDP DROP");
6076 #endif
6077 return EPIPE;
6078 }
6079 if (control != NULL) {
6080 OSIncrementAtomic(&cfil_stats.cfs_data_in_control);
6081 }
6082 if (data->m_type == MT_OOBDATA) {
6083 CFIL_LOG(LOG_ERR, "so %llx MSG_OOB",
6084 (uint64_t)VM_KERNEL_ADDRPERM(so));
6085 OSIncrementAtomic(&cfil_stats.cfs_data_in_oob);
6086 }
6087
6088 error = cfil_data_common(so, cfil_info, outgoing, remote, data, control, flags);
6089
6090 return error;
6091 }
6092
6093 /*
6094 * Go through all UDP flows for the specified socket and return TRUE if
6095 * any flow is still attached. If need_wait is TRUE, wait on the first
6096 * attached flow.
6097 */
6098 static int
6099 cfil_filters_udp_attached(struct socket *so, bool need_wait)
6100 {
6101 struct timespec ts;
6102 lck_mtx_t *mutex_held;
6103 struct cfilhashhead *cfilhash = NULL;
6104 struct cfil_db *db = NULL;
6105 struct cfil_hash_entry *hash_entry = NULL;
6106 struct cfil_hash_entry *temp_hash_entry = NULL;
6107 struct cfil_info *cfil_info = NULL;
6108 struct cfil_entry *entry = NULL;
6109 errno_t error = 0;
6110 int kcunit;
6111 int attached = 0;
6112 uint64_t sock_flow_id = 0;
6113
6114 socket_lock_assert_owned(so);
6115
6116 if ((so->so_flags & SOF_CONTENT_FILTER) != 0 && so->so_cfil_db != NULL) {
6117 if (so->so_proto->pr_getlock != NULL) {
6118 mutex_held = (*so->so_proto->pr_getlock)(so, PR_F_WILLUNLOCK);
6119 } else {
6120 mutex_held = so->so_proto->pr_domain->dom_mtx;
6121 }
6122 LCK_MTX_ASSERT(mutex_held, LCK_MTX_ASSERT_OWNED);
6123
6124 db = so->so_cfil_db;
6125
6126 for (int i = 0; i < CFILHASHSIZE; i++) {
6127 cfilhash = &db->cfdb_hashbase[i];
6128
6129 LIST_FOREACH_SAFE(hash_entry, cfilhash, cfentry_link, temp_hash_entry) {
6130 if (hash_entry->cfentry_cfil != NULL) {
6131 cfil_info = hash_entry->cfentry_cfil;
6132 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
6133 entry = &cfil_info->cfi_entries[kcunit - 1];
6134
6135 /* Are we attached to the filter? */
6136 if (entry->cfe_filter == NULL) {
6137 continue;
6138 }
6139
6140 if ((entry->cfe_flags & CFEF_SENT_SOCK_ATTACHED) == 0) {
6141 continue;
6142 }
6143 if ((entry->cfe_flags & CFEF_CFIL_DETACHED) != 0) {
6144 continue;
6145 }
6146
6147 attached = 1;
6148
6149 if (need_wait == TRUE) {
6150 #if LIFECYCLE_DEBUG
6151 cfil_info_log(LOG_ERR, cfil_info, "CFIL: LIFECYCLE: WAIT FOR FLOW TO FINISH");
6152 #endif
6153
6154 ts.tv_sec = cfil_close_wait_timeout / 1000;
6155 ts.tv_nsec = (cfil_close_wait_timeout % 1000) *
6156 NSEC_PER_USEC * 1000;
6157
6158 OSIncrementAtomic(&cfil_stats.cfs_close_wait);
6159 cfil_info->cfi_flags |= CFIF_CLOSE_WAIT;
6160 sock_flow_id = cfil_info->cfi_sock_id;
6161
6162 error = msleep((caddr_t)cfil_info, mutex_held,
6163 PSOCK | PCATCH, "cfil_filters_udp_attached", &ts);
6164
6165 // Woke up from sleep; check that cfil_info is still valid
6166 if (so->so_cfil_db == NULL ||
6167 (cfil_info != cfil_db_get_cfil_info(so->so_cfil_db, sock_flow_id))) {
6168 // cfil_info is not valid, do not continue
6169 goto done;
6170 }
6171
6172 cfil_info->cfi_flags &= ~CFIF_CLOSE_WAIT;
6173
6174 #if LIFECYCLE_DEBUG
6175 cfil_info_log(LOG_ERR, cfil_info, "CFIL: LIFECYCLE: WAIT FOR FLOW DONE");
6176 #endif
6177
6178 /*
6179 * Force close in case of timeout
6180 */
6181 if (error != 0) {
6182 OSIncrementAtomic(&cfil_stats.cfs_close_wait_timeout);
6183 #if LIFECYCLE_DEBUG
6184 cfil_info_log(LOG_ERR, cfil_info, "CFIL: LIFECYCLE: WAIT FOR FLOW TIMED OUT, FORCE DETACH");
6185 #endif
6186 entry->cfe_flags |= CFEF_CFIL_DETACHED;
6187 }
6188 }
6189 goto done;
6190 }
6191 }
6192 }
6193 }
6194 }
6195
6196 done:
6197 return attached;
6198 }
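/*
 * Small sketch of the timeout conversion used for the close-wait
 * msleep() above: cfil_close_wait_timeout is expressed in milliseconds
 * and split into the seconds/nanoseconds fields of a timespec.  The
 * helper name is hypothetical.
 */
#if 0 /* example only, not compiled */
static struct timespec
example_close_wait_timespec(uint32_t timeout_ms)
{
	struct timespec ts;

	ts.tv_sec = timeout_ms / 1000;
	ts.tv_nsec = (timeout_ms % 1000) * NSEC_PER_USEC * 1000;
	return ts;
}
#endif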
6199
6200 int32_t
6201 cfil_sock_udp_data_pending(struct sockbuf *sb, bool check_thread)
6202 {
6203 struct socket *so = sb->sb_so;
6204 struct cfi_buf *cfi_buf;
6205 uint64_t pending = 0;
6206 uint64_t total_pending = 0;
6207 struct cfilhashhead *cfilhash = NULL;
6208 struct cfil_db *db = NULL;
6209 struct cfil_hash_entry *hash_entry = NULL;
6210 struct cfil_hash_entry *temp_hash_entry = NULL;
6211
6212 socket_lock_assert_owned(so);
6213
6214 if ((so->so_flags & SOF_CONTENT_FILTER) != 0 && so->so_cfil_db != NULL &&
6215 (check_thread == FALSE || so->so_snd.sb_cfil_thread != current_thread())) {
6216 db = so->so_cfil_db;
6217
6218 for (int i = 0; i < CFILHASHSIZE; i++) {
6219 cfilhash = &db->cfdb_hashbase[i];
6220
6221 LIST_FOREACH_SAFE(hash_entry, cfilhash, cfentry_link, temp_hash_entry) {
6222 if (hash_entry->cfentry_cfil != NULL) {
6223 if ((sb->sb_flags & SB_RECV) == 0) {
6224 cfi_buf = &hash_entry->cfentry_cfil->cfi_snd;
6225 } else {
6226 cfi_buf = &hash_entry->cfentry_cfil->cfi_rcv;
6227 }
6228
6229 pending = cfi_buf->cfi_pending_last - cfi_buf->cfi_pending_first;
6230 /*
6231 * If we are limited by the "chars of mbufs used", adjust the
6232 * estimate upward so we won't overcommit
6233 */
6234 if ((uint64_t)cfi_buf->cfi_pending_mbcnt > pending) {
6235 pending = cfi_buf->cfi_pending_mbcnt;
6236 }
6237
6238 total_pending += pending;
6239 }
6240 }
6241 }
6242
6243 VERIFY(total_pending < INT32_MAX);
6244 #if DATA_DEBUG
6245 CFIL_LOG(LOG_DEBUG, "CFIL: <so %llx> total pending %llu <check_thread %d>",
6246 (uint64_t)VM_KERNEL_ADDRPERM(so),
6247 total_pending, check_thread);
6248 #endif
6249 }
6250
6251 return (int32_t)(total_pending);
6252 }
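/*
 * Sketch of the per-flow pending estimate accumulated above: the byte
 * span of the pending queue, bumped up to the mbuf storage count when
 * that is the larger (and therefore limiting) quantity.  The helper name
 * is hypothetical.
 */
#if 0 /* example only, not compiled */
static uint64_t
example_flow_pending(const struct cfi_buf *cfi_buf)
{
	uint64_t pending = cfi_buf->cfi_pending_last - cfi_buf->cfi_pending_first;

	if ((uint64_t)cfi_buf->cfi_pending_mbcnt > pending) {
		pending = cfi_buf->cfi_pending_mbcnt;
	}
	return pending;
}
#endif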
6253
6254 int
6255 cfil_sock_udp_notify_shutdown(struct socket *so, int how, int drop_flag, int shut_flag)
6256 {
6257 struct cfil_info *cfil_info = NULL;
6258 struct cfilhashhead *cfilhash = NULL;
6259 struct cfil_db *db = NULL;
6260 struct cfil_hash_entry *hash_entry = NULL;
6261 struct cfil_hash_entry *temp_hash_entry = NULL;
6262 errno_t error = 0;
6263 int done_count = 0;
6264 int kcunit;
6265
6266 socket_lock_assert_owned(so);
6267
6268 if ((so->so_flags & SOF_CONTENT_FILTER) != 0 && so->so_cfil_db != NULL) {
6269 db = so->so_cfil_db;
6270
6271 for (int i = 0; i < CFILHASHSIZE; i++) {
6272 cfilhash = &db->cfdb_hashbase[i];
6273
6274 LIST_FOREACH_SAFE(hash_entry, cfilhash, cfentry_link, temp_hash_entry) {
6275 if (hash_entry->cfentry_cfil != NULL) {
6276 cfil_info = hash_entry->cfentry_cfil;
6277
6278 // This flow is marked as DROP
6279 if (cfil_info->cfi_flags & drop_flag) {
6280 done_count++;
6281 continue;
6282 }
6283
6284 // This flow has been shut already, skip
6285 if (cfil_info->cfi_flags & shut_flag) {
6286 continue;
6287 }
6288 // Mark flow as shut
6289 cfil_info->cfi_flags |= shut_flag;
6290 done_count++;
6291
6292 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
6293 /* Disconnect incoming side */
6294 if (how != SHUT_WR) {
6295 error = cfil_dispatch_disconnect_event(so, cfil_info, kcunit, 0);
6296 }
6297 /* Disconnect outgoing side */
6298 if (how != SHUT_RD) {
6299 error = cfil_dispatch_disconnect_event(so, cfil_info, kcunit, 1);
6300 }
6301 }
6302 }
6303 }
6304 }
6305 }
6306
6307 if (done_count == 0) {
6308 error = ENOTCONN;
6309 }
6310 return error;
6311 }
6312
6313 int
6314 cfil_sock_udp_shutdown(struct socket *so, int *how)
6315 {
6316 int error = 0;
6317
6318 if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || (so->so_cfil_db == NULL)) {
6319 goto done;
6320 }
6321
6322 socket_lock_assert_owned(so);
6323
6324 CFIL_LOG(LOG_INFO, "so %llx how %d",
6325 (uint64_t)VM_KERNEL_ADDRPERM(so), *how);
6326
6327 /*
6328 * Check the state of the socket before the content filter
6329 */
6330 if (*how != SHUT_WR && (so->so_state & SS_CANTRCVMORE) != 0) {
6331 /* read already shut down */
6332 error = ENOTCONN;
6333 goto done;
6334 }
6335 if (*how != SHUT_RD && (so->so_state & SS_CANTSENDMORE) != 0) {
6336 /* write already shut down */
6337 error = ENOTCONN;
6338 goto done;
6339 }
6340
6341 /*
6342 * shutdown read: SHUT_RD or SHUT_RDWR
6343 */
6344 if (*how != SHUT_WR) {
6345 error = cfil_sock_udp_notify_shutdown(so, SHUT_RD, CFIF_DROP, CFIF_SHUT_RD);
6346 if (error != 0) {
6347 goto done;
6348 }
6349 }
6350 /*
6351 * shutdown write: SHUT_WR or SHUT_RDWR
6352 */
6353 if (*how != SHUT_RD) {
6354 error = cfil_sock_udp_notify_shutdown(so, SHUT_WR, CFIF_DROP, CFIF_SHUT_WR);
6355 if (error != 0) {
6356 goto done;
6357 }
6358
6359 /*
6360 * When outgoing data is pending, we delay the shutdown at the
6361 * protocol level until the content filters give the final
6362 * verdict on the pending data.
6363 */
6364 if (cfil_sock_data_pending(&so->so_snd) != 0) {
6365 /*
6366 * When shutting down the read and write sides at once
6367 * we can proceed to the final shutdown of the read
6368 * side. Otherwise, we just return.
6369 */
6370 if (*how == SHUT_WR) {
6371 error = EJUSTRETURN;
6372 } else if (*how == SHUT_RDWR) {
6373 *how = SHUT_RD;
6374 }
6375 }
6376 }
6377 done:
6378 return error;
6379 }
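/*
 * Hypothetical caller pattern for the shutdown handling above (the
 * actual protocol hook lives elsewhere): EJUSTRETURN means the filters
 * still hold pending outgoing data and the protocol-level shutdown must
 * be deferred, while a rewritten *how lets a SHUT_RDWR request proceed
 * with the read side only.  The function name is illustrative.
 */
#if 0 /* example only, not compiled */
static int
example_udp_shutdown(struct socket *so, int how)
{
	int error = cfil_sock_udp_shutdown(so, &how);

	if (error == EJUSTRETURN) {
		return 0;               /* defer until the filters pass the data */
	}
	if (error != 0) {
		return error;
	}
	/* ... continue protocol shutdown using the possibly downgraded 'how' ... */
	return 0;
}
#endif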
6380
6381 void
6382 cfil_sock_udp_close_wait(struct socket *so)
6383 {
6384 socket_lock_assert_owned(so);
6385
6386 while (cfil_filters_udp_attached(so, FALSE)) {
6387 /*
6388 * Notify the filters we are going away so they can detach
6389 */
6390 cfil_sock_udp_notify_shutdown(so, SHUT_RDWR, 0, 0);
6391
6392 /*
6393 * Make sure we still need to wait after the filters are notified
6394 * of the disconnection
6395 */
6396 if (cfil_filters_udp_attached(so, TRUE) == 0) {
6397 break;
6398 }
6399 }
6400 }
6401
6402 void
6403 cfil_sock_udp_is_closed(struct socket *so)
6404 {
6405 struct cfil_info *cfil_info = NULL;
6406 struct cfilhashhead *cfilhash = NULL;
6407 struct cfil_db *db = NULL;
6408 struct cfil_hash_entry *hash_entry = NULL;
6409 struct cfil_hash_entry *temp_hash_entry = NULL;
6410 errno_t error = 0;
6411 int kcunit;
6412
6413 socket_lock_assert_owned(so);
6414
6415 if ((so->so_flags & SOF_CONTENT_FILTER) != 0 && so->so_cfil_db != NULL) {
6416 db = so->so_cfil_db;
6417
6418 for (int i = 0; i < CFILHASHSIZE; i++) {
6419 cfilhash = &db->cfdb_hashbase[i];
6420
6421 LIST_FOREACH_SAFE(hash_entry, cfilhash, cfentry_link, temp_hash_entry) {
6422 if (hash_entry->cfentry_cfil != NULL) {
6423 cfil_info = hash_entry->cfentry_cfil;
6424
6425 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
6426 /* Let the filters know of the closing */
6427 error = cfil_dispatch_closed_event(so, cfil_info, kcunit);
6428 }
6429
6430 /* Last chance to push passed data out */
6431 error = cfil_acquire_sockbuf(so, cfil_info, 1);
6432 if (error == 0) {
6433 cfil_service_inject_queue(so, cfil_info, 1);
6434 }
6435 cfil_release_sockbuf(so, 1);
6436
6437 cfil_info->cfi_flags |= CFIF_SOCK_CLOSED;
6438
6439 /* Pending data needs to go */
6440 cfil_flush_queues(so, cfil_info);
6441
6442 CFIL_INFO_VERIFY(cfil_info);
6443 }
6444 }
6445 }
6446 }
6447 }
6448
6449 void
6450 cfil_sock_udp_buf_update(struct sockbuf *sb)
6451 {
6452 struct cfil_info *cfil_info = NULL;
6453 struct cfilhashhead *cfilhash = NULL;
6454 struct cfil_db *db = NULL;
6455 struct cfil_hash_entry *hash_entry = NULL;
6456 struct cfil_hash_entry *temp_hash_entry = NULL;
6457 errno_t error = 0;
6458 int outgoing;
6459 struct socket *so = sb->sb_so;
6460
6461 socket_lock_assert_owned(so);
6462
6463 if ((so->so_flags & SOF_CONTENT_FILTER) != 0 && so->so_cfil_db != NULL) {
6464 if (!cfil_sbtrim) {
6465 return;
6466 }
6467
6468 db = so->so_cfil_db;
6469
6470 for (int i = 0; i < CFILHASHSIZE; i++) {
6471 cfilhash = &db->cfdb_hashbase[i];
6472
6473 LIST_FOREACH_SAFE(hash_entry, cfilhash, cfentry_link, temp_hash_entry) {
6474 if (hash_entry->cfentry_cfil != NULL) {
6475 cfil_info = hash_entry->cfentry_cfil;
6476
6477 if ((sb->sb_flags & SB_RECV) == 0) {
6478 if ((cfil_info->cfi_flags & CFIF_RETRY_INJECT_OUT) == 0) {
6479 return;
6480 }
6481 outgoing = 1;
6482 OSIncrementAtomic(&cfil_stats.cfs_inject_q_out_retry);
6483 } else {
6484 if ((cfil_info->cfi_flags & CFIF_RETRY_INJECT_IN) == 0) {
6485 return;
6486 }
6487 outgoing = 0;
6488 OSIncrementAtomic(&cfil_stats.cfs_inject_q_in_retry);
6489 }
6490
6491 CFIL_LOG(LOG_NOTICE, "so %llx outgoing %d",
6492 (uint64_t)VM_KERNEL_ADDRPERM(so), outgoing);
6493
6494 error = cfil_acquire_sockbuf(so, cfil_info, outgoing);
6495 if (error == 0) {
6496 cfil_service_inject_queue(so, cfil_info, outgoing);
6497 }
6498 cfil_release_sockbuf(so, outgoing);
6499 }
6500 }
6501 }
6502 }
6503 }
6504
6505 void
6506 cfil_filter_show(u_int32_t kcunit)
6507 {
6508 struct content_filter *cfc = NULL;
6509 struct cfil_entry *entry;
6510 int count = 0;
6511
6512 if (content_filters == NULL) {
6513 return;
6514 }
6515 if (kcunit > MAX_CONTENT_FILTER) {
6516 return;
6517 }
6518
6519 cfil_rw_lock_shared(&cfil_lck_rw);
6520
6521 if (content_filters[kcunit - 1] == NULL) {
6522 cfil_rw_unlock_shared(&cfil_lck_rw);
6523 return;
6524 }
6525 cfc = content_filters[kcunit - 1];
6526
6527 CFIL_LOG(LOG_ERR, "CFIL: FILTER SHOW: Filter <unit %d, entry count %d> flags <%lx>:",
6528 kcunit, cfc->cf_sock_count, (unsigned long)cfc->cf_flags);
6529 if (cfc->cf_flags & CFF_DETACHING) {
6530 CFIL_LOG(LOG_ERR, "CFIL: FILTER SHOW: - DETACHING");
6531 }
6532 if (cfc->cf_flags & CFF_ACTIVE) {
6533 CFIL_LOG(LOG_ERR, "CFIL: FILTER SHOW: - ACTIVE");
6534 }
6535 if (cfc->cf_flags & CFF_FLOW_CONTROLLED) {
6536 CFIL_LOG(LOG_ERR, "CFIL: FILTER SHOW: - FLOW CONTROLLED");
6537 }
6538
6539 TAILQ_FOREACH(entry, &cfc->cf_sock_entries, cfe_link) {
6540 if (entry->cfe_cfil_info && entry->cfe_cfil_info->cfi_so) {
6541 struct cfil_info *cfil_info = entry->cfe_cfil_info;
6542
6543 count++;
6544
6545 if (entry->cfe_flags & CFEF_CFIL_DETACHED) {
6546 cfil_info_log(LOG_ERR, cfil_info, "CFIL: FILTER SHOW: - DETACHED");
6547 } else {
6548 cfil_info_log(LOG_ERR, cfil_info, "CFIL: FILTER SHOW: - ATTACHED");
6549 }
6550 }
6551 }
6552
6553 CFIL_LOG(LOG_ERR, "CFIL: FILTER SHOW: Filter - total entries shown: %d", count);
6554
6555 cfil_rw_unlock_shared(&cfil_lck_rw);
6556 }
6557
6558 void
6559 cfil_info_show(void)
6560 {
6561 struct cfil_info *cfil_info;
6562 int count = 0;
6563
6564 cfil_rw_lock_shared(&cfil_lck_rw);
6565
6566 CFIL_LOG(LOG_ERR, "CFIL: INFO SHOW: count %d", cfil_sock_attached_count);
6567
6568 TAILQ_FOREACH(cfil_info, &cfil_sock_head, cfi_link) {
6569 count++;
6570
6571 cfil_info_log(LOG_ERR, cfil_info, "CFIL: INFO SHOW");
6572
6573 if (cfil_info->cfi_flags & CFIF_DROP) {
6574 CFIL_LOG(LOG_ERR, "CFIL: INFO FLAG - DROP");
6575 }
6576 if (cfil_info->cfi_flags & CFIF_CLOSE_WAIT) {
6577 CFIL_LOG(LOG_ERR, "CFIL: INFO FLAG - CLOSE_WAIT");
6578 }
6579 if (cfil_info->cfi_flags & CFIF_SOCK_CLOSED) {
6580 CFIL_LOG(LOG_ERR, "CFIL: INFO FLAG - SOCK_CLOSED");
6581 }
6582 if (cfil_info->cfi_flags & CFIF_RETRY_INJECT_IN) {
6583 CFIL_LOG(LOG_ERR, "CFIL: INFO FLAG - RETRY_INJECT_IN");
6584 }
6585 if (cfil_info->cfi_flags & CFIF_RETRY_INJECT_OUT) {
6586 CFIL_LOG(LOG_ERR, "CFIL: INFO FLAG - RETRY_INJECT_OUT");
6587 }
6588 if (cfil_info->cfi_flags & CFIF_SHUT_WR) {
6589 CFIL_LOG(LOG_ERR, "CFIL: INFO FLAG - SHUT_WR");
6590 }
6591 if (cfil_info->cfi_flags & CFIF_SHUT_RD) {
6592 CFIL_LOG(LOG_ERR, "CFIL: INFO FLAG - SHUT_RD");
6593 }
6594 }
6595
6596 CFIL_LOG(LOG_ERR, "CFIL: INFO SHOW: total cfil_info shown: %d", count);
6597
6598 cfil_rw_unlock_shared(&cfil_lck_rw);
6599 }
6600
6601 bool
6602 cfil_info_idle_timed_out(struct cfil_info *cfil_info, int timeout, u_int32_t current_time)
6603 {
6604 if (cfil_info && cfil_info->cfi_hash_entry &&
6605 (current_time - cfil_info->cfi_hash_entry->cfentry_lastused >= (u_int32_t)timeout)) {
6606 #if GC_DEBUG
6607 cfil_info_log(LOG_ERR, cfil_info, "CFIL: flow IDLE timeout expired");
6608 #endif
6609 return true;
6610 }
6611 return false;
6612 }
6613
6614 bool
6615 cfil_info_action_timed_out(struct cfil_info *cfil_info, int timeout)
6616 {
6617 struct cfil_entry *entry;
6618 struct timeval current_tv;
6619 struct timeval diff_time;
6620
6621 if (cfil_info == NULL) {
6622 return false;
6623 }
6624
6625 /*
6626 * If we have queued up more data than passed offset and we haven't received
6627 * an action from user space for a while (the user space filter might have crashed),
6628 * return action timed out.
6629 */
6630 if (cfil_info->cfi_snd.cfi_pending_last > cfil_info->cfi_snd.cfi_pass_offset ||
6631 cfil_info->cfi_rcv.cfi_pending_last > cfil_info->cfi_rcv.cfi_pass_offset) {
6632 microuptime(&current_tv);
6633
6634 for (int kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
6635 entry = &cfil_info->cfi_entries[kcunit - 1];
6636
6637 if (entry->cfe_filter == NULL) {
6638 continue;
6639 }
6640
6641 if (cfil_info->cfi_snd.cfi_pending_last > entry->cfe_snd.cfe_pass_offset ||
6642 cfil_info->cfi_rcv.cfi_pending_last > entry->cfe_rcv.cfe_pass_offset) {
6643 // haven't gotten an action from this filter, check timeout
6644 timersub(&current_tv, &entry->cfe_last_action, &diff_time);
6645 if (diff_time.tv_sec >= timeout) {
6646 #if GC_DEBUG
6647 cfil_info_log(LOG_ERR, cfil_info, "CFIL: flow ACTION timeout expired");
6648 #endif
6649 return true;
6650 }
6651 }
6652 }
6653 }
6654 return false;
6655 }
6656
6657 bool
6658 cfil_info_buffer_threshold_exceeded(struct cfil_info *cfil_info)
6659 {
6660 if (cfil_info == NULL) {
6661 return false;
6662 }
6663
6664 /*
6665 * Clean up flow if it exceeded queue thresholds
6666 */
6667 if (cfil_info->cfi_snd.cfi_tail_drop_cnt ||
6668 cfil_info->cfi_rcv.cfi_tail_drop_cnt) {
6669 #if GC_DEBUG
6670 CFIL_LOG(LOG_ERR, "CFIL: queue threshold exceeded: mbuf max <count: %d bytes: %d> tail drop count <OUT: %d IN: %d>",
6671 cfil_udp_gc_mbuf_num_max,
6672 cfil_udp_gc_mbuf_cnt_max,
6673 cfil_info->cfi_snd.cfi_tail_drop_cnt,
6674 cfil_info->cfi_rcv.cfi_tail_drop_cnt);
6675 cfil_info_log(LOG_ERR, cfil_info, "CFIL: queue threshold exceeded");
6676 #endif
6677 return true;
6678 }
6679
6680 return false;
6681 }
6682
6683 static void
6684 cfil_udp_gc_thread_sleep(bool forever)
6685 {
6686 if (forever) {
6687 (void) assert_wait((event_t) &cfil_sock_udp_attached_count,
6688 THREAD_INTERRUPTIBLE);
6689 } else {
6690 uint64_t deadline = 0;
6691 nanoseconds_to_absolutetime(UDP_FLOW_GC_RUN_INTERVAL_NSEC, &deadline);
6692 clock_absolutetime_interval_to_deadline(deadline, &deadline);
6693
6694 (void) assert_wait_deadline(&cfil_sock_udp_attached_count,
6695 THREAD_INTERRUPTIBLE, deadline);
6696 }
6697 }
6698
6699 static void
6700 cfil_udp_gc_thread_func(void *v, wait_result_t w)
6701 {
6702 #pragma unused(v, w)
6703
6704 ASSERT(cfil_udp_gc_thread == current_thread());
6705 thread_set_thread_name(current_thread(), "CFIL_UDP_GC");
6706
6707 // Kick off gc shortly
6708 cfil_udp_gc_thread_sleep(false);
6709 thread_block_parameter((thread_continue_t) cfil_info_udp_expire, NULL);
6710 /* NOTREACHED */
6711 }
6712
6713 static void
6714 cfil_info_udp_expire(void *v, wait_result_t w)
6715 {
6716 #pragma unused(v, w)
6717
6718 static uint64_t expired_array[UDP_FLOW_GC_MAX_COUNT];
6719 static uint32_t expired_count = 0;
6720
6721 struct cfil_info *cfil_info;
6722 struct cfil_hash_entry *hash_entry;
6723 struct cfil_db *db;
6724 struct socket *so;
6725 u_int64_t current_time = 0;
6726
6727 current_time = net_uptime();
6728
6729 // Get all expired UDP flow ids
6730 cfil_rw_lock_shared(&cfil_lck_rw);
6731
6732 if (cfil_sock_udp_attached_count == 0) {
6733 cfil_rw_unlock_shared(&cfil_lck_rw);
6734 goto go_sleep;
6735 }
6736
6737 TAILQ_FOREACH(cfil_info, &cfil_sock_head, cfi_link) {
6738 if (expired_count >= UDP_FLOW_GC_MAX_COUNT) {
6739 break;
6740 }
6741
6742 if (IS_UDP(cfil_info->cfi_so)) {
6743 if (cfil_info_idle_timed_out(cfil_info, UDP_FLOW_GC_IDLE_TO, current_time) ||
6744 cfil_info_action_timed_out(cfil_info, UDP_FLOW_GC_ACTION_TO) ||
6745 cfil_info_buffer_threshold_exceeded(cfil_info)) {
6746 expired_array[expired_count] = cfil_info->cfi_sock_id;
6747 expired_count++;
6748 }
6749 }
6750 }
6751 cfil_rw_unlock_shared(&cfil_lck_rw);
6752
6753 if (expired_count == 0) {
6754 goto go_sleep;
6755 }
6756
6757 for (uint32_t i = 0; i < expired_count; i++) {
6758 // Search for socket (UDP only and lock so)
6759 so = cfil_socket_from_sock_id(expired_array[i], true);
6760 if (so == NULL) {
6761 continue;
6762 }
6763
6764 cfil_info = cfil_db_get_cfil_info(so->so_cfil_db, expired_array[i]);
6765 if (cfil_info == NULL) {
6766 goto unlock;
6767 }
6768
6769 db = so->so_cfil_db;
6770 hash_entry = cfil_info->cfi_hash_entry;
6771
6772 if (db == NULL || hash_entry == NULL) {
6773 goto unlock;
6774 }
6775
6776 #if GC_DEBUG || LIFECYCLE_DEBUG
6777 cfil_info_log(LOG_ERR, cfil_info, "CFIL: LIFECYCLE: GC CLEAN UP");
6778 #endif
6779
6780 cfil_db_delete_entry(db, hash_entry);
6781 cfil_info_free(cfil_info);
6782 OSIncrementAtomic(&cfil_stats.cfs_sock_detached);
6783
6784 if (so->so_flags & SOF_CONTENT_FILTER) {
6785 if (db->cfdb_count == 0) {
6786 so->so_flags &= ~SOF_CONTENT_FILTER;
6787 }
6788 VERIFY(so->so_usecount > 0);
6789 so->so_usecount--;
6790 }
6791 unlock:
6792 socket_unlock(so, 1);
6793 }
6794
6795 #if GC_DEBUG
6796 CFIL_LOG(LOG_ERR, "CFIL: UDP flow idle timeout check: expired %d idle flows", expired_count);
6797 #endif
6798 expired_count = 0;
6799
6800 go_sleep:
6801
6802 // Sleep forever (until woken up) if there are no more UDP flows to clean
6803 cfil_rw_lock_shared(&cfil_lck_rw);
6804 cfil_udp_gc_thread_sleep(cfil_sock_udp_attached_count == 0 ? true : false);
6805 cfil_rw_unlock_shared(&cfil_lck_rw);
6806 thread_block_parameter((thread_continue_t)cfil_info_udp_expire, NULL);
6807 /* NOTREACHED */
6808 }
6809
6810 struct m_tag *
6811 cfil_udp_save_socket_state(struct cfil_info *cfil_info, struct mbuf *m)
6812 {
6813 struct m_tag *tag = NULL;
6814 struct cfil_tag *ctag = NULL;
6815 struct cfil_hash_entry *hash_entry = NULL;
6816
6817 if (cfil_info == NULL || cfil_info->cfi_so == NULL ||
6818 cfil_info->cfi_hash_entry == NULL || m == NULL || !(m->m_flags & M_PKTHDR)) {
6819 return NULL;
6820 }
6821
6822 /* Allocate a tag */
6823 tag = m_tag_create(KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_CFIL_UDP,
6824 sizeof(struct cfil_tag), M_DONTWAIT, m);
6825
6826 if (tag) {
6827 ctag = (struct cfil_tag*)(tag + 1);
6828 ctag->cfil_so_state_change_cnt = cfil_info->cfi_so->so_state_change_cnt;
6829 ctag->cfil_so_options = cfil_info->cfi_so->so_options;
6830
6831 hash_entry = cfil_info->cfi_hash_entry;
6832 if (hash_entry->cfentry_family == AF_INET6) {
6833 fill_ip6_sockaddr_4_6(&ctag->cfil_faddr,
6834 &hash_entry->cfentry_faddr.addr6,
6835 hash_entry->cfentry_fport);
6836 } else if (hash_entry->cfentry_family == AF_INET) {
6837 fill_ip_sockaddr_4_6(&ctag->cfil_faddr,
6838 hash_entry->cfentry_faddr.addr46.ia46_addr4,
6839 hash_entry->cfentry_fport);
6840 }
6841 m_tag_prepend(m, tag);
6842 return tag;
6843 }
6844 return NULL;
6845 }
6846
6847 struct m_tag *
6848 cfil_udp_get_socket_state(struct mbuf *m, uint32_t *state_change_cnt, short *options,
6849 struct sockaddr **faddr)
6850 {
6851 struct m_tag *tag = NULL;
6852 struct cfil_tag *ctag = NULL;
6853
6854 tag = m_tag_locate(m, KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_CFIL_UDP, NULL);
6855 if (tag) {
6856 ctag = (struct cfil_tag *)(tag + 1);
6857 if (state_change_cnt) {
6858 *state_change_cnt = ctag->cfil_so_state_change_cnt;
6859 }
6860 if (options) {
6861 *options = ctag->cfil_so_options;
6862 }
6863 if (faddr) {
6864 *faddr = (struct sockaddr *) &ctag->cfil_faddr;
6865 }
6866
6867 /*
6868 * Unlink the tag and hand it over to the caller.
6869 * Note that the caller is responsible for freeing it.
6870 */
6871 m_tag_unlink(m, tag);
6872 return tag;
6873 }
6874 return NULL;
6875 }
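/*
 * Hypothetical consumer of the saved UDP socket state, illustrating the
 * ownership rule documented above: once cfil_udp_get_socket_state()
 * unlinks and returns the tag, the caller must free it.  The function
 * name is illustrative.
 */
#if 0 /* example only, not compiled */
static void
example_consume_cfil_tag(struct mbuf *m)
{
	uint32_t state_change_cnt = 0;
	short options = 0;
	struct sockaddr *faddr = NULL;
	struct m_tag *tag;

	tag = cfil_udp_get_socket_state(m, &state_change_cnt, &options, &faddr);
	if (tag != NULL) {
		/* ... use faddr, options and state_change_cnt ... */
		m_tag_free(tag);        /* caller owns the unlinked tag */
	}
}
#endif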
6876
6877 static int
6878 cfil_dispatch_stats_event_locked(int kcunit, struct cfil_stats_report_buffer *buffer, uint32_t stats_count)
6879 {
6880 struct content_filter *cfc = NULL;
6881 errno_t error = 0;
6882 size_t msgsize = 0;
6883
6884 if (buffer == NULL || stats_count == 0) {
6885 return error;
6886 }
6887
6888 if (content_filters == NULL || kcunit > MAX_CONTENT_FILTER) {
6889 return error;
6890 }
6891
6892 cfc = content_filters[kcunit - 1];
6893 if (cfc == NULL) {
6894 return error;
6895 }
6896
6897 /* Would be wasteful to try */
6898 if (cfc->cf_flags & CFF_FLOW_CONTROLLED) {
6899 error = ENOBUFS;
6900 goto done;
6901 }
6902
6903 msgsize = sizeof(struct cfil_msg_stats_report) + (sizeof(struct cfil_msg_sock_stats) * stats_count);
6904 buffer->msghdr.cfm_len = msgsize;
6905 buffer->msghdr.cfm_version = 1;
6906 buffer->msghdr.cfm_type = CFM_TYPE_EVENT;
6907 buffer->msghdr.cfm_op = CFM_OP_STATS;
6908 buffer->msghdr.cfm_sock_id = 0;
6909 buffer->count = stats_count;
6910
6911 #if STATS_DEBUG
6912 CFIL_LOG(LOG_ERR, "STATS (kcunit %d): msg size %lu - %lu %lu %lu",
6913 kcunit,
6914 (unsigned long)msgsize,
6915 (unsigned long)sizeof(struct cfil_msg_stats_report),
6916 (unsigned long)sizeof(struct cfil_msg_sock_stats),
6917 (unsigned long)stats_count);
6918 #endif
6919
6920 error = ctl_enqueuedata(cfc->cf_kcref, cfc->cf_kcunit,
6921 buffer,
6922 msgsize,
6923 CTL_DATA_EOR);
6924 if (error != 0) {
6925 CFIL_LOG(LOG_ERR, "ctl_enqueuedata() failed: %d", error);
6926 goto done;
6927 }
6928 OSIncrementAtomic(&cfil_stats.cfs_stats_event_ok);
6929
6930 #if STATS_DEBUG
6931 CFIL_LOG(LOG_ERR, "CFIL: STATS REPORT: send msg to %d", kcunit);
6932 #endif
6933
6934 done:
6935
6936 if (error == ENOBUFS) {
6937 OSIncrementAtomic(
6938 &cfil_stats.cfs_stats_event_flow_control);
6939
6940 if (!cfil_rw_lock_shared_to_exclusive(&cfil_lck_rw)) {
6941 cfil_rw_lock_exclusive(&cfil_lck_rw);
6942 }
6943
6944 cfc->cf_flags |= CFF_FLOW_CONTROLLED;
6945
6946 cfil_rw_unlock_exclusive(&cfil_lck_rw);
6947 } else if (error != 0) {
6948 OSIncrementAtomic(&cfil_stats.cfs_stats_event_fail);
6949 }
6950
6951 return error;
6952 }
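/*
 * Layout sketch for the stats event enqueued above: a
 * cfil_msg_stats_report header followed by 'count' packed
 * cfil_msg_sock_stats records, which is exactly the msgsize the
 * dispatcher reports in cfm_len.  The helper name is hypothetical.
 */
#if 0 /* example only, not compiled */
static size_t
example_stats_event_size(uint32_t stats_count)
{
	return sizeof(struct cfil_msg_stats_report) +
	       (sizeof(struct cfil_msg_sock_stats) * stats_count);
}
#endif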
6953
6954 static void
6955 cfil_stats_report_thread_sleep(bool forever)
6956 {
6957 #if STATS_DEBUG
6958 CFIL_LOG(LOG_ERR, "CFIL: STATS COLLECTION SLEEP");
6959 #endif
6960
6961 if (forever) {
6962 (void) assert_wait((event_t) &cfil_sock_attached_stats_count,
6963 THREAD_INTERRUPTIBLE);
6964 } else {
6965 uint64_t deadline = 0;
6966 nanoseconds_to_absolutetime(CFIL_STATS_REPORT_RUN_INTERVAL_NSEC, &deadline);
6967 clock_absolutetime_interval_to_deadline(deadline, &deadline);
6968
6969 (void) assert_wait_deadline(&cfil_sock_attached_stats_count,
6970 THREAD_INTERRUPTIBLE, deadline);
6971 }
6972 }
6973
6974 static void
6975 cfil_stats_report_thread_func(void *v, wait_result_t w)
6976 {
6977 #pragma unused(v, w)
6978
6979 ASSERT(cfil_stats_report_thread == current_thread());
6980 thread_set_thread_name(current_thread(), "CFIL_STATS_REPORT");
6981
6982 // Kick off stats reporting shortly
6983 cfil_stats_report_thread_sleep(false);
6984 thread_block_parameter((thread_continue_t) cfil_stats_report, NULL);
6985 /* NOTREACHED */
6986 }
6987
6988 static bool
6989 cfil_stats_collect_flow_stats_for_filter(int kcunit,
6990 struct cfil_info *cfil_info,
6991 struct cfil_entry *entry,
6992 struct timeval current_tv)
6993 {
6994 struct cfil_stats_report_buffer *buffer = NULL;
6995 struct cfil_msg_sock_stats *flow_array = NULL;
6996 struct cfil_msg_sock_stats *stats = NULL;
6997 struct inpcb *inp = NULL;
6998 struct timeval diff_time;
6999 uint64_t diff_time_usecs;
7000 int index = 0;
7001
7002 if (entry->cfe_stats_report_frequency == 0) {
7003 return false;
7004 }
7005
7006 buffer = global_cfil_stats_report_buffers[kcunit - 1];
7007 if (buffer == NULL) {
7008 #if STATS_DEBUG
7009 CFIL_LOG(LOG_ERR, "CFIL: STATS: no buffer");
7010 #endif
7011 return false;
7012 }
7013
7014 timersub(&current_tv, &entry->cfe_stats_report_ts, &diff_time);
7015 diff_time_usecs = (diff_time.tv_sec * USEC_PER_SEC) + diff_time.tv_usec;
7016
7017 #if STATS_DEBUG
7018 CFIL_LOG(LOG_ERR, "CFIL: STATS REPORT - elapsed time - ts %llu %llu cur ts %llu %llu diff %llu %llu (usecs %llu) @freq %llu usecs sockID %llu",
7019 (unsigned long long)entry->cfe_stats_report_ts.tv_sec,
7020 (unsigned long long)entry->cfe_stats_report_ts.tv_usec,
7021 (unsigned long long)current_tv.tv_sec,
7022 (unsigned long long)current_tv.tv_usec,
7023 (unsigned long long)diff_time.tv_sec,
7024 (unsigned long long)diff_time.tv_usec,
7025 (unsigned long long)diff_time_usecs,
7026 (unsigned long long)((entry->cfe_stats_report_frequency * NSEC_PER_MSEC) / NSEC_PER_USEC),
7027 cfil_info->cfi_sock_id);
7028 #endif
7029
7030 // Compare elapsed time in usecs
7031 if (diff_time_usecs >= (entry->cfe_stats_report_frequency * NSEC_PER_MSEC) / NSEC_PER_USEC) {
7032 #if STATS_DEBUG
7033 CFIL_LOG(LOG_ERR, "CFIL: STATS REPORT - in %llu reported %llu",
7034 cfil_info->cfi_byte_inbound_count,
7035 entry->cfe_byte_inbound_count_reported);
7036 CFIL_LOG(LOG_ERR, "CFIL: STATS REPORT - out %llu reported %llu",
7037 cfil_info->cfi_byte_outbound_count,
7038 entry->cfe_byte_outbound_count_reported);
7039 #endif
7040 // Check if flow has new bytes that have not been reported
7041 if (entry->cfe_byte_inbound_count_reported < cfil_info->cfi_byte_inbound_count ||
7042 entry->cfe_byte_outbound_count_reported < cfil_info->cfi_byte_outbound_count) {
7043 flow_array = (struct cfil_msg_sock_stats *)&buffer->stats;
7044 index = global_cfil_stats_counts[kcunit - 1];
7045
7046 stats = &flow_array[index];
7047 stats->cfs_sock_id = cfil_info->cfi_sock_id;
7048 stats->cfs_byte_inbound_count = cfil_info->cfi_byte_inbound_count;
7049 stats->cfs_byte_outbound_count = cfil_info->cfi_byte_outbound_count;
7050
7051 if (entry->cfe_laddr_sent == false) {
7052 /* cache it if necessary */
7053 if (cfil_info->cfi_so_attach_laddr.sa.sa_len == 0) {
7054 inp = cfil_info->cfi_so ? sotoinpcb(cfil_info->cfi_so) : NULL;
7055 if (inp != NULL) {
7056 boolean_t outgoing = (cfil_info->cfi_dir == CFS_CONNECTION_DIR_OUT);
7057 union sockaddr_in_4_6 *src = outgoing ? &cfil_info->cfi_so_attach_laddr : NULL;
7058 union sockaddr_in_4_6 *dst = outgoing ? NULL : &cfil_info->cfi_so_attach_laddr;
7059 cfil_fill_event_msg_addresses(cfil_info->cfi_hash_entry, inp,
7060 src, dst, inp->inp_vflag & INP_IPV4, outgoing);
7061 }
7062 }
7063
7064 if (cfil_info->cfi_so_attach_laddr.sa.sa_len != 0) {
7065 stats->cfs_laddr.sin6 = cfil_info->cfi_so_attach_laddr.sin6;
7066 entry->cfe_laddr_sent = true;
7067 }
7068 }
7069
7070 global_cfil_stats_counts[kcunit - 1]++;
7071
7072 entry->cfe_stats_report_ts = current_tv;
7073 entry->cfe_byte_inbound_count_reported = cfil_info->cfi_byte_inbound_count;
7074 entry->cfe_byte_outbound_count_reported = cfil_info->cfi_byte_outbound_count;
7075 #if STATS_DEBUG
7076 cfil_info_log(LOG_ERR, cfil_info, "CFIL: LIFECYCLE: STATS COLLECTED");
7077 #endif
7078 CFI_ADD_TIME_LOG(cfil_info, &current_tv, &cfil_info->cfi_first_event, CFM_OP_STATS);
7079 return true;
7080 }
7081 }
7082 return false;
7083 }
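/*
 * Sketch of the reporting-interval check above: the per-entry report
 * frequency is kept in milliseconds, so it is converted to microseconds
 * (via a nanosecond intermediate, as above) before being compared with
 * the elapsed time since the last report.  The helper name is
 * hypothetical.
 */
#if 0 /* example only, not compiled */
static bool
example_report_due(uint64_t elapsed_usecs, uint32_t frequency_ms)
{
	return elapsed_usecs >= (frequency_ms * NSEC_PER_MSEC) / NSEC_PER_USEC;
}
#endif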
7084
7085 static void
7086 cfil_stats_report(void *v, wait_result_t w)
7087 {
7088 #pragma unused(v, w)
7089
7090 struct cfil_info *cfil_info = NULL;
7091 struct cfil_entry *entry = NULL;
7092 struct timeval current_tv;
7093 uint32_t flow_count = 0;
7094 uint64_t saved_next_sock_id = 0; // Next sock id to be reported for next loop
7095 bool flow_reported = false;
7096
7097 #if STATS_DEBUG
7098 CFIL_LOG(LOG_ERR, "CFIL: STATS COLLECTION RUNNING");
7099 #endif
7100
7101 do {
7102 // Collect all sock ids of flows that have new stats
7103 cfil_rw_lock_shared(&cfil_lck_rw);
7104
7105 if (cfil_sock_attached_stats_count == 0) {
7106 #if STATS_DEBUG
7107 CFIL_LOG(LOG_ERR, "CFIL: STATS: no flow");
7108 #endif
7109 cfil_rw_unlock_shared(&cfil_lck_rw);
7110 goto go_sleep;
7111 }
7112
7113 for (int kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
7114 if (global_cfil_stats_report_buffers[kcunit - 1] != NULL) {
7115 memset(global_cfil_stats_report_buffers[kcunit - 1], 0, sizeof(struct cfil_stats_report_buffer));
7116 }
7117 global_cfil_stats_counts[kcunit - 1] = 0;
7118 }
7119
7120 microuptime(&current_tv);
7121 flow_count = 0;
7122
7123 TAILQ_FOREACH(cfil_info, &cfil_sock_head_stats, cfi_link_stats) {
7124 if (saved_next_sock_id != 0 &&
7125 saved_next_sock_id == cfil_info->cfi_sock_id) {
7126 // Here is where we left off previously, start accumulating
7127 saved_next_sock_id = 0;
7128 }
7129
7130 if (saved_next_sock_id == 0) {
7131 if (flow_count >= CFIL_STATS_REPORT_MAX_COUNT) {
7132 // Examine a fixed number of flows each round. Remember the current flow
7133 // so we can start from here on the next pass
7134 saved_next_sock_id = cfil_info->cfi_sock_id;
7135 break;
7136 }
7137
7138 flow_reported = false;
7139 for (int kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
7140 entry = &cfil_info->cfi_entries[kcunit - 1];
7141 if (entry->cfe_filter == NULL) {
7142 #if STATS_DEBUG
7143 CFIL_LOG(LOG_NOTICE, "CFIL: STATS REPORT - so %llx no filter",
7144 cfil_info->cfi_so ? (uint64_t)VM_KERNEL_ADDRPERM(cfil_info->cfi_so) : 0);
7145 #endif
7146 continue;
7147 }
7148
7149 if ((entry->cfe_stats_report_frequency > 0) &&
7150 cfil_stats_collect_flow_stats_for_filter(kcunit, cfil_info, entry, current_tv) == true) {
7151 flow_reported = true;
7152 }
7153 }
7154 if (flow_reported == true) {
7155 flow_count++;
7156 }
7157 }
7158 }
7159
7160 if (flow_count > 0) {
7161 #if STATS_DEBUG
7162 CFIL_LOG(LOG_ERR, "CFIL: STATS reporting for %d flows", flow_count);
7163 #endif
7164 for (int kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
7165 if (global_cfil_stats_report_buffers[kcunit - 1] != NULL &&
7166 global_cfil_stats_counts[kcunit - 1] > 0) {
7167 cfil_dispatch_stats_event_locked(kcunit,
7168 global_cfil_stats_report_buffers[kcunit - 1],
7169 global_cfil_stats_counts[kcunit - 1]);
7170 }
7171 }
7172 } else {
7173 cfil_rw_unlock_shared(&cfil_lck_rw);
7174 goto go_sleep;
7175 }
7176
7177 cfil_rw_unlock_shared(&cfil_lck_rw);
7178
7179 // Loop again if we haven't finished the whole cfil_info list
7180 } while (saved_next_sock_id != 0);
7181
7182 go_sleep:
7183
7184 // Sleep forever (until woken up) if there are no more flows to report
7185 cfil_rw_lock_shared(&cfil_lck_rw);
7186 cfil_stats_report_thread_sleep(cfil_sock_attached_stats_count == 0 ? true : false);
7187 cfil_rw_unlock_shared(&cfil_lck_rw);
7188 thread_block_parameter((thread_continue_t) cfil_stats_report, NULL);
7189 /* NOTREACHED */
7190 }