1 /*
2 * Copyright (c) 2013-2020 Apple Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. Please obtain a copy of the License at
10 * http://www.opensource.apple.com/apsl/ and read it before using this
11 * file.
12 *
13 * The Original Code and all software distributed under the License are
14 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
18 * Please see the License for the specific language governing rights and
19 * limitations under the License.
20 *
21 * @APPLE_LICENSE_HEADER_END@
22 */
23
24 /*
25 * THEORY OF OPERATION
26 *
27 * The socket content filter subsystem provides a way for user space agents to
28 * make filtering decisions based on the content of the data being sent and
29 * received by INET/INET6 sockets.
30 *
31 * A content filter user space agent gets a copy of the data, and the data is
32 * also kept in a kernel buffer until the user space agent makes a pass or drop
33 * decision. This unidirectional flow of content avoids unnecessary data copies
34 * back to the kernel.
35 *
36 * A user space filter agent opens a kernel control socket with the name
37 * CONTENT_FILTER_CONTROL_NAME to attach to the socket content filter subsystem.
38 * When connected, a "struct content_filter" is created and set as the
39 * "unitinfo" of the corresponding kernel control socket instance.
40 *
41 * The socket content filter subsystem exchanges messages with the user space
42 * filter agent until an ultimate pass or drop decision is made by the
43 * user space filter agent.
44 *
45 * It should be noted that messages about many INET/INET6 sockets can be multiplexed
46 * over a single kernel control socket.
47 *
48 * Notes:
49 * - The current implementation supports all INET/INET6 sockets (e.g. TCP,
50 * UDP, ICMP, etc.).
51 * - The current implementation supports up to two simultaneous content filters
52 * for iOS devices and eight simultaneous content filters for OSX.
53 *
54 *
55 * NECP FILTER CONTROL UNIT
56 *
57 * A user space filter agent uses the Network Extension Control Policy (NECP)
58 * database to specify which INET/INET6 sockets need to be filtered. The NECP
59 * criteria may be based on a variety of properties like user ID or proc UUID.
60 *
61 * The NECP "filter control unit" is used by the socket content filter subsystem
62 * to deliver the relevant INET/INET6 content information to the appropriate
63 * user space filter agent via its kernel control socket instance.
64 * This works as follows:
65 *
66 * 1) The user space filter agent specifies an NECP filter control unit when
67 * it adds its filtering rules to the NECP database.
68 *
69 * 2) The user space filter agent also sets its NECP filter control unit on the
70 * content filter kernel control socket via the socket option
71 * CFIL_OPT_NECP_CONTROL_UNIT.
72 *
73 * 3) The NECP database is consulted to find out if a given INET/INET6 socket
74 * needs to be subjected to content filtering and returns the corresponding
75 * NECP filter control unit -- the NECP filter control unit is actually
76 * stored in the INET/INET6 socket structure so the NECP lookup is really simple.
77 *
78 * 4) The NECP filter control unit is then used to find the corresponding
79 * kernel control socket instance.
80 *
81 * Note: NECP currently supports a single filter control unit per INET/INET6 socket
82 * but this restriction may soon be lifted.
83 *
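 * As an illustration of step 2) above (a hedged sketch, not part of this
 * file): once the agent's kernel control socket is connected, the NECP
 * filter control unit can be set with an ordinary setsockopt() at the
 * SYSPROTO_CONTROL level. The option name comes from net/content_filter.h;
 * the unit value below is purely hypothetical and "cfil_fd" refers to the
 * connected control socket shown in the sketch under THE MESSAGING PROTOCOL.
 *
 *	uint32_t control_unit = 12;   // must match the unit used in the NECP rule
 *	if (setsockopt(cfil_fd, SYSPROTO_CONTROL, CFIL_OPT_NECP_CONTROL_UNIT,
 *	    &control_unit, sizeof(control_unit)) == -1) {
 *		// handle the error
 *	}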
84 *
85 * THE MESSAGING PROTOCOL
86 *
87 * The socket content filter subsystem and a user space filter agent
88 * communicate over the kernel control socket via an asynchronous
89 * messaging protocol (this is not a request-response protocol).
90 * The socket content filter subsystem sends event messages to the user
91 * space filter agent about the INET/INET6 sockets it is interested to filter.
92 * The user space filter agent sends action messages to either allow
93 * data to pass or to disallow the data flow (and drop the connection).
94 *
95 * All messages over a content filter kernel control socket share the same
96 * common header of type "struct cfil_msg_hdr". The message type tells if
97 * it's an event message "CFM_TYPE_EVENT" or an action message "CFM_TYPE_ACTION".
98 * The message header field "cfm_sock_id" identifies a given INET/INET6 flow.
99 * For TCP, flows are per-socket. For UDP and other datagram protocols, there
100 * could be multiple flows per socket.
101 *
102 * Note the message header length field may be padded for alignment and can
103 * be larger than the actual content of the message.
104 * The field "cfm_op" describes the kind of event or action.
105 *
106 * Here are the kinds of content filter events:
107 * - CFM_OP_SOCKET_ATTACHED: a new INET/INET6 socket is being filtered
108 * - CFM_OP_SOCKET_CLOSED: An INET/INET6 socket is closed
109 * - CFM_OP_DATA_OUT: A span of data is being sent on an INET/INET6 socket
110 * - CFM_OP_DATA_IN: A span of data is being received on an INET/INET6 socket
111 *
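 * A hedged sketch (not part of this file) of how a user space agent
 * typically reaches this protocol: open a PF_SYSTEM/SYSPROTO_CONTROL
 * socket, resolve CONTENT_FILTER_CONTROL_NAME with CTLIOCGINFO, connect
 * to a free unit, and read event messages. The cfil_msg_hdr layout is
 * assumed to be the one declared in net/content_filter.h (a private
 * header); additional headers and error handling are omitted.
 *
 *	#include <sys/socket.h>
 *	#include <sys/sys_domain.h>
 *	#include <sys/kern_control.h>
 *	#include <net/content_filter.h>
 *
 *	int cfil_fd = socket(PF_SYSTEM, SOCK_DGRAM, SYSPROTO_CONTROL);
 *
 *	struct ctl_info info = { 0 };
 *	strlcpy(info.ctl_name, CONTENT_FILTER_CONTROL_NAME, sizeof(info.ctl_name));
 *	ioctl(cfil_fd, CTLIOCGINFO, &info);
 *
 *	struct sockaddr_ctl sc = { 0 };
 *	sc.sc_len = sizeof(sc);
 *	sc.sc_family = AF_SYSTEM;
 *	sc.ss_sysaddr = AF_SYS_CONTROL;
 *	sc.sc_id = info.ctl_id;
 *	sc.sc_unit = 1;                 // kcunit, 1..MAX_CONTENT_FILTER
 *	connect(cfil_fd, (struct sockaddr *)&sc, sizeof(sc));
 *
 *	uint8_t buf[8192];
 *	ssize_t n = recv(cfil_fd, buf, sizeof(buf), 0);
 *	struct cfil_msg_hdr *hdr = (struct cfil_msg_hdr *)(void *)buf;
 *	if (n >= (ssize_t)sizeof(*hdr) && hdr->cfm_type == CFM_TYPE_EVENT) {
 *		// hdr->cfm_op: CFM_OP_SOCKET_ATTACHED, CFM_OP_DATA_IN, ...
 *		// hdr->cfm_sock_id: the flow to reference in action messages
 *	}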
112 *
113 * EVENT MESSAGES
114 *
115 * The CFM_OP_DATA_OUT and CFM_OP_DATA_IN event messages contain a span of
116 * data that is being sent or received. The position of this span of data
117 * in the data flow is described by a set of start and end offsets. These
118 * are absolute 64-bit offsets. The first byte sent (or received) starts
119 * at offset 0 and ends at offset 1. The length of the content data
120 * is given by the difference between the end offset and the start offset.
121 *
122 * After a CFM_OP_SOCKET_ATTACHED is delivered, CFM_OP_DATA_OUT and
123 * CFM_OP_DATA_IN events are not delivered until a CFM_OP_DATA_UPDATE
124 * action message is sent by the user space filter agent.
125 *
126 * Note: absolute 64-bit offsets should be large enough for the foreseeable
127 * future. A 64-bit counter will wrap after 468 years at 10 Gbit/sec:
128 * 2^64 / ((10E9 / 8) * 60 * 60 * 24 * 365.25) = 467.63
129 *
130 * There are two kinds of primary content filter actions:
131 * - CFM_OP_DATA_UPDATE: to update pass or peek offsets for each direction.
132 * - CFM_OP_DROP: to shut down the socket and disallow further data flow
133 *
134 * There is also an action to mark a given client flow as already filtered
135 * at a higher level, CFM_OP_BLESS_CLIENT.
136 *
137 *
138 * ACTION MESSAGES
139 *
140 * The CFM_OP_DATA_UPDATE action messages let the user space filter
141 * agent allow data to flow up to the specified pass offset -- there
142 * is a pass offset for outgoing data and a pass offset for incoming data.
143 * When a new INET/INET6 socket is attached to the content filter and a flow is
144 * created, each pass offset is initially set to 0 so no data is allowed to pass by
145 * default. When the pass offset is set to CFM_MAX_OFFSET via a CFM_OP_DATA_UPDATE
146 * then the data flow becomes unrestricted.
147 *
148 * Note that pass offsets can only be incremented. A CFM_OP_DATA_UPDATE message
149 * with a pass offset smaller than the pass offset of a previous
150 * CFM_OP_DATA_UPDATE message is silently ignored.
151 *
152 * A user space filter agent also uses CFM_OP_DATA_UPDATE action messages
153 * to tell the kernel how much data it wants to see by using the peek offsets.
154 * Just like pass offsets, there is a peek offset for each direction.
155 * When a new INET/INET6 flow is created, each peek offset is initially set to 0
156 * so no CFM_OP_DATA_OUT and CFM_OP_DATA_IN event messages are dispatched by default
157 * until a CFM_OP_DATA_UPDATE action message with a greater than 0 peek offset is sent
158 * by the user space filter agent. When the peek offset is set to CFM_MAX_OFFSET via
159 * a CFM_OP_DATA_UPDATE then the flow of update data events becomes unrestricted.
160 *
161 * Note that peek offsets cannot be smaller than the corresponding pass offset.
162 * Also a peek offset cannot be smaller than the corresponding end offset
163 * of the last CFM_OP_DATA_OUT/CFM_OP_DATA_IN message dispatched. Trying
164 * to set too small a peek value is silently ignored.
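 *
 * A minimal sketch of such an action message (not part of this file),
 * assuming the cfil_msg_action layout declared in net/content_filter.h;
 * "event", "last_out_end", "last_in_end" and "cfil_fd" are hypothetical
 * names for a previously received event header, the end offsets of the
 * last data events seen, and the connected control socket.
 *
 *	struct cfil_msg_action act = { 0 };
 *	act.cfa_msghdr.cfm_len     = sizeof(act);
 *	act.cfa_msghdr.cfm_version = CFM_VERSION_CURRENT;
 *	act.cfa_msghdr.cfm_type    = CFM_TYPE_ACTION;
 *	act.cfa_msghdr.cfm_op      = CFM_OP_DATA_UPDATE;
 *	act.cfa_msghdr.cfm_sock_id = event->cfm_sock_id;
 *	act.cfa_out_pass_offset    = last_out_end;    // allow what was peeked so far
 *	act.cfa_out_peek_offset    = CFM_MAX_OFFSET;  // keep getting CFM_OP_DATA_OUT
 *	act.cfa_in_pass_offset     = last_in_end;
 *	act.cfa_in_peek_offset     = CFM_MAX_OFFSET;  // keep getting CFM_OP_DATA_IN
 *	send(cfil_fd, &act, sizeof(act), 0);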
165 *
166 *
167 * PER FLOW "struct cfil_info"
168 *
169 * As soon as an INET/INET6 socket gets attached to a content filter, a
170 * "struct cfil_info" is created to hold the content filtering state for this
171 * socket. For UDP and other datagram protocols, as soon as traffic is seen for
172 * each new flow identified by its 4-tuple of source address/port and destination
173 * address/port, a "struct cfil_info" is created. Each datagram socket may
174 * have multiple flows maintained in a hash table of "struct cfil_info" entries.
175 *
176 * The content filtering state is made of the following information
177 * for each direction:
178 * - The current pass offset;
179 * - The first and last offsets of the data pending, waiting for a filtering
180 * decision;
181 * - The inject queue for data that passed the filters and that needs
182 * to be re-injected;
183 * - A content filter specific state in a set of "struct cfil_entry"
184 *
185 *
186 * CONTENT FILTER STATE "struct cfil_entry"
187 *
188 * The "struct cfil_entry" maintains the information most relevant to the
189 * message handling over a kernel control socket with a user space filter agent.
190 *
191 * The "struct cfil_entry" holds the NECP filter control unit that corresponds
192 * to the kernel control socket unit it corresponds to and also has a pointer
193 * to the corresponding "struct content_filter".
194 *
195 * For each direction, "struct cfil_entry" maintains the following information:
196 * - The pass offset
197 * - The peek offset
198 * - The offset of the last data peeked at by the filter
199 * - A queue of data that's waiting to be delivered to the user space filter
200 * agent on the kernel control socket
201 * - A queue of data for which event messages have been sent on the kernel
202 * control socket and are pending for a filtering decision.
203 *
204 *
205 * CONTENT FILTER QUEUES
206 *
207 * Data that is being filtered is steered away from the INET/INET6 socket buffer
208 * and instead will sit in one of three content filter queues until the data
209 * can be re-injected into the INET/INET6 socket buffer.
210 *
211 * A content filter queue is represented by "struct cfil_queue" that contains
212 * a list of mbufs and the start and end offset of the data span of
213 * the list of mbufs.
214 *
215 * The data moves into the three content filter queues according to this
216 * sequence:
217 * a) The "cfe_ctl_q" of "struct cfil_entry"
218 * b) The "cfe_pending_q" of "struct cfil_entry"
219 * c) The "cfi_inject_q" of "struct cfil_info"
220 *
221 * Note: The sequence (a),(b) may be repeated several times if there is more
222 * than one content filter attached to the INET/INET6 socket.
223 *
224 * The "cfe_ctl_q" queue holds data than cannot be delivered to the
225 * kernel conntrol socket for two reasons:
226 * - The peek offset is less that the end offset of the mbuf data
227 * - The kernel control socket is flow controlled
228 *
229 * The "cfe_pending_q" queue holds data for which CFM_OP_DATA_OUT or
230 * CFM_OP_DATA_IN have been successfully dispatched to the kernel control
231 * socket and are waiting for a pass action message from the user space
232 * filter agent. An mbuf length must be fully allowed to pass to be removed
233 * from the cfe_pending_q.
234 *
235 * The "cfi_inject_q" queue holds data that has been fully allowed to pass
236 * by the user space filter agent and that needs to be re-injected into the
237 * INET/INET6 socket.
238 *
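 * A concrete (illustrative) walk-through with a single filter attached,
 * for 100 bytes written by the application:
 *
 *	1. The data is diverted to cfe_ctl_q (q_start == 0, q_end == 100).
 *	2. A CFM_OP_DATA_OUT event covering offsets [0, 100) is sent to the
 *	   agent; the mbufs move from cfe_ctl_q to cfe_pending_q.
 *	3. The agent replies with CFM_OP_DATA_UPDATE and an outgoing pass
 *	   offset of at least 100; the mbufs move to cfi_inject_q and are
 *	   then re-injected into the INET/INET6 socket buffer.
 *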
239 *
240 * IMPACT ON FLOW CONTROL
241 *
242 * An essential aspect of the content filter subsystem is to minimize the
243 * impact on flow control of the INET/INET6 sockets being filtered.
244 *
245 * The processing overhead of the content filtering may have an effect on
246 * flow control by adding noticeable delays and cannot be eliminated --
247 * care must be taken by the user space filter agent to minimize the
248 * processing delays.
249 *
250 * The amount of data being filtered is kept in buffers while waiting for
251 * a decision by the user space filter agent. This amount of data pending
252 * needs to be subtracted from the amount of data available in the
253 * corresponding INET/INET6 socket buffer. This is done by modifying
254 * sbspace() and tcp_sbspace() to account for the amount of data pending
255 * in the content filter.
256 *
257 *
258 * LOCKING STRATEGY
259 *
260 * The global state of content filter subsystem is protected by a single
261 * read-write lock "cfil_lck_rw". The data flow can be done with the
262 * cfil read-write lock held as shared so it can be re-entered from multiple
263 * threads.
264 *
265 * The per INET/INET6 socket content filter state -- "struct cfil_info" -- is
266 * protected by the socket lock.
267 *
268 * An INET/INET6 socket lock cannot be taken while the cfil read-write lock
269 * is held. That's why we have some sequences where we drop the cfil read-write
270 * lock before taking the INET/INET6 socket lock.
271 *
272 * It is also important to lock the INET/INET6 socket buffer while the content
273 * filter is modifying the amount of pending data. Otherwise the calculations
274 * in sbspace() and tcp_sbspace() could be wrong.
275 *
276 * The "cfil_lck_rw" protects "struct content_filter" and also the fields
277 * "cfe_link" and "cfe_filter" of "struct cfil_entry".
278 *
279 * Actually "cfe_link" and "cfe_filter" are protected both by
280 * "cfil_lck_rw" and the socket lock: they may be modified only when
281 * "cfil_lck_rw" is exclusive and the socket is locked.
282 *
283 * To read the other fields of "struct content_filter" we have to take
284 * "cfil_lck_rw" in shared mode.
285 *
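 * For instance, the pattern used in this file (e.g. cfil_ctl_disconnect())
 * when a socket must be locked while the cfil lock is held looks roughly
 * like this (sketch only):
 *
 *	cfil_rw_lock_exclusive(&cfil_lck_rw);
 *	// ... find the entry / filter state ...
 *	cfil_rw_unlock_exclusive(&cfil_lck_rw);   // drop first, then lock the socket
 *	socket_lock(so, 1);
 *	// ... update the per-socket cfil_info ...
 *	cfil_rw_lock_exclusive(&cfil_lck_rw);     // re-take and re-validate the state
 *	// ...
 *	socket_unlock(so, 1);
 *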
286 * DATAGRAM SPECIFICS:
287 *
288 * The socket content filter supports all INET/INET6 protocols. However
289 * the treatments for TCP sockets and for datagram (UDP, ICMP, etc) sockets
290 * are slightly different.
291 *
292 * Each datagram socket may have multiple flows. Each flow is identified
293 * by the flow's source address/port and destination address/port tuple
294 * and is represented as a "struct cfil_info" entry. For each socket,
295 * a hash table is used to maintain the collection of flows under that socket.
296 *
297 * Each datagram flow is uniquely identified by its "struct cfil_info" cfi_sock_id.
298 * The highest 32 bits of the cfi_sock_id contain the socket's so_gencnt. This portion
299 * of the cfi_sock_id is used to locate the socket during socket lookup. The lowest 32 bits
300 * of the cfi_sock_id contain a hash of the flow's 4-tuple. This portion of the cfi_sock_id
301 * is used as the hash value for the flow hash table lookup within the parent socket.
302 *
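 * A minimal sketch of that layout, using the CFI_MASK_* / CFI_SHIFT_*
 * constants defined further down in this file (the helper name is
 * illustrative only):
 *
 *	static cfil_sock_id_t
 *	example_make_dgram_sock_id(u_int64_t so_gencnt, u_int32_t flowhash)
 *	{
 *		return ((so_gencnt << CFI_SHIFT_GENCNT) & CFI_MASK_GENCNT) |
 *		    (((u_int64_t)flowhash << CFI_SHIFT_FLOWHASH) & CFI_MASK_FLOWHASH);
 *	}
 *
 *	// and the reverse, as done by cfil_socket_from_sock_id():
 *	u_int64_t gencnt   = cfil_sock_id >> CFI_SHIFT_GENCNT;
 *	u_int32_t flowhash = (u_int32_t)(cfil_sock_id & CFI_MASK_FLOWHASH);
 *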
303 * Since datagram sockets may not be connected, flow states may not be maintained in the
304 * socket structures and thus have to be saved for each packet. These saved states will be
305 * used for both outgoing and incoming reinjections. For outgoing packets, destination
306 * address/port as well as the current socket states will be saved. During reinjection,
307 * these saved states will be used instead. For incoming packets, control and address
308 * mbufs will be chained to the data. During reinjection, the whole chain will be queued
309 * onto the incoming socket buffer.
310 *
311 * LIMITATIONS
312 *
313 * - Supports all INET/INET6 sockets, such as TCP, UDP, ICMP, etc
314 *
315 * - Does not support TCP unordered messages
316 */
317
318 /*
319 * TO DO LIST
320 *
321 * Deal with OOB
322 *
323 */
324
325 #include <sys/types.h>
326 #include <sys/kern_control.h>
327 #include <sys/queue.h>
328 #include <sys/domain.h>
329 #include <sys/protosw.h>
330 #include <sys/syslog.h>
331 #include <sys/systm.h>
332 #include <sys/param.h>
333 #include <sys/mbuf.h>
334
335 #include <kern/locks.h>
336 #include <kern/zalloc.h>
337 #include <kern/debug.h>
338
339 #include <net/content_filter.h>
340 #include <net/content_filter_crypto.h>
341
342 #define _IP_VHL
343 #include <netinet/ip.h>
344 #include <netinet/in_pcb.h>
345 #include <netinet/tcp.h>
346 #include <netinet/tcp_var.h>
347 #include <netinet/udp.h>
348 #include <netinet/udp_var.h>
349
350 #include <string.h>
351 #include <libkern/libkern.h>
352 #include <kern/sched_prim.h>
353 #include <kern/task.h>
354 #include <mach/task_info.h>
355
356 #if !TARGET_OS_OSX && !defined(XNU_TARGET_OS_OSX)
357 #define MAX_CONTENT_FILTER 2
358 #else
359 #define MAX_CONTENT_FILTER 8
360 #endif
361
362 extern struct inpcbinfo ripcbinfo;
363 struct cfil_entry;
364
365 /*
366 * The structure content_filter represents a user space content filter.
367 * It's created and associated with a kernel control socket instance
368 */
369 struct content_filter {
370 kern_ctl_ref cf_kcref;
371 u_int32_t cf_kcunit;
372 u_int32_t cf_flags;
373
374 uint32_t cf_necp_control_unit;
375
376 uint32_t cf_sock_count;
377 TAILQ_HEAD(, cfil_entry) cf_sock_entries;
378
379 cfil_crypto_state_t cf_crypto_state;
380 };
381
382 #define CFF_ACTIVE 0x01
383 #define CFF_DETACHING 0x02
384 #define CFF_FLOW_CONTROLLED 0x04
385
386 struct content_filter **content_filters = NULL;
387 uint32_t cfil_active_count = 0; /* Number of active content filters */
388 uint32_t cfil_sock_attached_count = 0; /* Number of socket attachments */
389 uint32_t cfil_sock_udp_attached_count = 0; /* Number of UDP socket attachments */
390 uint32_t cfil_sock_attached_stats_count = 0; /* Number of sockets that requested periodic stats reports */
391 uint32_t cfil_close_wait_timeout = 1000; /* in milliseconds */
392
393 static kern_ctl_ref cfil_kctlref = NULL;
394
395 static lck_grp_attr_t *cfil_lck_grp_attr = NULL;
396 static lck_attr_t *cfil_lck_attr = NULL;
397 static lck_grp_t *cfil_lck_grp = NULL;
398 decl_lck_rw_data(static, cfil_lck_rw);
399
400 #define CFIL_RW_LCK_MAX 8
401
402 int cfil_rw_nxt_lck = 0;
403 void* cfil_rw_lock_history[CFIL_RW_LCK_MAX];
404
405 int cfil_rw_nxt_unlck = 0;
406 void* cfil_rw_unlock_history[CFIL_RW_LCK_MAX];
407
408 #define CONTENT_FILTER_ZONE_NAME "content_filter"
409 #define CONTENT_FILTER_ZONE_MAX 10
410 static struct zone *content_filter_zone = NULL; /* zone for content_filter */
411
412
413 #define CFIL_INFO_ZONE_NAME "cfil_info"
414 #define CFIL_INFO_ZONE_MAX 1024
415 static struct zone *cfil_info_zone = NULL; /* zone for cfil_info */
416
417 MBUFQ_HEAD(cfil_mqhead);
418
419 struct cfil_queue {
420 uint64_t q_start; /* offset of first byte in queue */
421 uint64_t q_end; /* offset of last byte in queue */
422 struct cfil_mqhead q_mq;
423 };
424
425 /*
426 * struct cfil_entry
427 *
428 * There is one entry per content filter
429 */
430 struct cfil_entry {
431 TAILQ_ENTRY(cfil_entry) cfe_link;
432 SLIST_ENTRY(cfil_entry) cfe_order_link;
433 struct content_filter *cfe_filter;
434
435 struct cfil_info *cfe_cfil_info;
436 uint32_t cfe_flags;
437 uint32_t cfe_necp_control_unit;
438 struct timeval cfe_last_event; /* To user space */
439 struct timeval cfe_last_action; /* From user space */
440 uint64_t cfe_byte_inbound_count_reported; /* stats already reported */
441 uint64_t cfe_byte_outbound_count_reported; /* stats already reported */
442 struct timeval cfe_stats_report_ts; /* Timestamp for last stats report */
443 uint32_t cfe_stats_report_frequency; /* Interval for stats report in msecs */
444 boolean_t cfe_laddr_sent;
445
446 struct cfe_buf {
447 /*
448 * cfe_pending_q holds data that has been delivered to
449 * the filter and for which we are waiting for an action
450 */
451 struct cfil_queue cfe_pending_q;
452 /*
453 * This queue is for data that has not been delivered to
454 * the content filter (new data, pass peek or flow control)
455 */
456 struct cfil_queue cfe_ctl_q;
457
458 uint64_t cfe_pass_offset;
459 uint64_t cfe_peek_offset;
460 uint64_t cfe_peeked;
461 } cfe_snd, cfe_rcv;
462 };
463
464 #define CFEF_CFIL_ATTACHED 0x0001 /* was attached to filter */
465 #define CFEF_SENT_SOCK_ATTACHED 0x0002 /* sock attach event was sent */
466 #define CFEF_DATA_START 0x0004 /* can send data event */
467 #define CFEF_FLOW_CONTROLLED 0x0008 /* wait for flow control lift */
468 #define CFEF_SENT_DISCONNECT_IN 0x0010 /* event was sent */
469 #define CFEF_SENT_DISCONNECT_OUT 0x0020 /* event was sent */
470 #define CFEF_SENT_SOCK_CLOSED 0x0040 /* closed event was sent */
471 #define CFEF_CFIL_DETACHED 0x0080 /* filter was detached */
472
473
474 #define CFI_ADD_TIME_LOG(cfil, t1, t0, op) \
475 struct timeval _tdiff; \
476 if ((cfil)->cfi_op_list_ctr < CFI_MAX_TIME_LOG_ENTRY) { \
477 timersub(t1, t0, &_tdiff); \
478 (cfil)->cfi_op_time[(cfil)->cfi_op_list_ctr] = (uint32_t)(_tdiff.tv_sec * 1000 + _tdiff.tv_usec / 1000);\
479 (cfil)->cfi_op_list[(cfil)->cfi_op_list_ctr] = (unsigned char)op; \
480 (cfil)->cfi_op_list_ctr ++; \
481 }
482
483 struct cfil_hash_entry;
484
485 /*
486 * struct cfil_info
487 *
488 * There is a struct cfil_info per socket
489 */
490 struct cfil_info {
491 TAILQ_ENTRY(cfil_info) cfi_link;
492 TAILQ_ENTRY(cfil_info) cfi_link_stats;
493 struct socket *cfi_so;
494 uint64_t cfi_flags;
495 uint64_t cfi_sock_id;
496 struct timeval64 cfi_first_event;
497 uint32_t cfi_op_list_ctr;
498 uint32_t cfi_op_time[CFI_MAX_TIME_LOG_ENTRY]; /* time interval in microseconds since first event */
499 unsigned char cfi_op_list[CFI_MAX_TIME_LOG_ENTRY];
500 union sockaddr_in_4_6 cfi_so_attach_faddr; /* faddr at the time of attach */
501 union sockaddr_in_4_6 cfi_so_attach_laddr; /* laddr at the time of attach */
502
503 int cfi_dir;
504 uint64_t cfi_byte_inbound_count;
505 uint64_t cfi_byte_outbound_count;
506
507 boolean_t cfi_isSignatureLatest; /* Indicates if signature covers latest flow attributes */
508 u_int32_t cfi_debug;
509 struct cfi_buf {
510 /*
511 * cfi_pending_first and cfi_pending_last describe the total
512 * amount of data outstanding for all the filters on
513 * this socket and data in the flow queue
514 * cfi_pending_mbcnt counts in sballoc() "chars of mbufs used"
515 */
516 uint64_t cfi_pending_first;
517 uint64_t cfi_pending_last;
518 uint32_t cfi_pending_mbcnt;
519 uint32_t cfi_pending_mbnum;
520 uint32_t cfi_tail_drop_cnt;
521 /*
522 * cfi_pass_offset is the minimum of all the filters
523 */
524 uint64_t cfi_pass_offset;
525 /*
526 * cfi_inject_q holds data that needs to be re-injected
527 * into the socket after filtering and that can
528 * be queued because of flow control
529 */
530 struct cfil_queue cfi_inject_q;
531 } cfi_snd, cfi_rcv;
532
533 struct cfil_entry cfi_entries[MAX_CONTENT_FILTER];
534 struct cfil_hash_entry *cfi_hash_entry;
535 SLIST_HEAD(, cfil_entry) cfi_ordered_entries;
536 } __attribute__((aligned(8)));
537
538 #define CFIF_DROP 0x0001 /* drop action applied */
539 #define CFIF_CLOSE_WAIT 0x0002 /* waiting for filter to close */
540 #define CFIF_SOCK_CLOSED 0x0004 /* socket is closed */
541 #define CFIF_RETRY_INJECT_IN 0x0010 /* inject in failed */
542 #define CFIF_RETRY_INJECT_OUT 0x0020 /* inject out failed */
543 #define CFIF_SHUT_WR 0x0040 /* shutdown write */
544 #define CFIF_SHUT_RD 0x0080 /* shutdown read */
545 #define CFIF_SOCKET_CONNECTED 0x0100 /* socket is connected */
546 #define CFIF_INITIAL_VERDICT 0x0200 /* received initial verdict */
547
548 #define CFI_MASK_GENCNT 0xFFFFFFFF00000000 /* upper 32 bits */
549 #define CFI_SHIFT_GENCNT 32
550 #define CFI_MASK_FLOWHASH 0x00000000FFFFFFFF /* lower 32 bits */
551 #define CFI_SHIFT_FLOWHASH 0
552
553 #define CFI_ENTRY_KCUNIT(i, e) (((e) - &((i)->cfi_entries[0])) + 1)
554
555 TAILQ_HEAD(cfil_sock_head, cfil_info) cfil_sock_head;
556 TAILQ_HEAD(cfil_sock_head_stats, cfil_info) cfil_sock_head_stats;
557
558 #define CFIL_QUEUE_VERIFY(x) if (cfil_debug) cfil_queue_verify(x)
559 #define CFIL_INFO_VERIFY(x) if (cfil_debug) cfil_info_verify(x)
560
561 /*
562 * UDP Socket Support
563 */
564 LIST_HEAD(cfilhashhead, cfil_hash_entry);
565 #define CFILHASHSIZE 16
566 #define CFIL_HASH(laddr, faddr, lport, fport) ((faddr) ^ ((laddr) >> 16) ^ (fport) ^ (lport))
567
568 #define IS_INET(so) (so && so->so_proto && so->so_proto->pr_domain && (so->so_proto->pr_domain->dom_family == AF_INET || so->so_proto->pr_domain->dom_family == AF_INET6))
569 #define IS_TCP(so) (so && so->so_proto && so->so_proto->pr_type == SOCK_STREAM && so->so_proto->pr_protocol == IPPROTO_TCP)
570 #define IS_UDP(so) (so && so->so_proto && so->so_proto->pr_type == SOCK_DGRAM && so->so_proto->pr_protocol == IPPROTO_UDP)
571 #define IS_ICMP(so) (so && so->so_proto && (so->so_proto->pr_type == SOCK_RAW || so->so_proto->pr_type == SOCK_DGRAM) && \
572 (so->so_proto->pr_protocol == IPPROTO_ICMP || so->so_proto->pr_protocol == IPPROTO_ICMPV6))
573 #define IS_RAW(so) (so && so->so_proto && so->so_proto->pr_type == SOCK_RAW && so->so_proto->pr_protocol == IPPROTO_RAW)
574
575 #if !TARGET_OS_OSX && !defined(XNU_TARGET_OS_OSX)
576 #define IS_IP_DGRAM(so) (IS_INET(so) && IS_UDP(so))
577 #else
578 #define IS_IP_DGRAM(so) (IS_INET(so) && !IS_TCP(so))
579 #endif
580
581 #define OPTIONAL_IP_HEADER(so) (!IS_TCP(so) && !IS_UDP(so))
582 #define GET_SO_PROTO(so) ((so && so->so_proto) ? so->so_proto->pr_protocol : IPPROTO_MAX)
583 #define IS_INP_V6(inp) (inp && (inp->inp_vflag & INP_IPV6))
584
585 #define UNCONNECTED(inp) (inp && (((inp->inp_vflag & INP_IPV4) && (inp->inp_faddr.s_addr == INADDR_ANY)) || \
586 ((inp->inp_vflag & INP_IPV6) && IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr))))
587 #define IS_ENTRY_ATTACHED(cfil_info, kcunit) (cfil_info != NULL && (kcunit <= MAX_CONTENT_FILTER) && \
588 cfil_info->cfi_entries[kcunit - 1].cfe_filter != NULL)
589 #define IS_DNS(local, remote) (check_port(local, 53) || check_port(remote, 53) || check_port(local, 5353) || check_port(remote, 5353))
590 #define IS_INITIAL_TFO_DATA(so) (so && (so->so_flags1 & SOF1_PRECONNECT_DATA) && (so->so_state & SS_ISCONNECTING))
591 #define NULLADDRESS(addr) ((addr.sa.sa_len == 0) || \
592 (addr.sa.sa_family == AF_INET && addr.sin.sin_addr.s_addr == 0) || \
593 (addr.sa.sa_family == AF_INET6 && IN6_IS_ADDR_UNSPECIFIED(&addr.sin6.sin6_addr)))
594
595 /*
596 * Periodic Statistics Report:
597 */
598 static struct thread *cfil_stats_report_thread;
599 #define CFIL_STATS_REPORT_INTERVAL_MIN_MSEC 500 // Highest report frequency
600 #define CFIL_STATS_REPORT_RUN_INTERVAL_NSEC (CFIL_STATS_REPORT_INTERVAL_MIN_MSEC * NSEC_PER_MSEC)
601 #define CFIL_STATS_REPORT_MAX_COUNT 50 // Max stats to be reported per run
602
603 /* This buffer must have same layout as struct cfil_msg_stats_report */
604 struct cfil_stats_report_buffer {
605 struct cfil_msg_hdr msghdr;
606 uint32_t count;
607 struct cfil_msg_sock_stats stats[CFIL_STATS_REPORT_MAX_COUNT];
608 };
609 static struct cfil_stats_report_buffer *global_cfil_stats_report_buffers[MAX_CONTENT_FILTER];
610 static uint32_t global_cfil_stats_counts[MAX_CONTENT_FILTER];
611
612 /*
613 * UDP Garbage Collection:
614 */
615 static struct thread *cfil_udp_gc_thread;
616 #define UDP_FLOW_GC_IDLE_TO 30 // Flow Idle Timeout in seconds
617 #define UDP_FLOW_GC_ACTION_TO 10 // Flow Action Timeout (no action from user space) in seconds
618 #define UDP_FLOW_GC_MAX_COUNT 100 // Max UDP flows to be handled per run
619 #define UDP_FLOW_GC_RUN_INTERVAL_NSEC (10 * NSEC_PER_SEC) // GC wakes up every 10 seconds
620
621 /*
622 * UDP flow queue thresholds
623 */
624 #define UDP_FLOW_GC_MBUF_CNT_MAX (2 << MBSHIFT) // Max mbuf byte count in flow queue (2MB)
625 #define UDP_FLOW_GC_MBUF_NUM_MAX (UDP_FLOW_GC_MBUF_CNT_MAX >> MCLSHIFT) // Max mbuf count in flow queue (1K)
626 #define UDP_FLOW_GC_MBUF_SHIFT 5 // Shift to get 1/32 of platform limits
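/*
 * Rough arithmetic behind the two limits above, assuming the usual
 * MBSHIFT == 20 and MCLSHIFT == 11 from <sys/param.h>:
 *
 *	UDP_FLOW_GC_MBUF_CNT_MAX = 2 << 20         = 2,097,152 bytes (2MB)
 *	UDP_FLOW_GC_MBUF_NUM_MAX = (2 << 20) >> 11 = 1,024 mbufs (1K)
 */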
627 /*
628 * UDP flow queue threshold globals:
629 */
630 static unsigned int cfil_udp_gc_mbuf_num_max = UDP_FLOW_GC_MBUF_NUM_MAX;
631 static unsigned int cfil_udp_gc_mbuf_cnt_max = UDP_FLOW_GC_MBUF_CNT_MAX;
632
633 /*
634 * struct cfil_hash_entry
635 *
636 * Hash entry for cfil_info
637 */
638 struct cfil_hash_entry {
639 LIST_ENTRY(cfil_hash_entry) cfentry_link;
640 struct cfil_info *cfentry_cfil;
641 u_short cfentry_fport;
642 u_short cfentry_lport;
643 sa_family_t cfentry_family;
644 u_int32_t cfentry_flowhash;
645 u_int64_t cfentry_lastused;
646 union {
647 /* foreign host table entry */
648 struct in_addr_4in6 addr46;
649 struct in6_addr addr6;
650 } cfentry_faddr;
651 union {
652 /* local host table entry */
653 struct in_addr_4in6 addr46;
654 struct in6_addr addr6;
655 } cfentry_laddr;
656 };
657
658 /*
659 * struct cfil_db
660 *
661 * For each UDP socket, this is a hash table maintaining all cfil_info structs
662 * keyed by the flow 4-tuples <lport,fport,laddr,faddr>.
663 */
664 struct cfil_db {
665 struct socket *cfdb_so;
666 uint32_t cfdb_count; /* Number of total content filters */
667 struct cfilhashhead *cfdb_hashbase;
668 u_long cfdb_hashmask;
669 struct cfil_hash_entry *cfdb_only_entry; /* Optimization for connected UDP */
670 };
671
672 /*
673 * CFIL specific mbuf tag:
674 * Save state of socket at the point of data entry into cfil.
675 * Use saved state for reinjection at protocol layer.
676 */
677 struct cfil_tag {
678 union sockaddr_in_4_6 cfil_faddr;
679 uint32_t cfil_so_state_change_cnt;
680 short cfil_so_options;
681 int cfil_inp_flags;
682 };
683
684 #define CFIL_HASH_ENTRY_ZONE_NAME "cfil_entry_hash"
685 #define CFIL_HASH_ENTRY_ZONE_MAX 1024
686 static struct zone *cfil_hash_entry_zone = NULL;
687
688 #define CFIL_DB_ZONE_NAME "cfil_db"
689 #define CFIL_DB_ZONE_MAX 1024
690 static struct zone *cfil_db_zone = NULL;
691
692 /*
693 * Statistics
694 */
695
696 struct cfil_stats cfil_stats;
697
698 /*
699 * For troubleshooting
700 */
701 int cfil_log_level = LOG_ERR;
702 int cfil_debug = 1;
703
704 // Debug controls added for selective debugging.
705 // Disabled for production. If enabled,
706 // these will have performance impact
707 #define LIFECYCLE_DEBUG 0
708 #define VERDICT_DEBUG 0
709 #define DATA_DEBUG 0
710 #define SHOW_DEBUG 0
711 #define GC_DEBUG 0
712 #define STATS_DEBUG 0
713
714 /*
715 * Sysctls for logs and statistics
716 */
717 static int sysctl_cfil_filter_list(struct sysctl_oid *, void *, int,
718 struct sysctl_req *);
719 static int sysctl_cfil_sock_list(struct sysctl_oid *, void *, int,
720 struct sysctl_req *);
721
722 SYSCTL_NODE(_net, OID_AUTO, cfil, CTLFLAG_RW | CTLFLAG_LOCKED, 0, "cfil");
723
724 SYSCTL_INT(_net_cfil, OID_AUTO, log, CTLFLAG_RW | CTLFLAG_LOCKED,
725 &cfil_log_level, 0, "");
726
727 SYSCTL_INT(_net_cfil, OID_AUTO, debug, CTLFLAG_RW | CTLFLAG_LOCKED,
728 &cfil_debug, 0, "");
729
730 SYSCTL_UINT(_net_cfil, OID_AUTO, sock_attached_count, CTLFLAG_RD | CTLFLAG_LOCKED,
731 &cfil_sock_attached_count, 0, "");
732
733 SYSCTL_UINT(_net_cfil, OID_AUTO, active_count, CTLFLAG_RD | CTLFLAG_LOCKED,
734 &cfil_active_count, 0, "");
735
736 SYSCTL_UINT(_net_cfil, OID_AUTO, close_wait_timeout, CTLFLAG_RW | CTLFLAG_LOCKED,
737 &cfil_close_wait_timeout, 0, "");
738
739 static int cfil_sbtrim = 1;
740 SYSCTL_UINT(_net_cfil, OID_AUTO, sbtrim, CTLFLAG_RW | CTLFLAG_LOCKED,
741 &cfil_sbtrim, 0, "");
742
743 SYSCTL_PROC(_net_cfil, OID_AUTO, filter_list, CTLFLAG_RD | CTLFLAG_LOCKED,
744 0, 0, sysctl_cfil_filter_list, "S,cfil_filter_stat", "");
745
746 SYSCTL_PROC(_net_cfil, OID_AUTO, sock_list, CTLFLAG_RD | CTLFLAG_LOCKED,
747 0, 0, sysctl_cfil_sock_list, "S,cfil_sock_stat", "");
748
749 SYSCTL_STRUCT(_net_cfil, OID_AUTO, stats, CTLFLAG_RD | CTLFLAG_LOCKED,
750 &cfil_stats, cfil_stats, "");
751
752 /*
753 * Forward declaration to appease the compiler
754 */
755 static int cfil_action_data_pass(struct socket *, struct cfil_info *, uint32_t, int,
756 uint64_t, uint64_t);
757 static int cfil_action_drop(struct socket *, struct cfil_info *, uint32_t);
758 static int cfil_action_bless_client(uint32_t, struct cfil_msg_hdr *);
759 static int cfil_action_set_crypto_key(uint32_t, struct cfil_msg_hdr *);
760 static int cfil_dispatch_closed_event(struct socket *, struct cfil_info *, int);
761 static int cfil_data_common(struct socket *, struct cfil_info *, int, struct sockaddr *,
762 struct mbuf *, struct mbuf *, uint32_t);
763 static int cfil_data_filter(struct socket *, struct cfil_info *, uint32_t, int,
764 struct mbuf *, uint64_t);
765 static void fill_ip_sockaddr_4_6(union sockaddr_in_4_6 *,
766 struct in_addr, u_int16_t);
767 static void fill_ip6_sockaddr_4_6(union sockaddr_in_4_6 *,
768 struct in6_addr *, u_int16_t);
769
770 static int cfil_dispatch_attach_event(struct socket *, struct cfil_info *, uint32_t, int);
771 static void cfil_info_free(struct cfil_info *);
772 static struct cfil_info * cfil_info_alloc(struct socket *, struct cfil_hash_entry *);
773 static int cfil_info_attach_unit(struct socket *, uint32_t, struct cfil_info *);
774 static struct socket * cfil_socket_from_sock_id(cfil_sock_id_t, bool);
775 static struct socket * cfil_socket_from_client_uuid(uuid_t, bool *);
776 static int cfil_service_pending_queue(struct socket *, struct cfil_info *, uint32_t, int);
777 static int cfil_data_service_ctl_q(struct socket *, struct cfil_info *, uint32_t, int);
778 static void cfil_info_verify(struct cfil_info *);
779 static int cfil_update_data_offsets(struct socket *, struct cfil_info *, uint32_t, int,
780 uint64_t, uint64_t);
781 static int cfil_acquire_sockbuf(struct socket *, struct cfil_info *, int);
782 static void cfil_release_sockbuf(struct socket *, int);
783 static int cfil_filters_attached(struct socket *);
784
785 static void cfil_rw_lock_exclusive(lck_rw_t *);
786 static void cfil_rw_unlock_exclusive(lck_rw_t *);
787 static void cfil_rw_lock_shared(lck_rw_t *);
788 static void cfil_rw_unlock_shared(lck_rw_t *);
789 static boolean_t cfil_rw_lock_shared_to_exclusive(lck_rw_t *);
790 static void cfil_rw_lock_exclusive_to_shared(lck_rw_t *);
791
792 static unsigned int cfil_data_length(struct mbuf *, int *, int *);
793 static errno_t cfil_db_init(struct socket *);
794 static void cfil_db_free(struct socket *so);
795 struct cfil_hash_entry *cfil_db_lookup_entry(struct cfil_db *, struct sockaddr *, struct sockaddr *, boolean_t);
796 struct cfil_hash_entry *cfil_db_lookup_entry_with_sockid(struct cfil_db *, u_int64_t);
797 struct cfil_hash_entry *cfil_db_add_entry(struct cfil_db *, struct sockaddr *, struct sockaddr *);
798 void cfil_db_update_entry_local(struct cfil_db *, struct cfil_hash_entry *, struct sockaddr *);
799 void cfil_db_delete_entry(struct cfil_db *, struct cfil_hash_entry *);
800 struct cfil_hash_entry *cfil_sock_udp_get_flow(struct socket *, uint32_t, bool, struct sockaddr *, struct sockaddr *, int);
801 struct cfil_info *cfil_db_get_cfil_info(struct cfil_db *, cfil_sock_id_t);
802 static errno_t cfil_sock_udp_handle_data(bool, struct socket *, struct sockaddr *, struct sockaddr *,
803 struct mbuf *, struct mbuf *, uint32_t);
804 static int32_t cfil_sock_udp_data_pending(struct sockbuf *, bool);
805 static void cfil_sock_udp_is_closed(struct socket *);
806 static int cfil_sock_udp_notify_shutdown(struct socket *, int, int, int);
807 static int cfil_sock_udp_shutdown(struct socket *, int *);
808 static void cfil_sock_udp_close_wait(struct socket *);
809 static void cfil_sock_udp_buf_update(struct sockbuf *);
810 static int cfil_filters_udp_attached(struct socket *, bool);
811 static void cfil_get_flow_address_v6(struct cfil_hash_entry *, struct inpcb *,
812 struct in6_addr **, struct in6_addr **,
813 u_int16_t *, u_int16_t *);
814 static void cfil_get_flow_address(struct cfil_hash_entry *, struct inpcb *,
815 struct in_addr *, struct in_addr *,
816 u_int16_t *, u_int16_t *);
817 static void cfil_info_log(int, struct cfil_info *, const char *);
818 void cfil_filter_show(u_int32_t);
819 void cfil_info_show(void);
820 bool cfil_info_idle_timed_out(struct cfil_info *, int, u_int32_t);
821 bool cfil_info_action_timed_out(struct cfil_info *, int);
822 bool cfil_info_buffer_threshold_exceeded(struct cfil_info *);
823 struct m_tag *cfil_dgram_save_socket_state(struct cfil_info *, struct mbuf *);
824 boolean_t cfil_dgram_peek_socket_state(struct mbuf *m, int *inp_flags);
825 static void cfil_udp_gc_thread_func(void *, wait_result_t);
826 static void cfil_info_udp_expire(void *, wait_result_t);
827 static bool fill_cfil_hash_entry_from_address(struct cfil_hash_entry *, bool, struct sockaddr *);
828 static void cfil_sock_received_verdict(struct socket *so);
829 static void cfil_fill_event_msg_addresses(struct cfil_hash_entry *, struct inpcb *,
830 union sockaddr_in_4_6 *, union sockaddr_in_4_6 *,
831 boolean_t, boolean_t);
832 static void cfil_stats_report_thread_func(void *, wait_result_t);
833 static void cfil_stats_report(void *v, wait_result_t w);
834
835 bool check_port(struct sockaddr *, u_short);
836
837 /*
838 * Content filter global read write lock
839 */
840
841 static void
842 cfil_rw_lock_exclusive(lck_rw_t *lck)
843 {
844 void *lr_saved;
845
846 lr_saved = __builtin_return_address(0);
847
848 lck_rw_lock_exclusive(lck);
849
850 cfil_rw_lock_history[cfil_rw_nxt_lck] = lr_saved;
851 cfil_rw_nxt_lck = (cfil_rw_nxt_lck + 1) % CFIL_RW_LCK_MAX;
852 }
853
854 static void
855 cfil_rw_unlock_exclusive(lck_rw_t *lck)
856 {
857 void *lr_saved;
858
859 lr_saved = __builtin_return_address(0);
860
861 lck_rw_unlock_exclusive(lck);
862
863 cfil_rw_unlock_history[cfil_rw_nxt_unlck] = lr_saved;
864 cfil_rw_nxt_unlck = (cfil_rw_nxt_unlck + 1) % CFIL_RW_LCK_MAX;
865 }
866
867 static void
868 cfil_rw_lock_shared(lck_rw_t *lck)
869 {
870 void *lr_saved;
871
872 lr_saved = __builtin_return_address(0);
873
874 lck_rw_lock_shared(lck);
875
876 cfil_rw_lock_history[cfil_rw_nxt_lck] = lr_saved;
877 cfil_rw_nxt_lck = (cfil_rw_nxt_lck + 1) % CFIL_RW_LCK_MAX;
878 }
879
880 static void
881 cfil_rw_unlock_shared(lck_rw_t *lck)
882 {
883 void *lr_saved;
884
885 lr_saved = __builtin_return_address(0);
886
887 lck_rw_unlock_shared(lck);
888
889 cfil_rw_unlock_history[cfil_rw_nxt_unlck] = lr_saved;
890 cfil_rw_nxt_unlck = (cfil_rw_nxt_unlck + 1) % CFIL_RW_LCK_MAX;
891 }
892
893 static boolean_t
894 cfil_rw_lock_shared_to_exclusive(lck_rw_t *lck)
895 {
896 void *lr_saved;
897 boolean_t upgraded;
898
899 lr_saved = __builtin_return_address(0);
900
901 upgraded = lck_rw_lock_shared_to_exclusive(lck);
902 if (upgraded) {
903 cfil_rw_unlock_history[cfil_rw_nxt_unlck] = lr_saved;
904 cfil_rw_nxt_unlck = (cfil_rw_nxt_unlck + 1) % CFIL_RW_LCK_MAX;
905 }
906 return upgraded;
907 }
908
909 static void
910 cfil_rw_lock_exclusive_to_shared(lck_rw_t *lck)
911 {
912 void *lr_saved;
913
914 lr_saved = __builtin_return_address(0);
915
916 lck_rw_lock_exclusive_to_shared(lck);
917
918 cfil_rw_lock_history[cfil_rw_nxt_lck] = lr_saved;
919 cfil_rw_nxt_lck = (cfil_rw_nxt_lck + 1) % CFIL_RW_LCK_MAX;
920 }
921
922 static void
923 cfil_rw_lock_assert_held(lck_rw_t *lck, int exclusive)
924 {
925 #if !MACH_ASSERT
926 #pragma unused(lck, exclusive)
927 #endif
928 LCK_RW_ASSERT(lck,
929 exclusive ? LCK_RW_ASSERT_EXCLUSIVE : LCK_RW_ASSERT_HELD);
930 }
931
932 /*
933 * Return the number of bytes in the mbuf chain using the same
934 * method as m_length() or sballoc()
935 *
936 * Returns data len - starting from PKT start
937 * - retmbcnt - optional param to get total mbuf bytes in chain
938 * - retmbnum - optional param to get number of mbufs in chain
939 */
940 static unsigned int
941 cfil_data_length(struct mbuf *m, int *retmbcnt, int *retmbnum)
942 {
943 struct mbuf *m0;
944 unsigned int pktlen = 0;
945 int mbcnt;
946 int mbnum;
947
948 // Locate the start of data
949 for (m0 = m; m0 != NULL; m0 = m0->m_next) {
950 if (m0->m_flags & M_PKTHDR) {
951 break;
952 }
953 }
954 if (m0 == NULL) {
955 CFIL_LOG(LOG_ERR, "cfil_data_length: no M_PKTHDR");
956 return 0;
957 }
958 m = m0;
959
960 if (retmbcnt == NULL && retmbnum == NULL) {
961 return m_length(m);
962 }
963
964 pktlen = 0;
965 mbcnt = 0;
966 mbnum = 0;
967 for (m0 = m; m0 != NULL; m0 = m0->m_next) {
968 pktlen += m0->m_len;
969 mbnum++;
970 mbcnt += MSIZE;
971 if (m0->m_flags & M_EXT) {
972 mbcnt += m0->m_ext.ext_size;
973 }
974 }
975 if (retmbcnt) {
976 *retmbcnt = mbcnt;
977 }
978 if (retmbnum) {
979 *retmbnum = mbnum;
980 }
981 return pktlen;
982 }
983
984 static struct mbuf *
985 cfil_data_start(struct mbuf *m)
986 {
987 struct mbuf *m0;
988
989 // Locate the start of data
990 for (m0 = m; m0 != NULL; m0 = m0->m_next) {
991 if (m0->m_flags & M_PKTHDR) {
992 break;
993 }
994 }
995 return m0;
996 }
997
998 /*
999 * Common mbuf queue utilities
1000 */
1001
1002 static inline void
1003 cfil_queue_init(struct cfil_queue *cfq)
1004 {
1005 cfq->q_start = 0;
1006 cfq->q_end = 0;
1007 MBUFQ_INIT(&cfq->q_mq);
1008 }
1009
1010 static inline uint64_t
1011 cfil_queue_drain(struct cfil_queue *cfq)
1012 {
1013 uint64_t drained = cfq->q_end - cfq->q_start;
1014 cfq->q_start = 0;
1015 cfq->q_end = 0;
1016 MBUFQ_DRAIN(&cfq->q_mq);
1017
1018 return drained;
1019 }
1020
1021 /* Return 1 when empty, 0 otherwise */
1022 static inline int
1023 cfil_queue_empty(struct cfil_queue *cfq)
1024 {
1025 return MBUFQ_EMPTY(&cfq->q_mq);
1026 }
1027
1028 static inline uint64_t
1029 cfil_queue_offset_first(struct cfil_queue *cfq)
1030 {
1031 return cfq->q_start;
1032 }
1033
1034 static inline uint64_t
1035 cfil_queue_offset_last(struct cfil_queue *cfq)
1036 {
1037 return cfq->q_end;
1038 }
1039
1040 static inline uint64_t
1041 cfil_queue_len(struct cfil_queue *cfq)
1042 {
1043 return cfq->q_end - cfq->q_start;
1044 }
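
/*
 * Illustrative (non-compiled) use of the queue offset helpers: enqueueing
 * and removing a chain of a given length moves q_end and q_start by exactly
 * that length, so cfil_queue_len() always reports the bytes currently held.
 * "m" stands for a hypothetical mbuf chain carrying 100 bytes of data.
 *
 *	struct cfil_queue q;
 *	cfil_queue_init(&q);            // q_start == 0, q_end == 0, len == 0
 *	cfil_queue_enqueue(&q, m, 100); // q_end == 100, len == 100
 *	cfil_queue_remove(&q, m, 100);  // q_start == 100, len == 0, queue empty
 */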
1045
1046 /*
1047 * Routines to verify some fundamental assumptions
1048 */
1049
1050 static void
1051 cfil_queue_verify(struct cfil_queue *cfq)
1052 {
1053 mbuf_t chain;
1054 mbuf_t m;
1055 mbuf_t n;
1056 uint64_t queuesize = 0;
1057
1058 /* Verify offsets are ordered */
1059 VERIFY(cfq->q_start <= cfq->q_end);
1060
1061 /*
1062 * When the queue is empty, the offsets are equal; otherwise the offsets
1063 * are different
1064 */
1065 VERIFY((MBUFQ_EMPTY(&cfq->q_mq) && cfq->q_start == cfq->q_end) ||
1066 (!MBUFQ_EMPTY(&cfq->q_mq) &&
1067 cfq->q_start != cfq->q_end));
1068
1069 MBUFQ_FOREACH(chain, &cfq->q_mq) {
1070 size_t chainsize = 0;
1071 m = chain;
1072 unsigned int mlen = cfil_data_length(m, NULL, NULL);
1073 // skip the addr and control stuff if present
1074 m = cfil_data_start(m);
1075
1076 if (m == NULL ||
1077 m == (void *)M_TAG_FREE_PATTERN ||
1078 m->m_next == (void *)M_TAG_FREE_PATTERN ||
1079 m->m_nextpkt == (void *)M_TAG_FREE_PATTERN) {
1080 panic("%s - mq %p is free at %p", __func__,
1081 &cfq->q_mq, m);
1082 }
1083 for (n = m; n != NULL; n = n->m_next) {
1084 if (n->m_type != MT_DATA &&
1085 n->m_type != MT_HEADER &&
1086 n->m_type != MT_OOBDATA) {
1087 panic("%s - %p unsupported type %u", __func__,
1088 n, n->m_type);
1089 }
1090 chainsize += n->m_len;
1091 }
1092 if (mlen != chainsize) {
1093 panic("%s - %p m_length() %u != chainsize %lu",
1094 __func__, m, mlen, chainsize);
1095 }
1096 queuesize += chainsize;
1097 }
1098 if (queuesize != cfq->q_end - cfq->q_start) {
1099 panic("%s - %p queuesize %llu != offsetdiffs %llu", __func__,
1100 m, queuesize, cfq->q_end - cfq->q_start);
1101 }
1102 }
1103
1104 static void
1105 cfil_queue_enqueue(struct cfil_queue *cfq, mbuf_t m, size_t len)
1106 {
1107 CFIL_QUEUE_VERIFY(cfq);
1108
1109 MBUFQ_ENQUEUE(&cfq->q_mq, m);
1110 cfq->q_end += len;
1111
1112 CFIL_QUEUE_VERIFY(cfq);
1113 }
1114
1115 static void
1116 cfil_queue_remove(struct cfil_queue *cfq, mbuf_t m, size_t len)
1117 {
1118 CFIL_QUEUE_VERIFY(cfq);
1119
1120 VERIFY(cfil_data_length(m, NULL, NULL) == len);
1121
1122 MBUFQ_REMOVE(&cfq->q_mq, m);
1123 MBUFQ_NEXT(m) = NULL;
1124 cfq->q_start += len;
1125
1126 CFIL_QUEUE_VERIFY(cfq);
1127 }
1128
1129 static mbuf_t
1130 cfil_queue_first(struct cfil_queue *cfq)
1131 {
1132 return MBUFQ_FIRST(&cfq->q_mq);
1133 }
1134
1135 static mbuf_t
1136 cfil_queue_next(struct cfil_queue *cfq, mbuf_t m)
1137 {
1138 #pragma unused(cfq)
1139 return MBUFQ_NEXT(m);
1140 }
1141
1142 static void
1143 cfil_entry_buf_verify(struct cfe_buf *cfe_buf)
1144 {
1145 CFIL_QUEUE_VERIFY(&cfe_buf->cfe_ctl_q);
1146 CFIL_QUEUE_VERIFY(&cfe_buf->cfe_pending_q);
1147
1148 /* Verify the queues are ordered so that pending is before ctl */
1149 VERIFY(cfe_buf->cfe_ctl_q.q_start >= cfe_buf->cfe_pending_q.q_end);
1150
1151 /* The peek offset cannot be less than the pass offset */
1152 VERIFY(cfe_buf->cfe_peek_offset >= cfe_buf->cfe_pass_offset);
1153
1154 /* Make sure we've updated the offset we peeked at */
1155 VERIFY(cfe_buf->cfe_ctl_q.q_start <= cfe_buf->cfe_peeked);
1156 }
1157
1158 static void
1159 cfil_entry_verify(struct cfil_entry *entry)
1160 {
1161 cfil_entry_buf_verify(&entry->cfe_snd);
1162 cfil_entry_buf_verify(&entry->cfe_rcv);
1163 }
1164
1165 static void
1166 cfil_info_buf_verify(struct cfi_buf *cfi_buf)
1167 {
1168 CFIL_QUEUE_VERIFY(&cfi_buf->cfi_inject_q);
1169
1170 VERIFY(cfi_buf->cfi_pending_first <= cfi_buf->cfi_pending_last);
1171 }
1172
1173 static void
1174 cfil_info_verify(struct cfil_info *cfil_info)
1175 {
1176 int i;
1177
1178 if (cfil_info == NULL) {
1179 return;
1180 }
1181
1182 cfil_info_buf_verify(&cfil_info->cfi_snd);
1183 cfil_info_buf_verify(&cfil_info->cfi_rcv);
1184
1185 for (i = 0; i < MAX_CONTENT_FILTER; i++) {
1186 cfil_entry_verify(&cfil_info->cfi_entries[i]);
1187 }
1188 }
1189
1190 static void
1191 verify_content_filter(struct content_filter *cfc)
1192 {
1193 struct cfil_entry *entry;
1194 uint32_t count = 0;
1195
1196 VERIFY(cfc->cf_sock_count >= 0);
1197
1198 TAILQ_FOREACH(entry, &cfc->cf_sock_entries, cfe_link) {
1199 count++;
1200 VERIFY(cfc == entry->cfe_filter);
1201 }
1202 VERIFY(count == cfc->cf_sock_count);
1203 }
1204
1205 /*
1206 * Kernel control socket callbacks
1207 */
1208 static errno_t
1209 cfil_ctl_connect(kern_ctl_ref kctlref, struct sockaddr_ctl *sac,
1210 void **unitinfo)
1211 {
1212 errno_t error = 0;
1213 struct content_filter *cfc = NULL;
1214
1215 CFIL_LOG(LOG_NOTICE, "");
1216
1217 cfc = zalloc(content_filter_zone);
1218 if (cfc == NULL) {
1219 CFIL_LOG(LOG_ERR, "zalloc failed");
1220 error = ENOMEM;
1221 goto done;
1222 }
1223 bzero(cfc, sizeof(struct content_filter));
1224
1225 cfil_rw_lock_exclusive(&cfil_lck_rw);
1226 if (content_filters == NULL) {
1227 struct content_filter **tmp;
1228
1229 cfil_rw_unlock_exclusive(&cfil_lck_rw);
1230
1231 MALLOC(tmp,
1232 struct content_filter **,
1233 MAX_CONTENT_FILTER * sizeof(struct content_filter *),
1234 M_TEMP,
1235 M_WAITOK | M_ZERO);
1236
1237 cfil_rw_lock_exclusive(&cfil_lck_rw);
1238
1239 if (tmp == NULL && content_filters == NULL) {
1240 error = ENOMEM;
1241 cfil_rw_unlock_exclusive(&cfil_lck_rw);
1242 goto done;
1243 }
1244 /* Another thread may have won the race */
1245 if (content_filters != NULL) {
1246 FREE(tmp, M_TEMP);
1247 } else {
1248 content_filters = tmp;
1249 }
1250 }
1251
1252 if (sac->sc_unit == 0 || sac->sc_unit > MAX_CONTENT_FILTER) {
1253 CFIL_LOG(LOG_ERR, "bad sc_unit %u", sac->sc_unit);
1254 error = EINVAL;
1255 } else if (content_filters[sac->sc_unit - 1] != NULL) {
1256 CFIL_LOG(LOG_ERR, "sc_unit %u in use", sac->sc_unit);
1257 error = EADDRINUSE;
1258 } else {
1259 /*
1260 * kernel control socket kcunit numbers start at 1
1261 */
1262 content_filters[sac->sc_unit - 1] = cfc;
1263
1264 cfc->cf_kcref = kctlref;
1265 cfc->cf_kcunit = sac->sc_unit;
1266 TAILQ_INIT(&cfc->cf_sock_entries);
1267
1268 *unitinfo = cfc;
1269 cfil_active_count++;
1270
1271 // Allocate periodic stats buffer for this filter
1272 if (global_cfil_stats_report_buffers[cfc->cf_kcunit - 1] == NULL) {
1273 cfil_rw_unlock_exclusive(&cfil_lck_rw);
1274
1275 struct cfil_stats_report_buffer *buf;
1276
1277 MALLOC(buf,
1278 struct cfil_stats_report_buffer *,
1279 sizeof(struct cfil_stats_report_buffer),
1280 M_TEMP,
1281 M_WAITOK | M_ZERO);
1282
1283 cfil_rw_lock_exclusive(&cfil_lck_rw);
1284
1285 if (buf == NULL) {
1286 error = ENOMEM;
1287 cfil_rw_unlock_exclusive(&cfil_lck_rw);
1288 goto done;
1289 }
1290
1291 /* Another thread may have won the race */
1292 if (global_cfil_stats_report_buffers[cfc->cf_kcunit - 1] != NULL) {
1293 FREE(buf, M_TEMP);
1294 } else {
1295 global_cfil_stats_report_buffers[cfc->cf_kcunit - 1] = buf;
1296 }
1297 }
1298 }
1299 cfil_rw_unlock_exclusive(&cfil_lck_rw);
1300 done:
1301 if (error != 0 && cfc != NULL) {
1302 zfree(content_filter_zone, cfc);
1303 }
1304
1305 if (error == 0) {
1306 OSIncrementAtomic(&cfil_stats.cfs_ctl_connect_ok);
1307 } else {
1308 OSIncrementAtomic(&cfil_stats.cfs_ctl_connect_fail);
1309 }
1310
1311 CFIL_LOG(LOG_INFO, "return %d cfil_active_count %u kcunit %u",
1312 error, cfil_active_count, sac->sc_unit);
1313
1314 return error;
1315 }
1316
1317 static errno_t
1318 cfil_ctl_disconnect(kern_ctl_ref kctlref, u_int32_t kcunit, void *unitinfo)
1319 {
1320 #pragma unused(kctlref)
1321 errno_t error = 0;
1322 struct content_filter *cfc;
1323 struct cfil_entry *entry;
1324 uint64_t sock_flow_id = 0;
1325
1326 CFIL_LOG(LOG_NOTICE, "");
1327
1328 if (content_filters == NULL) {
1329 CFIL_LOG(LOG_ERR, "no content filter");
1330 error = EINVAL;
1331 goto done;
1332 }
1333 if (kcunit > MAX_CONTENT_FILTER) {
1334 CFIL_LOG(LOG_ERR, "kcunit %u > MAX_CONTENT_FILTER (%d)",
1335 kcunit, MAX_CONTENT_FILTER);
1336 error = EINVAL;
1337 goto done;
1338 }
1339
1340 cfc = (struct content_filter *)unitinfo;
1341 if (cfc == NULL) {
1342 goto done;
1343 }
1344
1345 cfil_rw_lock_exclusive(&cfil_lck_rw);
1346 if (content_filters[kcunit - 1] != cfc || cfc->cf_kcunit != kcunit) {
1347 CFIL_LOG(LOG_ERR, "bad unit info %u",
1348 kcunit);
1349 cfil_rw_unlock_exclusive(&cfil_lck_rw);
1350 goto done;
1351 }
1352 cfc->cf_flags |= CFF_DETACHING;
1353 /*
1354 * Remove all sockets from the filter
1355 */
1356 while ((entry = TAILQ_FIRST(&cfc->cf_sock_entries)) != NULL) {
1357 cfil_rw_lock_assert_held(&cfil_lck_rw, 1);
1358
1359 verify_content_filter(cfc);
1360 /*
1361 * Accept all outstanding data by pushing to next filter
1362 * or back to socket
1363 *
1364 * TBD: Actually we should make sure all data has been pushed
1365 * back to socket
1366 */
1367 if (entry->cfe_cfil_info && entry->cfe_cfil_info->cfi_so) {
1368 struct cfil_info *cfil_info = entry->cfe_cfil_info;
1369 struct socket *so = cfil_info->cfi_so;
1370 sock_flow_id = cfil_info->cfi_sock_id;
1371
1372 /* Need to let data flow immediately */
1373 entry->cfe_flags |= CFEF_SENT_SOCK_ATTACHED |
1374 CFEF_DATA_START;
1375
1376 /*
1377 * Respect locking hierarchy
1378 */
1379 cfil_rw_unlock_exclusive(&cfil_lck_rw);
1380
1381 socket_lock(so, 1);
1382
1383 /*
1384 * When cfe_filter is NULL the filter is detached
1385 * and the entry has been removed from cf_sock_entries
1386 */
1387 if ((so->so_cfil == NULL && so->so_cfil_db == NULL) || entry->cfe_filter == NULL) {
1388 cfil_rw_lock_exclusive(&cfil_lck_rw);
1389 goto release;
1390 }
1391
1392 (void) cfil_action_data_pass(so, cfil_info, kcunit, 1,
1393 CFM_MAX_OFFSET,
1394 CFM_MAX_OFFSET);
1395
1396 (void) cfil_action_data_pass(so, cfil_info, kcunit, 0,
1397 CFM_MAX_OFFSET,
1398 CFM_MAX_OFFSET);
1399
1400 cfil_rw_lock_exclusive(&cfil_lck_rw);
1401
1402 /*
1403 * Check again to make sure the cfil_info is still valid
1404 * as the socket may have been unlocked when calling
1405 * cfil_acquire_sockbuf()
1406 */
1407 if (entry->cfe_filter == NULL ||
1408 (so->so_cfil == NULL && cfil_db_get_cfil_info(so->so_cfil_db, sock_flow_id) == NULL)) {
1409 goto release;
1410 }
1411
1412 /* The filter is now detached */
1413 entry->cfe_flags |= CFEF_CFIL_DETACHED;
1414 #if LIFECYCLE_DEBUG
1415 cfil_info_log(LOG_DEBUG, cfil_info, "CFIL: LIFECYCLE: - FILTER DISCONNECTED");
1416 #endif
1417 CFIL_LOG(LOG_NOTICE, "so %llx detached %u",
1418 (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit);
1419 if ((cfil_info->cfi_flags & CFIF_CLOSE_WAIT) &&
1420 cfil_filters_attached(so) == 0) {
1421 CFIL_LOG(LOG_NOTICE, "so %llx waking",
1422 (uint64_t)VM_KERNEL_ADDRPERM(so));
1423 wakeup((caddr_t)cfil_info);
1424 }
1425
1426 /*
1427 * Remove the filter entry from the content filter
1428 * but leave the rest of the state intact as the queues
1429 * may not be empty yet
1430 */
1431 entry->cfe_filter = NULL;
1432 entry->cfe_necp_control_unit = 0;
1433
1434 TAILQ_REMOVE(&cfc->cf_sock_entries, entry, cfe_link);
1435 cfc->cf_sock_count--;
1436 release:
1437 socket_unlock(so, 1);
1438 }
1439 }
1440 verify_content_filter(cfc);
1441
1442 /* Free the stats buffer for this filter */
1443 if (global_cfil_stats_report_buffers[cfc->cf_kcunit - 1] != NULL) {
1444 FREE(global_cfil_stats_report_buffers[cfc->cf_kcunit - 1], M_TEMP);
1445 global_cfil_stats_report_buffers[cfc->cf_kcunit - 1] = NULL;
1446 }
1447 VERIFY(cfc->cf_sock_count == 0);
1448
1449 /*
1450 * Make filter inactive
1451 */
1452 content_filters[kcunit - 1] = NULL;
1453 cfil_active_count--;
1454 cfil_rw_unlock_exclusive(&cfil_lck_rw);
1455
1456 if (cfc->cf_crypto_state != NULL) {
1457 cfil_crypto_cleanup_state(cfc->cf_crypto_state);
1458 cfc->cf_crypto_state = NULL;
1459 }
1460
1461 zfree(content_filter_zone, cfc);
1462 done:
1463 if (error == 0) {
1464 OSIncrementAtomic(&cfil_stats.cfs_ctl_disconnect_ok);
1465 } else {
1466 OSIncrementAtomic(&cfil_stats.cfs_ctl_disconnect_fail);
1467 }
1468
1469 CFIL_LOG(LOG_INFO, "return %d cfil_active_count %u kcunit %u",
1470 error, cfil_active_count, kcunit);
1471
1472 return error;
1473 }
1474
1475 /*
1476 * cfil_acquire_sockbuf()
1477 *
1478 * Prevent any other thread from acquiring the sockbuf
1479 * We use sb_cfil_thread as a semaphore to prevent other threads from
1480 * messing with the sockbuf -- see sblock()
1481 * Note: We do not set SB_LOCK here because the thread may check or modify
1482 * SB_LOCK several times until it calls cfil_release_sockbuf() -- currently
1483 * sblock(), sbunlock() or sodefunct()
1484 */
1485 static int
1486 cfil_acquire_sockbuf(struct socket *so, struct cfil_info *cfil_info, int outgoing)
1487 {
1488 thread_t tp = current_thread();
1489 struct sockbuf *sb = outgoing ? &so->so_snd : &so->so_rcv;
1490 lck_mtx_t *mutex_held;
1491 int error = 0;
1492
1493 /*
1494 * Wait until no thread is holding the sockbuf and other content
1495 * filter threads have released the sockbuf
1496 */
1497 while ((sb->sb_flags & SB_LOCK) ||
1498 (sb->sb_cfil_thread != NULL && sb->sb_cfil_thread != tp)) {
1499 if (so->so_proto->pr_getlock != NULL) {
1500 mutex_held = (*so->so_proto->pr_getlock)(so, PR_F_WILLUNLOCK);
1501 } else {
1502 mutex_held = so->so_proto->pr_domain->dom_mtx;
1503 }
1504
1505 LCK_MTX_ASSERT(mutex_held, LCK_MTX_ASSERT_OWNED);
1506
1507 sb->sb_wantlock++;
1508 VERIFY(sb->sb_wantlock != 0);
1509
1510 msleep(&sb->sb_flags, mutex_held, PSOCK, "cfil_acquire_sockbuf",
1511 NULL);
1512
1513 VERIFY(sb->sb_wantlock != 0);
1514 sb->sb_wantlock--;
1515 }
1516 /*
1517 * Use reference count for repetitive calls on same thread
1518 */
1519 if (sb->sb_cfil_refs == 0) {
1520 VERIFY(sb->sb_cfil_thread == NULL);
1521 VERIFY((sb->sb_flags & SB_LOCK) == 0);
1522
1523 sb->sb_cfil_thread = tp;
1524 sb->sb_flags |= SB_LOCK;
1525 }
1526 sb->sb_cfil_refs++;
1527
1528 /* We acquire the socket buffer when we need to cleanup */
1529 if (cfil_info == NULL) {
1530 CFIL_LOG(LOG_ERR, "so %llx cfil detached",
1531 (uint64_t)VM_KERNEL_ADDRPERM(so));
1532 error = 0;
1533 } else if (cfil_info->cfi_flags & CFIF_DROP) {
1534 CFIL_LOG(LOG_ERR, "so %llx drop set",
1535 (uint64_t)VM_KERNEL_ADDRPERM(so));
1536 error = EPIPE;
1537 }
1538
1539 return error;
1540 }
1541
1542 static void
1543 cfil_release_sockbuf(struct socket *so, int outgoing)
1544 {
1545 struct sockbuf *sb = outgoing ? &so->so_snd : &so->so_rcv;
1546 thread_t tp = current_thread();
1547
1548 socket_lock_assert_owned(so);
1549
1550 if (sb->sb_cfil_thread != NULL && sb->sb_cfil_thread != tp) {
1551 panic("%s sb_cfil_thread %p not current %p", __func__,
1552 sb->sb_cfil_thread, tp);
1553 }
1554 /*
1555 * Don't panic if we are defunct because SB_LOCK has
1556 * been cleared by sodefunct()
1557 */
1558 if (!(so->so_flags & SOF_DEFUNCT) && !(sb->sb_flags & SB_LOCK)) {
1559 panic("%s SB_LOCK not set on %p", __func__,
1560 sb);
1561 }
1562 /*
1563 * We can unlock when the thread unwinds to the last reference
1564 */
1565 sb->sb_cfil_refs--;
1566 if (sb->sb_cfil_refs == 0) {
1567 sb->sb_cfil_thread = NULL;
1568 sb->sb_flags &= ~SB_LOCK;
1569
1570 if (sb->sb_wantlock > 0) {
1571 wakeup(&sb->sb_flags);
1572 }
1573 }
1574 }
1575
1576 cfil_sock_id_t
1577 cfil_sock_id_from_socket(struct socket *so)
1578 {
1579 if ((so->so_flags & SOF_CONTENT_FILTER) && so->so_cfil) {
1580 return so->so_cfil->cfi_sock_id;
1581 } else {
1582 return CFIL_SOCK_ID_NONE;
1583 }
1584 }
1585
1586 static bool
1587 cfil_socket_safe_lock(struct inpcb *inp)
1588 {
1589 if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) != WNT_STOPUSING) {
1590 socket_lock(inp->inp_socket, 1);
1591 if (in_pcb_checkstate(inp, WNT_RELEASE, 1) != WNT_STOPUSING) {
1592 return true;
1593 }
1594 socket_unlock(inp->inp_socket, 1);
1595 }
1596 return false;
1597 }
1598
1599 static struct socket *
1600 cfil_socket_from_sock_id(cfil_sock_id_t cfil_sock_id, bool udp_only)
1601 {
1602 struct socket *so = NULL;
1603 u_int64_t gencnt = cfil_sock_id >> 32;
1604 u_int32_t flowhash = (u_int32_t)(cfil_sock_id & 0x0ffffffff);
1605 struct inpcb *inp = NULL;
1606 struct inpcbinfo *pcbinfo = NULL;
1607
1608 #if VERDICT_DEBUG
1609 CFIL_LOG(LOG_ERR, "CFIL: VERDICT: search for socket: id %llu gencnt %llx flowhash %x", cfil_sock_id, gencnt, flowhash);
1610 #endif
1611
1612 if (udp_only) {
1613 goto find_udp;
1614 }
1615
1616 pcbinfo = &tcbinfo;
1617 lck_rw_lock_shared(pcbinfo->ipi_lock);
1618 LIST_FOREACH(inp, pcbinfo->ipi_listhead, inp_list) {
1619 if (inp->inp_state != INPCB_STATE_DEAD &&
1620 inp->inp_socket != NULL &&
1621 inp->inp_flowhash == flowhash &&
1622 (inp->inp_socket->so_gencnt & 0x0ffffffff) == gencnt &&
1623 inp->inp_socket->so_cfil != NULL) {
1624 if (cfil_socket_safe_lock(inp)) {
1625 so = inp->inp_socket;
1626 }
1627 break;
1628 }
1629 }
1630 lck_rw_done(pcbinfo->ipi_lock);
1631 if (so != NULL) {
1632 goto done;
1633 }
1634
1635 find_udp:
1636
1637 pcbinfo = &udbinfo;
1638 lck_rw_lock_shared(pcbinfo->ipi_lock);
1639 LIST_FOREACH(inp, pcbinfo->ipi_listhead, inp_list) {
1640 if (inp->inp_state != INPCB_STATE_DEAD &&
1641 inp->inp_socket != NULL &&
1642 inp->inp_socket->so_cfil_db != NULL &&
1643 (inp->inp_socket->so_gencnt & 0x0ffffffff) == gencnt) {
1644 if (cfil_socket_safe_lock(inp)) {
1645 so = inp->inp_socket;
1646 }
1647 break;
1648 }
1649 }
1650 lck_rw_done(pcbinfo->ipi_lock);
1651
1652 pcbinfo = &ripcbinfo;
1653 lck_rw_lock_shared(pcbinfo->ipi_lock);
1654 LIST_FOREACH(inp, pcbinfo->ipi_listhead, inp_list) {
1655 if (inp->inp_state != INPCB_STATE_DEAD &&
1656 inp->inp_socket != NULL &&
1657 inp->inp_socket->so_cfil_db != NULL &&
1658 (inp->inp_socket->so_gencnt & 0x0ffffffff) == gencnt) {
1659 if (cfil_socket_safe_lock(inp)) {
1660 so = inp->inp_socket;
1661 }
1662 break;
1663 }
1664 }
1665 lck_rw_done(pcbinfo->ipi_lock);
1666
1667 done:
1668 if (so == NULL) {
1669 OSIncrementAtomic(&cfil_stats.cfs_sock_id_not_found);
1670 CFIL_LOG(LOG_DEBUG,
1671 "no socket for sock_id %llx gencnt %llx flowhash %x",
1672 cfil_sock_id, gencnt, flowhash);
1673 }
1674
1675 return so;
1676 }
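
/*
 * Worked example of the cfil_sock_id layout used by the lookup above
 * (matching the encoding in cfil_info_alloc() below): the upper 32 bits
 * carry the low 32 bits of the socket generation count and the lower 32
 * bits carry the flow hash.  A TCP socket with so_gencnt 0x1b2 and
 * inp_flowhash 0x5a33c4d7 gets cfil_sock_id 0x000001b25a33c4d7, and the
 * lookup recovers gencnt = id >> 32 = 0x1b2 and
 * flowhash = id & 0xffffffff = 0x5a33c4d7.
 */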
1677
1678 static struct socket *
1679 cfil_socket_from_client_uuid(uuid_t necp_client_uuid, bool *cfil_attached)
1680 {
1681 struct socket *so = NULL;
1682 struct inpcb *inp = NULL;
1683 struct inpcbinfo *pcbinfo = &tcbinfo;
1684
1685 lck_rw_lock_shared(pcbinfo->ipi_lock);
1686 LIST_FOREACH(inp, pcbinfo->ipi_listhead, inp_list) {
1687 if (inp->inp_state != INPCB_STATE_DEAD &&
1688 inp->inp_socket != NULL &&
1689 uuid_compare(inp->necp_client_uuid, necp_client_uuid) == 0) {
1690 *cfil_attached = (inp->inp_socket->so_cfil != NULL);
1691 if (cfil_socket_safe_lock(inp)) {
1692 so = inp->inp_socket;
1693 }
1694 break;
1695 }
1696 }
1697 lck_rw_done(pcbinfo->ipi_lock);
1698 if (so != NULL) {
1699 goto done;
1700 }
1701
1702 pcbinfo = &udbinfo;
1703 lck_rw_lock_shared(pcbinfo->ipi_lock);
1704 LIST_FOREACH(inp, pcbinfo->ipi_listhead, inp_list) {
1705 if (inp->inp_state != INPCB_STATE_DEAD &&
1706 inp->inp_socket != NULL &&
1707 uuid_compare(inp->necp_client_uuid, necp_client_uuid) == 0) {
1708 *cfil_attached = (inp->inp_socket->so_cfil_db != NULL);
1709 if (cfil_socket_safe_lock(inp)) {
1710 so = inp->inp_socket;
1711 }
1712 break;
1713 }
1714 }
1715 lck_rw_done(pcbinfo->ipi_lock);
1716
1717 done:
1718 return so;
1719 }
1720
1721 static void
1722 cfil_info_stats_toggle(struct cfil_info *cfil_info, struct cfil_entry *entry, uint32_t report_frequency)
1723 {
1724 struct cfil_info *cfil = NULL;
1725 Boolean found = FALSE;
1726 int kcunit;
1727
1728 if (cfil_info == NULL) {
1729 return;
1730 }
1731
1732 if (report_frequency) {
1733 if (entry == NULL) {
1734 return;
1735 }
1736
1737 // Update stats reporting frequency.
1738 if (entry->cfe_stats_report_frequency != report_frequency) {
1739 entry->cfe_stats_report_frequency = report_frequency;
1740 if (entry->cfe_stats_report_frequency < CFIL_STATS_REPORT_INTERVAL_MIN_MSEC) {
1741 entry->cfe_stats_report_frequency = CFIL_STATS_REPORT_INTERVAL_MIN_MSEC;
1742 }
1743 microuptime(&entry->cfe_stats_report_ts);
1744
1745 // Insert cfil_info into the list only if it is not already in it.
1746 TAILQ_FOREACH(cfil, &cfil_sock_head_stats, cfi_link_stats) {
1747 if (cfil == cfil_info) {
1748 return;
1749 }
1750 }
1751
1752 TAILQ_INSERT_TAIL(&cfil_sock_head_stats, cfil_info, cfi_link_stats);
1753
1754 // Wake up the stats thread if this is the first flow added
1755 if (cfil_sock_attached_stats_count == 0) {
1756 thread_wakeup((caddr_t)&cfil_sock_attached_stats_count);
1757 }
1758 cfil_sock_attached_stats_count++;
1759 #if STATS_DEBUG
1760 CFIL_LOG(LOG_ERR, "CFIL: VERDICT RECEIVED - STATS FLOW INSERTED: <so %llx sockID %llu> stats frequency %d msecs",
1761 cfil_info->cfi_so ? (uint64_t)VM_KERNEL_ADDRPERM(cfil_info->cfi_so) : 0,
1762 cfil_info->cfi_sock_id,
1763 entry->cfe_stats_report_frequency);
1764 #endif
1765 }
1766 } else {
1767 // Turn off stats reporting for this filter.
1768 if (entry != NULL) {
1769 // Already off, no change.
1770 if (entry->cfe_stats_report_frequency == 0) {
1771 return;
1772 }
1773
1774 entry->cfe_stats_report_frequency = 0;
1775 // If cfil_info still has filter(s) asking for stats, no need to remove from list.
1776 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
1777 if (cfil_info->cfi_entries[kcunit - 1].cfe_stats_report_frequency > 0) {
1778 return;
1779 }
1780 }
1781 }
1782
1783 // No more filters asking for stats for this cfil_info, remove it from the list.
1784 if (!TAILQ_EMPTY(&cfil_sock_head_stats)) {
1785 found = FALSE;
1786 TAILQ_FOREACH(cfil, &cfil_sock_head_stats, cfi_link_stats) {
1787 if (cfil == cfil_info) {
1788 found = TRUE;
1789 break;
1790 }
1791 }
1792 if (found) {
1793 cfil_sock_attached_stats_count--;
1794 TAILQ_REMOVE(&cfil_sock_head_stats, cfil_info, cfi_link_stats);
1795 #if STATS_DEBUG
1796 CFIL_LOG(LOG_ERR, "CFIL: VERDICT RECEIVED - STATS FLOW DELETED: <so %llx sockID %llu> stats frequency reset",
1797 cfil_info->cfi_so ? (uint64_t)VM_KERNEL_ADDRPERM(cfil_info->cfi_so) : 0,
1798 cfil_info->cfi_sock_id);
1799 #endif
1800 }
1801 }
1802 }
1803 }
1804
1805 static errno_t
1806 cfil_ctl_send(kern_ctl_ref kctlref, u_int32_t kcunit, void *unitinfo, mbuf_t m,
1807 int flags)
1808 {
1809 #pragma unused(kctlref, flags)
1810 errno_t error = 0;
1811 struct cfil_msg_hdr *msghdr;
1812 struct content_filter *cfc = (struct content_filter *)unitinfo;
1813 struct socket *so;
1814 struct cfil_msg_action *action_msg;
1815 struct cfil_entry *entry;
1816 struct cfil_info *cfil_info = NULL;
1817 unsigned int data_len = 0;
1818
1819 CFIL_LOG(LOG_INFO, "");
1820
1821 if (content_filters == NULL) {
1822 CFIL_LOG(LOG_ERR, "no content filter");
1823 error = EINVAL;
1824 goto done;
1825 }
1826 if (kcunit > MAX_CONTENT_FILTER) {
1827 CFIL_LOG(LOG_ERR, "kcunit %u > MAX_CONTENT_FILTER (%d)",
1828 kcunit, MAX_CONTENT_FILTER);
1829 error = EINVAL;
1830 goto done;
1831 }
1832 if (m == NULL) {
1833 CFIL_LOG(LOG_ERR, "null mbuf");
1834 error = EINVAL;
1835 goto done;
1836 }
1837 data_len = m_length(m);
1838
1839 if (data_len < sizeof(struct cfil_msg_hdr)) {
1840 CFIL_LOG(LOG_ERR, "too short %u", data_len);
1841 error = EINVAL;
1842 goto done;
1843 }
1844 msghdr = (struct cfil_msg_hdr *)mbuf_data(m);
1845 if (msghdr->cfm_version != CFM_VERSION_CURRENT) {
1846 CFIL_LOG(LOG_ERR, "bad version %u", msghdr->cfm_version);
1847 error = EINVAL;
1848 goto done;
1849 }
1850 if (msghdr->cfm_type != CFM_TYPE_ACTION) {
1851 CFIL_LOG(LOG_ERR, "bad type %u", msghdr->cfm_type);
1852 error = EINVAL;
1853 goto done;
1854 }
1855 if (msghdr->cfm_len > data_len) {
1856 CFIL_LOG(LOG_ERR, "bad length %u", msghdr->cfm_len);
1857 error = EINVAL;
1858 goto done;
1859 }
1860
1861 /* Validate action operation */
1862 switch (msghdr->cfm_op) {
1863 case CFM_OP_DATA_UPDATE:
1864 OSIncrementAtomic(
1865 &cfil_stats.cfs_ctl_action_data_update);
1866 break;
1867 case CFM_OP_DROP:
1868 OSIncrementAtomic(&cfil_stats.cfs_ctl_action_drop);
1869 break;
1870 case CFM_OP_BLESS_CLIENT:
1871 if (msghdr->cfm_len != sizeof(struct cfil_msg_bless_client)) {
1872 OSIncrementAtomic(&cfil_stats.cfs_ctl_action_bad_len);
1873 error = EINVAL;
1874 CFIL_LOG(LOG_ERR, "bad len: %u for op %u",
1875 msghdr->cfm_len,
1876 msghdr->cfm_op);
1877 goto done;
1878 }
1879 error = cfil_action_bless_client(kcunit, msghdr);
1880 goto done;
1881 case CFM_OP_SET_CRYPTO_KEY:
1882 if (msghdr->cfm_len != sizeof(struct cfil_msg_set_crypto_key)) {
1883 OSIncrementAtomic(&cfil_stats.cfs_ctl_action_bad_len);
1884 error = EINVAL;
1885 CFIL_LOG(LOG_ERR, "bad len: %u for op %u",
1886 msghdr->cfm_len,
1887 msghdr->cfm_op);
1888 goto done;
1889 }
1890 error = cfil_action_set_crypto_key(kcunit, msghdr);
1891 goto done;
1892 default:
1893 OSIncrementAtomic(&cfil_stats.cfs_ctl_action_bad_op);
1894 CFIL_LOG(LOG_ERR, "bad op %u", msghdr->cfm_op);
1895 error = EINVAL;
1896 goto done;
1897 }
1898 if (msghdr->cfm_len != sizeof(struct cfil_msg_action)) {
1899 OSIncrementAtomic(&cfil_stats.cfs_ctl_action_bad_len);
1900 error = EINVAL;
1901 CFIL_LOG(LOG_ERR, "bad len: %u for op %u",
1902 msghdr->cfm_len,
1903 msghdr->cfm_op);
1904 goto done;
1905 }
1906 cfil_rw_lock_shared(&cfil_lck_rw);
1907 if (cfc != (void *)content_filters[kcunit - 1]) {
1908 CFIL_LOG(LOG_ERR, "unitinfo does not match for kcunit %u",
1909 kcunit);
1910 error = EINVAL;
1911 cfil_rw_unlock_shared(&cfil_lck_rw);
1912 goto done;
1913 }
1914 cfil_rw_unlock_shared(&cfil_lck_rw);
1915
1916 // Search for socket (TCP+UDP) and lock so
1917 so = cfil_socket_from_sock_id(msghdr->cfm_sock_id, false);
1918 if (so == NULL) {
1919 CFIL_LOG(LOG_NOTICE, "bad sock_id %llx",
1920 msghdr->cfm_sock_id);
1921 error = EINVAL;
1922 goto done;
1923 }
1924
1925 cfil_info = so->so_cfil_db != NULL ?
1926 cfil_db_get_cfil_info(so->so_cfil_db, msghdr->cfm_sock_id) : so->so_cfil;
1927
1928 if (cfil_info == NULL) {
1929 CFIL_LOG(LOG_NOTICE, "so %llx <id %llu> not attached",
1930 (uint64_t)VM_KERNEL_ADDRPERM(so), msghdr->cfm_sock_id);
1931 error = EINVAL;
1932 goto unlock;
1933 } else if (cfil_info->cfi_flags & CFIF_DROP) {
1934 CFIL_LOG(LOG_NOTICE, "so %llx drop set",
1935 (uint64_t)VM_KERNEL_ADDRPERM(so));
1936 error = EINVAL;
1937 goto unlock;
1938 }
1939
1940 if (cfil_info->cfi_debug) {
1941 cfil_info_log(LOG_ERR, cfil_info, "CFIL: RECEIVED MSG FROM FILTER");
1942 }
1943
1944 entry = &cfil_info->cfi_entries[kcunit - 1];
1945 if (entry->cfe_filter == NULL) {
1946 CFIL_LOG(LOG_NOTICE, "so %llx no filter",
1947 (uint64_t)VM_KERNEL_ADDRPERM(so));
1948 error = EINVAL;
1949 goto unlock;
1950 }
1951
1952 if (entry->cfe_flags & CFEF_SENT_SOCK_ATTACHED) {
1953 entry->cfe_flags |= CFEF_DATA_START;
1954 } else {
1955 CFIL_LOG(LOG_ERR,
1956 "so %llx attached not sent for %u",
1957 (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit);
1958 error = EINVAL;
1959 goto unlock;
1960 }
1961
1962 microuptime(&entry->cfe_last_action);
1963 CFI_ADD_TIME_LOG(cfil_info, &entry->cfe_last_action, &cfil_info->cfi_first_event, msghdr->cfm_op);
1964
1965 action_msg = (struct cfil_msg_action *)msghdr;
1966
1967 switch (msghdr->cfm_op) {
1968 case CFM_OP_DATA_UPDATE:
1969
1970 if (cfil_info->cfi_debug) {
1971 cfil_info_log(LOG_ERR, cfil_info, "CFIL: RECEIVED CFM_OP_DATA_UPDATE");
1972 CFIL_LOG(LOG_ERR, "CFIL: VERDICT RECEIVED: <so %llx sockID %llu> <IN peek:%llu pass:%llu, OUT peek:%llu pass:%llu>",
1973 (uint64_t)VM_KERNEL_ADDRPERM(so),
1974 cfil_info->cfi_sock_id,
1975 action_msg->cfa_in_peek_offset, action_msg->cfa_in_pass_offset,
1976 action_msg->cfa_out_peek_offset, action_msg->cfa_out_pass_offset);
1977 }
1978
1979 #if VERDICT_DEBUG
1980 CFIL_LOG(LOG_ERR, "CFIL: VERDICT RECEIVED: <so %llx sockID %llu> <IN peek:%llu pass:%llu, OUT peek:%llu pass:%llu>",
1981 (uint64_t)VM_KERNEL_ADDRPERM(so),
1982 cfil_info->cfi_sock_id,
1983 action_msg->cfa_in_peek_offset, action_msg->cfa_in_pass_offset,
1984 action_msg->cfa_out_peek_offset, action_msg->cfa_out_pass_offset);
1985 #endif
1986 /*
1987 * Received a verdict; at this point we know this
1988 * socket connection is allowed. Unblock the thread
1989 * immediately before proceeding to process the verdict.
1990 */
1991 cfil_sock_received_verdict(so);
1992
1993 if (action_msg->cfa_out_peek_offset != 0 ||
1994 action_msg->cfa_out_pass_offset != 0) {
1995 error = cfil_action_data_pass(so, cfil_info, kcunit, 1,
1996 action_msg->cfa_out_pass_offset,
1997 action_msg->cfa_out_peek_offset);
1998 }
1999 if (error == EJUSTRETURN) {
2000 error = 0;
2001 }
2002 if (error != 0) {
2003 break;
2004 }
2005 if (action_msg->cfa_in_peek_offset != 0 ||
2006 action_msg->cfa_in_pass_offset != 0) {
2007 error = cfil_action_data_pass(so, cfil_info, kcunit, 0,
2008 action_msg->cfa_in_pass_offset,
2009 action_msg->cfa_in_peek_offset);
2010 }
2011 if (error == EJUSTRETURN) {
2012 error = 0;
2013 }
2014
2015 // Toggle stats reporting according to received verdict.
2016 cfil_rw_lock_exclusive(&cfil_lck_rw);
2017 cfil_info_stats_toggle(cfil_info, entry, action_msg->cfa_stats_frequency);
2018 cfil_rw_unlock_exclusive(&cfil_lck_rw);
2019
2020 break;
2021
2022 case CFM_OP_DROP:
2023 if (cfil_info->cfi_debug) {
2024 cfil_info_log(LOG_ERR, cfil_info, "CFIL: RECEIVED CFM_OP_DROP");
2025 CFIL_LOG(LOG_ERR, "CFIL: VERDICT DROP RECEIVED: <so %llx sockID %llu> <IN peek:%llu pass:%llu, OUT peek:%llu pass:%llu>",
2026 (uint64_t)VM_KERNEL_ADDRPERM(so),
2027 cfil_info->cfi_sock_id,
2028 action_msg->cfa_in_peek_offset, action_msg->cfa_in_pass_offset,
2029 action_msg->cfa_out_peek_offset, action_msg->cfa_out_pass_offset);
2030 }
2031
2032 #if VERDICT_DEBUG
2033 CFIL_LOG(LOG_ERR, "CFIL: VERDICT DROP RECEIVED: <so %llx sockID %llu> <IN peek:%llu pass:%llu, OUT peek:%llu pass:%llu>",
2034 (uint64_t)VM_KERNEL_ADDRPERM(so),
2035 cfil_info->cfi_sock_id,
2036 action_msg->cfa_in_peek_offset, action_msg->cfa_in_pass_offset,
2037 action_msg->cfa_out_peek_offset, action_msg->cfa_out_pass_offset);
2038 #endif
2039 error = cfil_action_drop(so, cfil_info, kcunit);
2040 cfil_sock_received_verdict(so);
2041 break;
2042
2043 default:
2044 error = EINVAL;
2045 break;
2046 }
2047 unlock:
2048 socket_unlock(so, 1);
2049 done:
2050 mbuf_freem(m);
2051
2052 if (error == 0) {
2053 OSIncrementAtomic(&cfil_stats.cfs_ctl_send_ok);
2054 } else {
2055 OSIncrementAtomic(&cfil_stats.cfs_ctl_send_bad);
2056 }
2057
2058 return error;
2059 }
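
/*
 * Illustrative user space counterpart to cfil_ctl_send() (a sketch; the
 * cfa_msghdr member name and the kctl_fd/sock_id/offset variables are
 * assumptions here, the remaining names come from the cfil message
 * definitions used above).  A filter agent issues a pass verdict for both
 * directions of a flow by writing a CFM_OP_DATA_UPDATE action on its
 * kernel control socket:
 *
 *	struct cfil_msg_action action;
 *
 *	memset(&action, 0, sizeof(action));
 *	action.cfa_msghdr.cfm_len = sizeof(action);
 *	action.cfa_msghdr.cfm_version = CFM_VERSION_CURRENT;
 *	action.cfa_msghdr.cfm_type = CFM_TYPE_ACTION;
 *	action.cfa_msghdr.cfm_op = CFM_OP_DATA_UPDATE;
 *	action.cfa_msghdr.cfm_sock_id = sock_id;      // from the attach event
 *	action.cfa_out_pass_offset = out_pass_offset; // allow outgoing data up to this offset
 *	action.cfa_out_peek_offset = out_peek_offset; // want to see outgoing data up to this offset
 *	action.cfa_in_pass_offset = in_pass_offset;
 *	action.cfa_in_peek_offset = in_peek_offset;
 *	if (send(kctl_fd, &action, sizeof(action), 0) == -1) {
 *		warn("send CFM_OP_DATA_UPDATE");
 *	}
 *
 * cfil_ctl_send() validates the header, looks the socket up from
 * cfm_sock_id and applies the offsets via cfil_action_data_pass().
 */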
2060
2061 static errno_t
2062 cfil_ctl_getopt(kern_ctl_ref kctlref, u_int32_t kcunit, void *unitinfo,
2063 int opt, void *data, size_t *len)
2064 {
2065 #pragma unused(kctlref, opt)
2066 struct cfil_info *cfil_info = NULL;
2067 errno_t error = 0;
2068 struct content_filter *cfc = (struct content_filter *)unitinfo;
2069
2070 CFIL_LOG(LOG_NOTICE, "");
2071
2072 cfil_rw_lock_shared(&cfil_lck_rw);
2073
2074 if (content_filters == NULL) {
2075 CFIL_LOG(LOG_ERR, "no content filter");
2076 error = EINVAL;
2077 goto done;
2078 }
2079 if (kcunit > MAX_CONTENT_FILTER) {
2080 CFIL_LOG(LOG_ERR, "kcunit %u > MAX_CONTENT_FILTER (%d)",
2081 kcunit, MAX_CONTENT_FILTER);
2082 error = EINVAL;
2083 goto done;
2084 }
2085 if (cfc != (void *)content_filters[kcunit - 1]) {
2086 CFIL_LOG(LOG_ERR, "unitinfo does not match for kcunit %u",
2087 kcunit);
2088 error = EINVAL;
2089 goto done;
2090 }
2091 switch (opt) {
2092 case CFIL_OPT_NECP_CONTROL_UNIT:
2093 if (*len < sizeof(uint32_t)) {
2094 CFIL_LOG(LOG_ERR, "len too small %lu", *len);
2095 error = EINVAL;
2096 goto done;
2097 }
2098 if (data != NULL) {
2099 *(uint32_t *)data = cfc->cf_necp_control_unit;
2100 }
2101 break;
2102 case CFIL_OPT_GET_SOCKET_INFO:
2103 if (*len != sizeof(struct cfil_opt_sock_info)) {
2104 CFIL_LOG(LOG_ERR, "len does not match %lu", *len);
2105 error = EINVAL;
2106 goto done;
2107 }
2108 if (data == NULL) {
2109 CFIL_LOG(LOG_ERR, "data not passed");
2110 error = EINVAL;
2111 goto done;
2112 }
2113
2114 struct cfil_opt_sock_info *sock_info =
2115 (struct cfil_opt_sock_info *) data;
2116
2117 // Unlock here so that we never hold both cfil_lck_rw and the
2118 // socket_lock at the same time. Otherwise, this can deadlock
2119 // because soclose() takes the socket_lock and then exclusive
2120 // cfil_lck_rw and we require the opposite order.
2121
2122 // WARNING: Be sure to never use anything protected
2123 // by cfil_lck_rw beyond this point.
2124 // WARNING: Be sure to avoid fallthrough and
2125 // goto return_already_unlocked from this branch.
2126 cfil_rw_unlock_shared(&cfil_lck_rw);
2127
2128 // Search (TCP+UDP) and lock socket
2129 struct socket *sock =
2130 cfil_socket_from_sock_id(sock_info->cfs_sock_id, false);
2131 if (sock == NULL) {
2132 #if LIFECYCLE_DEBUG
2133 CFIL_LOG(LOG_ERR, "CFIL: GET_SOCKET_INFO failed: bad sock_id %llu",
2134 sock_info->cfs_sock_id);
2135 #endif
2136 error = ENOENT;
2137 goto return_already_unlocked;
2138 }
2139
2140 cfil_info = (sock->so_cfil_db != NULL) ?
2141 cfil_db_get_cfil_info(sock->so_cfil_db, sock_info->cfs_sock_id) : sock->so_cfil;
2142
2143 if (cfil_info == NULL) {
2144 #if LIFECYCLE_DEBUG
2145 CFIL_LOG(LOG_ERR, "CFIL: GET_SOCKET_INFO failed: so %llx not attached, cannot fetch info",
2146 (uint64_t)VM_KERNEL_ADDRPERM(sock));
2147 #endif
2148 error = EINVAL;
2149 socket_unlock(sock, 1);
2150 goto return_already_unlocked;
2151 }
2152
2153 // Fill out family, type, and protocol
2154 sock_info->cfs_sock_family = sock->so_proto->pr_domain->dom_family;
2155 sock_info->cfs_sock_type = sock->so_proto->pr_type;
2156 sock_info->cfs_sock_protocol = sock->so_proto->pr_protocol;
2157
2158 // Source and destination addresses
2159 struct inpcb *inp = sotoinpcb(sock);
2160 if (inp->inp_vflag & INP_IPV6) {
2161 struct in6_addr *laddr = NULL, *faddr = NULL;
2162 u_int16_t lport = 0, fport = 0;
2163
2164 cfil_get_flow_address_v6(cfil_info->cfi_hash_entry, inp,
2165 &laddr, &faddr, &lport, &fport);
2166 fill_ip6_sockaddr_4_6(&sock_info->cfs_local, laddr, lport);
2167 fill_ip6_sockaddr_4_6(&sock_info->cfs_remote, faddr, fport);
2168 } else if (inp->inp_vflag & INP_IPV4) {
2169 struct in_addr laddr = {.s_addr = 0}, faddr = {.s_addr = 0};
2170 u_int16_t lport = 0, fport = 0;
2171
2172 cfil_get_flow_address(cfil_info->cfi_hash_entry, inp,
2173 &laddr, &faddr, &lport, &fport);
2174 fill_ip_sockaddr_4_6(&sock_info->cfs_local, laddr, lport);
2175 fill_ip_sockaddr_4_6(&sock_info->cfs_remote, faddr, fport);
2176 }
2177
2178 // Set the pid info
2179 sock_info->cfs_pid = sock->last_pid;
2180 memcpy(sock_info->cfs_uuid, sock->last_uuid, sizeof(uuid_t));
2181
2182 if (sock->so_flags & SOF_DELEGATED) {
2183 sock_info->cfs_e_pid = sock->e_pid;
2184 memcpy(sock_info->cfs_e_uuid, sock->e_uuid, sizeof(uuid_t));
2185 } else {
2186 sock_info->cfs_e_pid = sock->last_pid;
2187 memcpy(sock_info->cfs_e_uuid, sock->last_uuid, sizeof(uuid_t));
2188 }
2189
2190 socket_unlock(sock, 1);
2191
2192 goto return_already_unlocked;
2193 default:
2194 error = ENOPROTOOPT;
2195 break;
2196 }
2197 done:
2198 cfil_rw_unlock_shared(&cfil_lck_rw);
2199
2200 return error;
2201
2202 return_already_unlocked:
2203
2204 return error;
2205 }
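
/*
 * Illustrative user space query for CFIL_OPT_GET_SOCKET_INFO (a sketch;
 * kctl_fd and sock_id are assumptions).  The agent pre-fills cfs_sock_id,
 * which the code above uses to locate the flow, and the rest of
 * struct cfil_opt_sock_info is filled in on return:
 *
 *	struct cfil_opt_sock_info info;
 *	socklen_t len = sizeof(info);
 *
 *	memset(&info, 0, sizeof(info));
 *	info.cfs_sock_id = sock_id;	// from a previous attach event
 *	if (getsockopt(kctl_fd, SYSPROTO_CONTROL, CFIL_OPT_GET_SOCKET_INFO,
 *	    &info, &len) == 0) {
 *		// info.cfs_sock_family / cfs_sock_type / cfs_sock_protocol,
 *		// info.cfs_local / cfs_remote, info.cfs_pid / cfs_e_pid ...
 *	}
 */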
2206
2207 static errno_t
2208 cfil_ctl_setopt(kern_ctl_ref kctlref, u_int32_t kcunit, void *unitinfo,
2209 int opt, void *data, size_t len)
2210 {
2211 #pragma unused(kctlref, opt)
2212 errno_t error = 0;
2213 struct content_filter *cfc = (struct content_filter *)unitinfo;
2214
2215 CFIL_LOG(LOG_NOTICE, "");
2216
2217 cfil_rw_lock_exclusive(&cfil_lck_rw);
2218
2219 if (content_filters == NULL) {
2220 CFIL_LOG(LOG_ERR, "no content filter");
2221 error = EINVAL;
2222 goto done;
2223 }
2224 if (kcunit > MAX_CONTENT_FILTER) {
2225 CFIL_LOG(LOG_ERR, "kcunit %u > MAX_CONTENT_FILTER (%d)",
2226 kcunit, MAX_CONTENT_FILTER);
2227 error = EINVAL;
2228 goto done;
2229 }
2230 if (cfc != (void *)content_filters[kcunit - 1]) {
2231 CFIL_LOG(LOG_ERR, "unitinfo does not match for kcunit %u",
2232 kcunit);
2233 error = EINVAL;
2234 goto done;
2235 }
2236 switch (opt) {
2237 case CFIL_OPT_NECP_CONTROL_UNIT:
2238 if (len < sizeof(uint32_t)) {
2239 CFIL_LOG(LOG_ERR, "CFIL_OPT_NECP_CONTROL_UNIT "
2240 "len too small %lu", len);
2241 error = EINVAL;
2242 goto done;
2243 }
2244 if (cfc->cf_necp_control_unit != 0) {
2245 CFIL_LOG(LOG_ERR, "CFIL_OPT_NECP_CONTROL_UNIT "
2246 "already set %u",
2247 cfc->cf_necp_control_unit);
2248 error = EINVAL;
2249 goto done;
2250 }
2251 cfc->cf_necp_control_unit = *(uint32_t *)data;
2252 break;
2253 default:
2254 error = ENOPROTOOPT;
2255 break;
2256 }
2257 done:
2258 cfil_rw_unlock_exclusive(&cfil_lck_rw);
2259
2260 return error;
2261 }
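
/*
 * Illustrative user space counterpart for CFIL_OPT_NECP_CONTROL_UNIT (a
 * sketch; kctl_fd and the control unit value are assumptions).  The agent
 * binds its kernel control socket to the NECP filter control unit its
 * filtering rules were registered under; per the check above, the option
 * can only be set once per control socket:
 *
 *	uint32_t control_unit = necp_filter_control_unit;
 *
 *	if (setsockopt(kctl_fd, SYSPROTO_CONTROL, CFIL_OPT_NECP_CONTROL_UNIT,
 *	    &control_unit, sizeof(control_unit)) == -1) {
 *		warn("setsockopt CFIL_OPT_NECP_CONTROL_UNIT");
 *	}
 */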
2262
2263
2264 static void
2265 cfil_ctl_rcvd(kern_ctl_ref kctlref, u_int32_t kcunit, void *unitinfo, int flags)
2266 {
2267 #pragma unused(kctlref, flags)
2268 struct content_filter *cfc = (struct content_filter *)unitinfo;
2269 struct socket *so = NULL;
2270 int error;
2271 struct cfil_entry *entry;
2272 struct cfil_info *cfil_info = NULL;
2273
2274 CFIL_LOG(LOG_INFO, "");
2275
2276 if (content_filters == NULL) {
2277 CFIL_LOG(LOG_ERR, "no content filter");
2278 OSIncrementAtomic(&cfil_stats.cfs_ctl_rcvd_bad);
2279 return;
2280 }
2281 if (kcunit > MAX_CONTENT_FILTER) {
2282 CFIL_LOG(LOG_ERR, "kcunit %u > MAX_CONTENT_FILTER (%d)",
2283 kcunit, MAX_CONTENT_FILTER);
2284 OSIncrementAtomic(&cfil_stats.cfs_ctl_rcvd_bad);
2285 return;
2286 }
2287 cfil_rw_lock_shared(&cfil_lck_rw);
2288 if (cfc != (void *)content_filters[kcunit - 1]) {
2289 CFIL_LOG(LOG_ERR, "unitinfo does not match for kcunit %u",
2290 kcunit);
2291 OSIncrementAtomic(&cfil_stats.cfs_ctl_rcvd_bad);
2292 goto done;
2293 }
2294 /* Let's assume the flow control is lifted */
2295 if (cfc->cf_flags & CFF_FLOW_CONTROLLED) {
2296 if (!cfil_rw_lock_shared_to_exclusive(&cfil_lck_rw)) {
2297 cfil_rw_lock_exclusive(&cfil_lck_rw);
2298 }
2299
2300 cfc->cf_flags &= ~CFF_FLOW_CONTROLLED;
2301
2302 cfil_rw_lock_exclusive_to_shared(&cfil_lck_rw);
2303 LCK_RW_ASSERT(&cfil_lck_rw, LCK_RW_ASSERT_SHARED);
2304 }
2305 /*
2306 * Flow control will be raised again as soon as an entry cannot enqueue
2307 * to the kernel control socket
2308 */
2309 while ((cfc->cf_flags & CFF_FLOW_CONTROLLED) == 0) {
2310 verify_content_filter(cfc);
2311
2312 cfil_rw_lock_assert_held(&cfil_lck_rw, 0);
2313
2314 /* Find an entry that is flow controlled */
2315 TAILQ_FOREACH(entry, &cfc->cf_sock_entries, cfe_link) {
2316 if (entry->cfe_cfil_info == NULL ||
2317 entry->cfe_cfil_info->cfi_so == NULL) {
2318 continue;
2319 }
2320 if ((entry->cfe_flags & CFEF_FLOW_CONTROLLED) == 0) {
2321 continue;
2322 }
break;	/* found a flow-controlled entry */
2323 }
2324 if (entry == NULL) {
2325 break;
2326 }
2327
2328 OSIncrementAtomic(&cfil_stats.cfs_ctl_rcvd_flow_lift);
2329
2330 cfil_info = entry->cfe_cfil_info;
2331 so = cfil_info->cfi_so;
2332
2333 cfil_rw_unlock_shared(&cfil_lck_rw);
2334 socket_lock(so, 1);
2335
2336 do {
2337 error = cfil_acquire_sockbuf(so, cfil_info, 1);
2338 if (error == 0) {
2339 error = cfil_data_service_ctl_q(so, cfil_info, kcunit, 1);
2340 }
2341 cfil_release_sockbuf(so, 1);
2342 if (error != 0) {
2343 break;
2344 }
2345
2346 error = cfil_acquire_sockbuf(so, cfil_info, 0);
2347 if (error == 0) {
2348 error = cfil_data_service_ctl_q(so, cfil_info, kcunit, 0);
2349 }
2350 cfil_release_sockbuf(so, 0);
2351 } while (0);
2352
2353 socket_lock_assert_owned(so);
2354 socket_unlock(so, 1);
2355
2356 cfil_rw_lock_shared(&cfil_lck_rw);
2357 }
2358 done:
2359 cfil_rw_unlock_shared(&cfil_lck_rw);
2360 }
2361
2362 void
2363 cfil_init(void)
2364 {
2365 struct kern_ctl_reg kern_ctl;
2366 errno_t error = 0;
2367 vm_size_t content_filter_size = 0; /* size of content_filter */
2368 vm_size_t cfil_info_size = 0; /* size of cfil_info */
2369 vm_size_t cfil_hash_entry_size = 0; /* size of cfil_hash_entry */
2370 vm_size_t cfil_db_size = 0; /* size of cfil_db */
2371 unsigned int mbuf_limit = 0;
2372
2373 CFIL_LOG(LOG_NOTICE, "");
2374
2375 /*
2376 * Compile time verifications
2377 */
2378 _CASSERT(CFIL_MAX_FILTER_COUNT == MAX_CONTENT_FILTER);
2379 _CASSERT(sizeof(struct cfil_filter_stat) % sizeof(uint32_t) == 0);
2380 _CASSERT(sizeof(struct cfil_entry_stat) % sizeof(uint32_t) == 0);
2381 _CASSERT(sizeof(struct cfil_sock_stat) % sizeof(uint32_t) == 0);
2382
2383 /*
2384 * Runtime verifications
2385 */
2386 VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_ctl_q_in_enqueued,
2387 sizeof(uint32_t)));
2388 VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_ctl_q_out_enqueued,
2389 sizeof(uint32_t)));
2390 VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_ctl_q_in_peeked,
2391 sizeof(uint32_t)));
2392 VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_ctl_q_out_peeked,
2393 sizeof(uint32_t)));
2394
2395 VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_pending_q_in_enqueued,
2396 sizeof(uint32_t)));
2397 VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_pending_q_out_enqueued,
2398 sizeof(uint32_t)));
2399
2400 VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_inject_q_in_enqueued,
2401 sizeof(uint32_t)));
2402 VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_inject_q_out_enqueued,
2403 sizeof(uint32_t)));
2404 VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_inject_q_in_passed,
2405 sizeof(uint32_t)));
2406 VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_inject_q_out_passed,
2407 sizeof(uint32_t)));
2408
2409 /*
2410 * Zone for content filters kernel control sockets
2411 */
2412 content_filter_size = sizeof(struct content_filter);
2413 content_filter_zone = zinit(content_filter_size,
2414 CONTENT_FILTER_ZONE_MAX * content_filter_size,
2415 0,
2416 CONTENT_FILTER_ZONE_NAME);
2417 if (content_filter_zone == NULL) {
2418 panic("%s: zinit(%s) failed", __func__,
2419 CONTENT_FILTER_ZONE_NAME);
2420 /* NOTREACHED */
2421 }
2422 zone_change(content_filter_zone, Z_CALLERACCT, FALSE);
2423 zone_change(content_filter_zone, Z_EXPAND, TRUE);
2424
2425 /*
2426 * Zone for per socket content filters
2427 */
2428 cfil_info_size = sizeof(struct cfil_info);
2429 cfil_info_zone = zinit(cfil_info_size,
2430 CFIL_INFO_ZONE_MAX * cfil_info_size,
2431 0,
2432 CFIL_INFO_ZONE_NAME);
2433 if (cfil_info_zone == NULL) {
2434 panic("%s: zinit(%s) failed", __func__, CFIL_INFO_ZONE_NAME);
2435 /* NOTREACHED */
2436 }
2437 zone_change(cfil_info_zone, Z_CALLERACCT, FALSE);
2438 zone_change(cfil_info_zone, Z_EXPAND, TRUE);
2439
2440 /*
2441 * Zone for content filters cfil hash entries and db
2442 */
2443 cfil_hash_entry_size = sizeof(struct cfil_hash_entry);
2444 cfil_hash_entry_zone = zinit(cfil_hash_entry_size,
2445 CFIL_HASH_ENTRY_ZONE_MAX * cfil_hash_entry_size,
2446 0,
2447 CFIL_HASH_ENTRY_ZONE_NAME);
2448 if (cfil_hash_entry_zone == NULL) {
2449 panic("%s: zinit(%s) failed", __func__, CFIL_HASH_ENTRY_ZONE_NAME);
2450 /* NOTREACHED */
2451 }
2452 zone_change(cfil_hash_entry_zone, Z_CALLERACCT, FALSE);
2453 zone_change(cfil_hash_entry_zone, Z_EXPAND, TRUE);
2454
2455 cfil_db_size = sizeof(struct cfil_db);
2456 cfil_db_zone = zinit(cfil_db_size,
2457 CFIL_DB_ZONE_MAX * cfil_db_size,
2458 0,
2459 CFIL_DB_ZONE_NAME);
2460 if (cfil_db_zone == NULL) {
2461 panic("%s: zinit(%s) failed", __func__, CFIL_DB_ZONE_NAME);
2462 /* NOTREACHED */
2463 }
2464 zone_change(cfil_db_zone, Z_CALLERACCT, FALSE);
2465 zone_change(cfil_db_zone, Z_EXPAND, TRUE);
2466
2467 /*
2468 * Allocate locks
2469 */
2470 cfil_lck_grp_attr = lck_grp_attr_alloc_init();
2471 if (cfil_lck_grp_attr == NULL) {
2472 panic("%s: lck_grp_attr_alloc_init failed", __func__);
2473 /* NOTREACHED */
2474 }
2475 cfil_lck_grp = lck_grp_alloc_init("content filter",
2476 cfil_lck_grp_attr);
2477 if (cfil_lck_grp == NULL) {
2478 panic("%s: lck_grp_alloc_init failed", __func__);
2479 /* NOTREACHED */
2480 }
2481 cfil_lck_attr = lck_attr_alloc_init();
2482 if (cfil_lck_attr == NULL) {
2483 panic("%s: lck_attr_alloc_init failed", __func__);
2484 /* NOTREACHED */
2485 }
2486 lck_rw_init(&cfil_lck_rw, cfil_lck_grp, cfil_lck_attr);
2487
2488 TAILQ_INIT(&cfil_sock_head);
2489 TAILQ_INIT(&cfil_sock_head_stats);
2490
2491 /*
2492 * Register kernel control
2493 */
2494 bzero(&kern_ctl, sizeof(kern_ctl));
2495 strlcpy(kern_ctl.ctl_name, CONTENT_FILTER_CONTROL_NAME,
2496 sizeof(kern_ctl.ctl_name));
2497 kern_ctl.ctl_flags = CTL_FLAG_PRIVILEGED | CTL_FLAG_REG_EXTENDED;
2498 kern_ctl.ctl_sendsize = 512 * 1024; /* enough? */
2499 kern_ctl.ctl_recvsize = 512 * 1024; /* enough? */
2500 kern_ctl.ctl_connect = cfil_ctl_connect;
2501 kern_ctl.ctl_disconnect = cfil_ctl_disconnect;
2502 kern_ctl.ctl_send = cfil_ctl_send;
2503 kern_ctl.ctl_getopt = cfil_ctl_getopt;
2504 kern_ctl.ctl_setopt = cfil_ctl_setopt;
2505 kern_ctl.ctl_rcvd = cfil_ctl_rcvd;
2506 error = ctl_register(&kern_ctl, &cfil_kctlref);
2507 if (error != 0) {
2508 CFIL_LOG(LOG_ERR, "ctl_register failed: %d", error);
2509 return;
2510 }
2511
2512 // Spawn thread for garbage collection
2513 if (kernel_thread_start(cfil_udp_gc_thread_func, NULL,
2514 &cfil_udp_gc_thread) != KERN_SUCCESS) {
2515 panic_plain("%s: Can't create UDP GC thread", __func__);
2516 /* NOTREACHED */
2517 }
2518 /* this must not fail */
2519 VERIFY(cfil_udp_gc_thread != NULL);
2520
2521 // Spawn thread for statistics reporting
2522 if (kernel_thread_start(cfil_stats_report_thread_func, NULL,
2523 &cfil_stats_report_thread) != KERN_SUCCESS) {
2524 panic_plain("%s: Can't create statistics report thread", __func__);
2525 /* NOTREACHED */
2526 }
2527 /* this must not fail */
2528 VERIFY(cfil_stats_report_thread != NULL);
2529
2530 // Set UDP per-flow mbuf thresholds to 1/32 of platform max
2531 mbuf_limit = MAX(UDP_FLOW_GC_MBUF_CNT_MAX, (nmbclusters << MCLSHIFT) >> UDP_FLOW_GC_MBUF_SHIFT);
2532 cfil_udp_gc_mbuf_num_max = (mbuf_limit >> MCLSHIFT);
2533 cfil_udp_gc_mbuf_cnt_max = mbuf_limit;
2534
2535 memset(&global_cfil_stats_report_buffers, 0, sizeof(global_cfil_stats_report_buffers));
2536 }
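
/*
 * Illustrative user space connection to the kernel control registered by
 * cfil_init() (a sketch using the standard kernel control socket calls;
 * CTL_FLAG_PRIVILEGED means connect() requires root):
 *
 *	struct ctl_info info;
 *	struct sockaddr_ctl addr;
 *	int fd = socket(PF_SYSTEM, SOCK_DGRAM, SYSPROTO_CONTROL);
 *
 *	memset(&info, 0, sizeof(info));
 *	strlcpy(info.ctl_name, CONTENT_FILTER_CONTROL_NAME, sizeof(info.ctl_name));
 *	if (ioctl(fd, CTLIOCGINFO, &info) == -1) {
 *		err(1, "CTLIOCGINFO");
 *	}
 *
 *	memset(&addr, 0, sizeof(addr));
 *	addr.sc_len = sizeof(addr);
 *	addr.sc_family = AF_SYSTEM;
 *	addr.ss_sysaddr = AF_SYS_CONTROL;
 *	addr.sc_id = info.ctl_id;
 *	addr.sc_unit = 0;	// kcunit is assigned by the kernel at connect time
 *	if (connect(fd, (struct sockaddr *)&addr, sizeof(addr)) == -1) {
 *		err(1, "connect");
 *	}
 */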
2537
2538 struct cfil_info *
2539 cfil_info_alloc(struct socket *so, struct cfil_hash_entry *hash_entry)
2540 {
2541 int kcunit;
2542 struct cfil_info *cfil_info = NULL;
2543 struct inpcb *inp = sotoinpcb(so);
2544
2545 CFIL_LOG(LOG_INFO, "");
2546
2547 socket_lock_assert_owned(so);
2548
2549 cfil_info = zalloc(cfil_info_zone);
2550 if (cfil_info == NULL) {
2551 goto done;
2552 }
2553 bzero(cfil_info, sizeof(struct cfil_info));
2554
2555 cfil_queue_init(&cfil_info->cfi_snd.cfi_inject_q);
2556 cfil_queue_init(&cfil_info->cfi_rcv.cfi_inject_q);
2557
2558 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
2559 struct cfil_entry *entry;
2560
2561 entry = &cfil_info->cfi_entries[kcunit - 1];
2562 entry->cfe_cfil_info = cfil_info;
2563
2564 /* Initialize the filter entry */
2565 entry->cfe_filter = NULL;
2566 entry->cfe_flags = 0;
2567 entry->cfe_necp_control_unit = 0;
2568 entry->cfe_snd.cfe_pass_offset = 0;
2569 entry->cfe_snd.cfe_peek_offset = 0;
2570 entry->cfe_snd.cfe_peeked = 0;
2571 entry->cfe_rcv.cfe_pass_offset = 0;
2572 entry->cfe_rcv.cfe_peek_offset = 0;
2573 entry->cfe_rcv.cfe_peeked = 0;
2574 /*
2575 * Timestamp the last action to avoid prematurely
2576 * triggering garbage collection
2577 */
2578 microuptime(&entry->cfe_last_action);
2579
2580 cfil_queue_init(&entry->cfe_snd.cfe_pending_q);
2581 cfil_queue_init(&entry->cfe_rcv.cfe_pending_q);
2582 cfil_queue_init(&entry->cfe_snd.cfe_ctl_q);
2583 cfil_queue_init(&entry->cfe_rcv.cfe_ctl_q);
2584 }
2585
2586 cfil_rw_lock_exclusive(&cfil_lck_rw);
2587
2588 /*
2589 * Create a cfi_sock_id that's not the socket pointer!
2590 */
2591
2592 if (hash_entry == NULL) {
2593 // This is the TCP case, cfil_info is tracked per socket
2594 if (inp->inp_flowhash == 0) {
2595 inp->inp_flowhash = inp_calc_flowhash(inp);
2596 }
2597
2598 so->so_cfil = cfil_info;
2599 cfil_info->cfi_so = so;
2600 cfil_info->cfi_sock_id =
2601 ((so->so_gencnt << 32) | inp->inp_flowhash);
2602 } else {
2603 // This is the UDP case, cfil_info is tracked in per-socket hash
2604 cfil_info->cfi_so = so;
2605 hash_entry->cfentry_cfil = cfil_info;
2606 cfil_info->cfi_hash_entry = hash_entry;
2607 cfil_info->cfi_sock_id = ((so->so_gencnt << 32) | (hash_entry->cfentry_flowhash & 0xffffffff));
2608 CFIL_LOG(LOG_DEBUG, "CFIL: UDP inp_flowhash %x so_gencnt %llx entry flowhash %x sockID %llx",
2609 inp->inp_flowhash, so->so_gencnt, hash_entry->cfentry_flowhash, cfil_info->cfi_sock_id);
2610
2611 // Wake up the gc thread if this is the first flow added
2612 if (cfil_sock_udp_attached_count == 0) {
2613 thread_wakeup((caddr_t)&cfil_sock_udp_attached_count);
2614 }
2615
2616 cfil_sock_udp_attached_count++;
2617 }
2618
2619 TAILQ_INSERT_TAIL(&cfil_sock_head, cfil_info, cfi_link);
2620 SLIST_INIT(&cfil_info->cfi_ordered_entries);
2621
2622 cfil_sock_attached_count++;
2623
2624 cfil_rw_unlock_exclusive(&cfil_lck_rw);
2625
2626 done:
2627 if (cfil_info != NULL) {
2628 OSIncrementAtomic(&cfil_stats.cfs_cfi_alloc_ok);
2629 } else {
2630 OSIncrementAtomic(&cfil_stats.cfs_cfi_alloc_fail);
2631 }
2632
2633 return cfil_info;
2634 }
2635
2636 int
2637 cfil_info_attach_unit(struct socket *so, uint32_t filter_control_unit, struct cfil_info *cfil_info)
2638 {
2639 int kcunit;
2640 int attached = 0;
2641
2642 CFIL_LOG(LOG_INFO, "");
2643
2644 socket_lock_assert_owned(so);
2645
2646 cfil_rw_lock_exclusive(&cfil_lck_rw);
2647
2648 for (kcunit = 1;
2649 content_filters != NULL && kcunit <= MAX_CONTENT_FILTER;
2650 kcunit++) {
2651 struct content_filter *cfc = content_filters[kcunit - 1];
2652 struct cfil_entry *entry;
2653 struct cfil_entry *iter_entry;
2654 struct cfil_entry *iter_prev;
2655
2656 if (cfc == NULL) {
2657 continue;
2658 }
2659 if (!(cfc->cf_necp_control_unit & filter_control_unit)) {
2660 continue;
2661 }
2662
2663 entry = &cfil_info->cfi_entries[kcunit - 1];
2664
2665 entry->cfe_filter = cfc;
2666 entry->cfe_necp_control_unit = cfc->cf_necp_control_unit;
2667 TAILQ_INSERT_TAIL(&cfc->cf_sock_entries, entry, cfe_link);
2668 cfc->cf_sock_count++;
2669
2670 /* Insert the entry into the list ordered by control unit */
2671 iter_prev = NULL;
2672 SLIST_FOREACH(iter_entry, &cfil_info->cfi_ordered_entries, cfe_order_link) {
2673 if (entry->cfe_necp_control_unit < iter_entry->cfe_necp_control_unit) {
2674 break;
2675 }
2676 iter_prev = iter_entry;
2677 }
2678
2679 if (iter_prev == NULL) {
2680 SLIST_INSERT_HEAD(&cfil_info->cfi_ordered_entries, entry, cfe_order_link);
2681 } else {
2682 SLIST_INSERT_AFTER(iter_prev, entry, cfe_order_link);
2683 }
2684
2685 verify_content_filter(cfc);
2686 attached = 1;
2687 entry->cfe_flags |= CFEF_CFIL_ATTACHED;
2688 }
2689
2690 cfil_rw_unlock_exclusive(&cfil_lck_rw);
2691
2692 return attached;
2693 }
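
/*
 * Note on ordering (illustrative): filters that attach with necp control
 * units 8, 2 and 4 end up in cfi_ordered_entries as 2 -> 4 -> 8, so the
 * filter with the lowest control unit is consulted first (see the
 * kcunit == 0 case in cfil_dispatch_attach_event() below).
 */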
2694
2695 static void
2696 cfil_info_free(struct cfil_info *cfil_info)
2697 {
2698 int kcunit;
2699 uint64_t in_drain = 0;
2700 uint64_t out_drained = 0;
2701
2702 if (cfil_info == NULL) {
2703 return;
2704 }
2705
2706 CFIL_LOG(LOG_INFO, "");
2707
2708 cfil_rw_lock_exclusive(&cfil_lck_rw);
2709
2710 for (kcunit = 1;
2711 content_filters != NULL && kcunit <= MAX_CONTENT_FILTER;
2712 kcunit++) {
2713 struct cfil_entry *entry;
2714 struct content_filter *cfc;
2715
2716 entry = &cfil_info->cfi_entries[kcunit - 1];
2717
2718 /* Don't be silly and try to detach twice */
2719 if (entry->cfe_filter == NULL) {
2720 continue;
2721 }
2722
2723 cfc = content_filters[kcunit - 1];
2724
2725 VERIFY(cfc == entry->cfe_filter);
2726
2727 entry->cfe_filter = NULL;
2728 entry->cfe_necp_control_unit = 0;
2729 TAILQ_REMOVE(&cfc->cf_sock_entries, entry, cfe_link);
2730 cfc->cf_sock_count--;
2731
2732 verify_content_filter(cfc);
2733 }
2734 if (cfil_info->cfi_hash_entry != NULL) {
2735 cfil_sock_udp_attached_count--;
2736 }
2737 cfil_sock_attached_count--;
2738 TAILQ_REMOVE(&cfil_sock_head, cfil_info, cfi_link);
2739
2740 // Turn off stats reporting for cfil_info.
2741 cfil_info_stats_toggle(cfil_info, NULL, 0);
2742
2743 out_drained += cfil_queue_drain(&cfil_info->cfi_snd.cfi_inject_q);
2744 in_drain += cfil_queue_drain(&cfil_info->cfi_rcv.cfi_inject_q);
2745
2746 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
2747 struct cfil_entry *entry;
2748
2749 entry = &cfil_info->cfi_entries[kcunit - 1];
2750 out_drained += cfil_queue_drain(&entry->cfe_snd.cfe_pending_q);
2751 in_drain += cfil_queue_drain(&entry->cfe_rcv.cfe_pending_q);
2752 out_drained += cfil_queue_drain(&entry->cfe_snd.cfe_ctl_q);
2753 in_drain += cfil_queue_drain(&entry->cfe_rcv.cfe_ctl_q);
2754 }
2755 cfil_rw_unlock_exclusive(&cfil_lck_rw);
2756
2757 if (out_drained) {
2758 OSIncrementAtomic(&cfil_stats.cfs_flush_out_free);
2759 }
2760 if (in_drain) {
2761 OSIncrementAtomic(&cfil_stats.cfs_flush_in_free);
2762 }
2763
2764 zfree(cfil_info_zone, cfil_info);
2765 }
2766
2767 /*
2768 * Received a verdict from userspace for a socket.
2769 * Perform any delayed operation if needed.
2770 */
2771 static void
2772 cfil_sock_received_verdict(struct socket *so)
2773 {
2774 if (so == NULL || so->so_cfil == NULL) {
2775 return;
2776 }
2777
2778 so->so_cfil->cfi_flags |= CFIF_INITIAL_VERDICT;
2779
2780 /*
2781 * If socket has already been connected, trigger
2782 * soisconnected now.
2783 */
2784 if (so->so_cfil->cfi_flags & CFIF_SOCKET_CONNECTED) {
2785 so->so_cfil->cfi_flags &= ~CFIF_SOCKET_CONNECTED;
2786 soisconnected(so);
2787 return;
2788 }
2789 }
2790
2791 /*
2792 * Entry point from Sockets layer
2793 * The socket is locked.
2794 *
2795 * Checks if a connected socket is subject to filtering and
2796 * still pending the initial verdict.
2797 */
2798 boolean_t
2799 cfil_sock_connected_pending_verdict(struct socket *so)
2800 {
2801 if (so == NULL || so->so_cfil == NULL) {
2802 return false;
2803 }
2804
2805 if (so->so_cfil->cfi_flags & CFIF_INITIAL_VERDICT) {
2806 return false;
2807 } else {
2808 /*
2809 * Remember that this socket is already connected at the
2810 * protocol level, so we will trigger soisconnected() upon
2811 * receipt of the initial verdict later.
2812 */
2813 so->so_cfil->cfi_flags |= CFIF_SOCKET_CONNECTED;
2814 return true;
2815 }
2816 }
2817
2818 boolean_t
2819 cfil_filter_present(void)
2820 {
2821 return cfil_active_count > 0;
2822 }
2823
2824 /*
2825 * Entry point from Sockets layer
2826 * The socket is locked.
2827 */
2828 errno_t
2829 cfil_sock_attach(struct socket *so, struct sockaddr *local, struct sockaddr *remote, int dir)
2830 {
2831 errno_t error = 0;
2832 uint32_t filter_control_unit;
2833
2834 socket_lock_assert_owned(so);
2835
2836 /* Limit ourselves to TCP sockets that are not MPTCP subflows */
2837 if ((so->so_proto->pr_domain->dom_family != PF_INET &&
2838 so->so_proto->pr_domain->dom_family != PF_INET6) ||
2839 so->so_proto->pr_type != SOCK_STREAM ||
2840 so->so_proto->pr_protocol != IPPROTO_TCP ||
2841 (so->so_flags & SOF_MP_SUBFLOW) != 0 ||
2842 (so->so_flags1 & SOF1_CONTENT_FILTER_SKIP) != 0) {
2843 goto done;
2844 }
2845
2846 filter_control_unit = necp_socket_get_content_filter_control_unit(so);
2847 if (filter_control_unit == 0) {
2848 goto done;
2849 }
2850
2851 if (filter_control_unit == NECP_FILTER_UNIT_NO_FILTER) {
2852 goto done;
2853 }
2854 if ((filter_control_unit & NECP_MASK_USERSPACE_ONLY) != 0) {
2855 OSIncrementAtomic(&cfil_stats.cfs_sock_userspace_only);
2856 goto done;
2857 }
2858 if (cfil_active_count == 0) {
2859 OSIncrementAtomic(&cfil_stats.cfs_sock_attach_in_vain);
2860 goto done;
2861 }
2862 if (so->so_cfil != NULL) {
2863 OSIncrementAtomic(&cfil_stats.cfs_sock_attach_already);
2864 CFIL_LOG(LOG_ERR, "already attached");
2865 } else {
2866 cfil_info_alloc(so, NULL);
2867 if (so->so_cfil == NULL) {
2868 error = ENOMEM;
2869 OSIncrementAtomic(&cfil_stats.cfs_sock_attach_no_mem);
2870 goto done;
2871 }
2872 so->so_cfil->cfi_dir = dir;
2873 }
2874 if (cfil_info_attach_unit(so, filter_control_unit, so->so_cfil) == 0) {
2875 CFIL_LOG(LOG_ERR, "cfil_info_attach_unit(%u) failed",
2876 filter_control_unit);
2877 OSIncrementAtomic(&cfil_stats.cfs_sock_attach_failed);
2878 goto done;
2879 }
2880 CFIL_LOG(LOG_INFO, "so %llx filter_control_unit %u sockID %llx",
2881 (uint64_t)VM_KERNEL_ADDRPERM(so),
2882 filter_control_unit, so->so_cfil->cfi_sock_id);
2883
2884 so->so_flags |= SOF_CONTENT_FILTER;
2885 OSIncrementAtomic(&cfil_stats.cfs_sock_attached);
2886
2887 /* Hold a reference on the socket */
2888 so->so_usecount++;
2889
2890 /*
2891 * Save the passed addresses for the attach event msg (in case a
2892 * resend is needed).
2893 */
2894 if (remote != NULL) {
2895 memcpy(&so->so_cfil->cfi_so_attach_faddr, remote, remote->sa_len);
2896 }
2897 if (local != NULL) {
2898 memcpy(&so->so_cfil->cfi_so_attach_laddr, local, local->sa_len);
2899 }
2900
2901 error = cfil_dispatch_attach_event(so, so->so_cfil, 0, dir);
2902 /* We can recover from flow control or out of memory errors */
2903 if (error == ENOBUFS || error == ENOMEM) {
2904 error = 0;
2905 } else if (error != 0) {
2906 goto done;
2907 }
2908
2909 CFIL_INFO_VERIFY(so->so_cfil);
2910 done:
2911 return error;
2912 }
2913
2914 /*
2915 * Entry point from Sockets layer
2916 * The socket is locked.
2917 */
2918 errno_t
2919 cfil_sock_detach(struct socket *so)
2920 {
2921 if (IS_IP_DGRAM(so)) {
2922 cfil_db_free(so);
2923 return 0;
2924 }
2925
2926 if (so->so_cfil) {
2927 if (so->so_flags & SOF_CONTENT_FILTER) {
2928 so->so_flags &= ~SOF_CONTENT_FILTER;
2929 VERIFY(so->so_usecount > 0);
2930 so->so_usecount--;
2931 }
2932 cfil_info_free(so->so_cfil);
2933 so->so_cfil = NULL;
2934 OSIncrementAtomic(&cfil_stats.cfs_sock_detached);
2935 }
2936 return 0;
2937 }
2938
2939 /*
2940 * Fill in the address info of an event message from either
2941 * the socket or the passed-in address info.
2942 */
2943 static void
2944 cfil_fill_event_msg_addresses(struct cfil_hash_entry *entry, struct inpcb *inp,
2945 union sockaddr_in_4_6 *sin_src, union sockaddr_in_4_6 *sin_dst,
2946 boolean_t isIPv4, boolean_t outgoing)
2947 {
2948 if (isIPv4) {
2949 struct in_addr laddr = {0}, faddr = {0};
2950 u_int16_t lport = 0, fport = 0;
2951
2952 cfil_get_flow_address(entry, inp, &laddr, &faddr, &lport, &fport);
2953
2954 if (outgoing) {
2955 fill_ip_sockaddr_4_6(sin_src, laddr, lport);
2956 fill_ip_sockaddr_4_6(sin_dst, faddr, fport);
2957 } else {
2958 fill_ip_sockaddr_4_6(sin_src, faddr, fport);
2959 fill_ip_sockaddr_4_6(sin_dst, laddr, lport);
2960 }
2961 } else {
2962 struct in6_addr *laddr = NULL, *faddr = NULL;
2963 u_int16_t lport = 0, fport = 0;
2964
2965 cfil_get_flow_address_v6(entry, inp, &laddr, &faddr, &lport, &fport);
2966 if (outgoing) {
2967 fill_ip6_sockaddr_4_6(sin_src, laddr, lport);
2968 fill_ip6_sockaddr_4_6(sin_dst, faddr, fport);
2969 } else {
2970 fill_ip6_sockaddr_4_6(sin_src, faddr, fport);
2971 fill_ip6_sockaddr_4_6(sin_dst, laddr, lport);
2972 }
2973 }
2974 }
2975
2976 static boolean_t
2977 cfil_dispatch_attach_event_sign(cfil_crypto_state_t crypto_state,
2978 struct cfil_info *cfil_info,
2979 struct cfil_msg_sock_attached *msg)
2980 {
2981 struct cfil_crypto_data data = {};
2982
2983 if (crypto_state == NULL || msg == NULL || cfil_info == NULL) {
2984 return false;
2985 }
2986
2987 data.sock_id = msg->cfs_msghdr.cfm_sock_id;
2988 data.direction = msg->cfs_conn_dir;
2989
2990 data.pid = msg->cfs_pid;
2991 data.effective_pid = msg->cfs_e_pid;
2992 uuid_copy(data.uuid, msg->cfs_uuid);
2993 uuid_copy(data.effective_uuid, msg->cfs_e_uuid);
2994 data.socketProtocol = msg->cfs_sock_protocol;
2995 if (data.direction == CFS_CONNECTION_DIR_OUT) {
2996 data.remote.sin6 = msg->cfs_dst.sin6;
2997 data.local.sin6 = msg->cfs_src.sin6;
2998 } else {
2999 data.remote.sin6 = msg->cfs_src.sin6;
3000 data.local.sin6 = msg->cfs_dst.sin6;
3001 }
3002
3003 // At attach, if local address is already present, no need to re-sign subsequent data messages.
3004 if (!NULLADDRESS(data.local)) {
3005 cfil_info->cfi_isSignatureLatest = true;
3006 }
3007
3008 msg->cfs_signature_length = sizeof(cfil_crypto_signature);
3009 if (cfil_crypto_sign_data(crypto_state, &data, msg->cfs_signature, &msg->cfs_signature_length) != 0) {
3010 msg->cfs_signature_length = 0;
3011 CFIL_LOG(LOG_ERR, "CFIL: Failed to sign attached msg <sockID %llu>",
3012 msg->cfs_msghdr.cfm_sock_id);
3013 return false;
3014 }
3015
3016 return true;
3017 }
3018
3019 static boolean_t
3020 cfil_dispatch_data_event_sign(cfil_crypto_state_t crypto_state,
3021 struct socket *so, struct cfil_info *cfil_info,
3022 struct cfil_msg_data_event *msg)
3023 {
3024 struct cfil_crypto_data data = {};
3025
3026 if (crypto_state == NULL || msg == NULL ||
3027 so == NULL || cfil_info == NULL) {
3028 return false;
3029 }
3030
3031 data.sock_id = cfil_info->cfi_sock_id;
3032 data.direction = cfil_info->cfi_dir;
3033 data.pid = so->last_pid;
3034 memcpy(data.uuid, so->last_uuid, sizeof(uuid_t));
3035 if (so->so_flags & SOF_DELEGATED) {
3036 data.effective_pid = so->e_pid;
3037 memcpy(data.effective_uuid, so->e_uuid, sizeof(uuid_t));
3038 } else {
3039 data.effective_pid = so->last_pid;
3040 memcpy(data.effective_uuid, so->last_uuid, sizeof(uuid_t));
3041 }
3042 data.socketProtocol = so->so_proto->pr_protocol;
3043
3044 if (data.direction == CFS_CONNECTION_DIR_OUT) {
3045 data.remote.sin6 = msg->cfc_dst.sin6;
3046 data.local.sin6 = msg->cfc_src.sin6;
3047 } else {
3048 data.remote.sin6 = msg->cfc_src.sin6;
3049 data.local.sin6 = msg->cfc_dst.sin6;
3050 }
3051
3052 // At the first data message, the local address may show up for the first time; update the
3053 // address cache so that subsequent data messages no longer need to be re-signed.
3054 if (!NULLADDRESS(data.local)) {
3055 memcpy(&cfil_info->cfi_so_attach_laddr, &data.local, data.local.sa.sa_len);
3056 cfil_info->cfi_isSignatureLatest = true;
3057 }
3058
3059 msg->cfd_signature_length = sizeof(cfil_crypto_signature);
3060 if (cfil_crypto_sign_data(crypto_state, &data, msg->cfd_signature, &msg->cfd_signature_length) != 0) {
3061 msg->cfd_signature_length = 0;
3062 CFIL_LOG(LOG_ERR, "CFIL: Failed to sign data msg <sockID %llu>",
3063 msg->cfd_msghdr.cfm_sock_id);
3064 return false;
3065 }
3066
3067 return true;
3068 }
3069
3070 static boolean_t
3071 cfil_dispatch_closed_event_sign(cfil_crypto_state_t crypto_state,
3072 struct socket *so, struct cfil_info *cfil_info,
3073 struct cfil_msg_sock_closed *msg)
3074 {
3075 struct cfil_crypto_data data = {};
3076 struct cfil_hash_entry hash_entry = {};
3077 struct cfil_hash_entry *hash_entry_ptr = NULL;
3078 struct inpcb *inp = (struct inpcb *)so->so_pcb;
3079
3080 if (crypto_state == NULL || msg == NULL ||
3081 so == NULL || inp == NULL || cfil_info == NULL) {
3082 return false;
3083 }
3084
3085 data.sock_id = cfil_info->cfi_sock_id;
3086 data.direction = cfil_info->cfi_dir;
3087
3088 data.pid = so->last_pid;
3089 memcpy(data.uuid, so->last_uuid, sizeof(uuid_t));
3090 if (so->so_flags & SOF_DELEGATED) {
3091 data.effective_pid = so->e_pid;
3092 memcpy(data.effective_uuid, so->e_uuid, sizeof(uuid_t));
3093 } else {
3094 data.effective_pid = so->last_pid;
3095 memcpy(data.effective_uuid, so->last_uuid, sizeof(uuid_t));
3096 }
3097 data.socketProtocol = so->so_proto->pr_protocol;
3098
3099 /*
3100 * Fill in address info:
3101 * For UDP, use the cfil_info hash entry directly.
3102 * For TCP, compose a hash entry with the saved addresses.
3103 */
3104 if (cfil_info->cfi_hash_entry != NULL) {
3105 hash_entry_ptr = cfil_info->cfi_hash_entry;
3106 } else if (cfil_info->cfi_so_attach_faddr.sa.sa_len > 0 ||
3107 cfil_info->cfi_so_attach_laddr.sa.sa_len > 0) {
3108 fill_cfil_hash_entry_from_address(&hash_entry, TRUE, &cfil_info->cfi_so_attach_laddr.sa);
3109 fill_cfil_hash_entry_from_address(&hash_entry, FALSE, &cfil_info->cfi_so_attach_faddr.sa);
3110 hash_entry_ptr = &hash_entry;
3111 }
3112 if (hash_entry_ptr != NULL) {
3113 boolean_t outgoing = (cfil_info->cfi_dir == CFS_CONNECTION_DIR_OUT);
3114 union sockaddr_in_4_6 *src = outgoing ? &data.local : &data.remote;
3115 union sockaddr_in_4_6 *dst = outgoing ? &data.remote : &data.local;
3116 cfil_fill_event_msg_addresses(hash_entry_ptr, inp, src, dst, !IS_INP_V6(inp), outgoing);
3117 }
3118
3119 data.byte_count_in = cfil_info->cfi_byte_inbound_count;
3120 data.byte_count_out = cfil_info->cfi_byte_outbound_count;
3121
3122 msg->cfc_signature_length = sizeof(cfil_crypto_signature);
3123 if (cfil_crypto_sign_data(crypto_state, &data, msg->cfc_signature, &msg->cfc_signature_length) != 0) {
3124 msg->cfc_signature_length = 0;
3125 CFIL_LOG(LOG_ERR, "CFIL: Failed to sign closed msg <sockID %llu>",
3126 msg->cfc_msghdr.cfm_sock_id);
3127 return false;
3128 }
3129
3130 return true;
3131 }
3132
3133 static int
3134 cfil_dispatch_attach_event(struct socket *so, struct cfil_info *cfil_info,
3135 uint32_t kcunit, int conn_dir)
3136 {
3137 errno_t error = 0;
3138 struct cfil_entry *entry = NULL;
3139 struct cfil_msg_sock_attached msg_attached;
3140 struct content_filter *cfc = NULL;
3141 struct inpcb *inp = (struct inpcb *)so->so_pcb;
3142 struct cfil_hash_entry *hash_entry_ptr = NULL;
3143 struct cfil_hash_entry hash_entry;
3144
3145 memset(&hash_entry, 0, sizeof(struct cfil_hash_entry));
3146 proc_t p = PROC_NULL;
3147 task_t t = TASK_NULL;
3148
3149 socket_lock_assert_owned(so);
3150
3151 cfil_rw_lock_shared(&cfil_lck_rw);
3152
3153 if (so->so_proto == NULL || so->so_proto->pr_domain == NULL) {
3154 error = EINVAL;
3155 goto done;
3156 }
3157
3158 if (kcunit == 0) {
3159 entry = SLIST_FIRST(&cfil_info->cfi_ordered_entries);
3160 } else {
3161 entry = &cfil_info->cfi_entries[kcunit - 1];
3162 }
3163
3164 if (entry == NULL) {
3165 goto done;
3166 }
3167
3168 cfc = entry->cfe_filter;
3169 if (cfc == NULL) {
3170 goto done;
3171 }
3172
3173 if ((entry->cfe_flags & CFEF_SENT_SOCK_ATTACHED)) {
3174 goto done;
3175 }
3176
3177 if (kcunit == 0) {
3178 kcunit = CFI_ENTRY_KCUNIT(cfil_info, entry);
3179 }
3180
3181 CFIL_LOG(LOG_INFO, "so %llx filter_control_unit %u kcunit %u",
3182 (uint64_t)VM_KERNEL_ADDRPERM(so), entry->cfe_necp_control_unit, kcunit);
3183
3184 /* Would be wasteful to try when flow controlled */
3185 if (cfc->cf_flags & CFF_FLOW_CONTROLLED) {
3186 error = ENOBUFS;
3187 goto done;
3188 }
3189
3190 bzero(&msg_attached, sizeof(struct cfil_msg_sock_attached));
3191 msg_attached.cfs_msghdr.cfm_len = sizeof(struct cfil_msg_sock_attached);
3192 msg_attached.cfs_msghdr.cfm_version = CFM_VERSION_CURRENT;
3193 msg_attached.cfs_msghdr.cfm_type = CFM_TYPE_EVENT;
3194 msg_attached.cfs_msghdr.cfm_op = CFM_OP_SOCKET_ATTACHED;
3195 msg_attached.cfs_msghdr.cfm_sock_id = entry->cfe_cfil_info->cfi_sock_id;
3196
3197 msg_attached.cfs_sock_family = so->so_proto->pr_domain->dom_family;
3198 msg_attached.cfs_sock_type = so->so_proto->pr_type;
3199 msg_attached.cfs_sock_protocol = so->so_proto->pr_protocol;
3200 msg_attached.cfs_pid = so->last_pid;
3201 memcpy(msg_attached.cfs_uuid, so->last_uuid, sizeof(uuid_t));
3202 if (so->so_flags & SOF_DELEGATED) {
3203 msg_attached.cfs_e_pid = so->e_pid;
3204 memcpy(msg_attached.cfs_e_uuid, so->e_uuid, sizeof(uuid_t));
3205 } else {
3206 msg_attached.cfs_e_pid = so->last_pid;
3207 memcpy(msg_attached.cfs_e_uuid, so->last_uuid, sizeof(uuid_t));
3208 }
3209
3210 /*
3211 * Fill in address info:
3212 * For UDP, use the cfil_info hash entry directly.
3213 * For TCP, compose a hash entry with the saved addresses.
3214 */
3215 if (cfil_info->cfi_hash_entry != NULL) {
3216 hash_entry_ptr = cfil_info->cfi_hash_entry;
3217 } else if (cfil_info->cfi_so_attach_faddr.sa.sa_len > 0 ||
3218 cfil_info->cfi_so_attach_laddr.sa.sa_len > 0) {
3219 fill_cfil_hash_entry_from_address(&hash_entry, TRUE, &cfil_info->cfi_so_attach_laddr.sa);
3220 fill_cfil_hash_entry_from_address(&hash_entry, FALSE, &cfil_info->cfi_so_attach_faddr.sa);
3221 hash_entry_ptr = &hash_entry;
3222 }
3223 if (hash_entry_ptr != NULL) {
3224 cfil_fill_event_msg_addresses(hash_entry_ptr, inp,
3225 &msg_attached.cfs_src, &msg_attached.cfs_dst,
3226 !IS_INP_V6(inp), conn_dir == CFS_CONNECTION_DIR_OUT);
3227 }
3228 msg_attached.cfs_conn_dir = conn_dir;
3229
3230 if (msg_attached.cfs_e_pid != 0) {
3231 p = proc_find(msg_attached.cfs_e_pid);
3232 if (p != PROC_NULL) {
3233 t = proc_task(p);
3234 if (t != TASK_NULL) {
3235 audit_token_t audit_token;
3236 mach_msg_type_number_t count = TASK_AUDIT_TOKEN_COUNT;
3237 if (task_info(t, TASK_AUDIT_TOKEN, (task_info_t)&audit_token, &count) == KERN_SUCCESS) {
3238 memcpy(&msg_attached.cfs_audit_token, &audit_token, sizeof(msg_attached.cfs_audit_token));
3239 } else {
3240 CFIL_LOG(LOG_ERR, "CFIL: Failed to get process audit token <sockID %llu> ",
3241 entry->cfe_cfil_info->cfi_sock_id);
3242 }
3243 }
3244 proc_rele(p);
3245 }
3246 }
3247
3248 if (cfil_info->cfi_debug) {
3249 cfil_info_log(LOG_ERR, cfil_info, "CFIL: SENDING ATTACH UP");
3250 }
3251
3252 cfil_dispatch_attach_event_sign(entry->cfe_filter->cf_crypto_state, cfil_info, &msg_attached);
3253
3254 #if LIFECYCLE_DEBUG
3255 CFIL_LOG(LOG_DEBUG, "CFIL: LIFECYCLE: SENDING ATTACH UP <sockID %llu> ",
3256 entry->cfe_cfil_info->cfi_sock_id);
3257 #endif
3258
3259 error = ctl_enqueuedata(entry->cfe_filter->cf_kcref,
3260 entry->cfe_filter->cf_kcunit,
3261 &msg_attached,
3262 sizeof(struct cfil_msg_sock_attached),
3263 CTL_DATA_EOR);
3264 if (error != 0) {
3265 CFIL_LOG(LOG_ERR, "ctl_enqueuedata() failed: %d", error);
3266 goto done;
3267 }
3268 microuptime(&entry->cfe_last_event);
3269 cfil_info->cfi_first_event.tv_sec = entry->cfe_last_event.tv_sec;
3270 cfil_info->cfi_first_event.tv_usec = entry->cfe_last_event.tv_usec;
3271
3272 entry->cfe_flags |= CFEF_SENT_SOCK_ATTACHED;
3273 OSIncrementAtomic(&cfil_stats.cfs_attach_event_ok);
3274 done:
3275
3276 /* We can recover from flow control */
3277 if (error == ENOBUFS) {
3278 entry->cfe_flags |= CFEF_FLOW_CONTROLLED;
3279 OSIncrementAtomic(&cfil_stats.cfs_attach_event_flow_control);
3280
3281 if (!cfil_rw_lock_shared_to_exclusive(&cfil_lck_rw)) {
3282 cfil_rw_lock_exclusive(&cfil_lck_rw);
3283 }
3284
3285 cfc->cf_flags |= CFF_FLOW_CONTROLLED;
3286
3287 cfil_rw_unlock_exclusive(&cfil_lck_rw);
3288 } else {
3289 if (error != 0) {
3290 OSIncrementAtomic(&cfil_stats.cfs_attach_event_fail);
3291 }
3292
3293 cfil_rw_unlock_shared(&cfil_lck_rw);
3294 }
3295 return error;
3296 }
3297
3298 static int
3299 cfil_dispatch_disconnect_event(struct socket *so, struct cfil_info *cfil_info, uint32_t kcunit, int outgoing)
3300 {
3301 errno_t error = 0;
3302 struct mbuf *msg = NULL;
3303 struct cfil_entry *entry;
3304 struct cfe_buf *entrybuf;
3305 struct cfil_msg_hdr msg_disconnected;
3306 struct content_filter *cfc;
3307
3308 socket_lock_assert_owned(so);
3309
3310 cfil_rw_lock_shared(&cfil_lck_rw);
3311
3312 entry = &cfil_info->cfi_entries[kcunit - 1];
3313 if (outgoing) {
3314 entrybuf = &entry->cfe_snd;
3315 } else {
3316 entrybuf = &entry->cfe_rcv;
3317 }
3318
3319 cfc = entry->cfe_filter;
3320 if (cfc == NULL) {
3321 goto done;
3322 }
3323
3324 CFIL_LOG(LOG_INFO, "so %llx kcunit %u outgoing %d",
3325 (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit, outgoing);
3326
3327 /*
3328 * Send the disconnection event once
3329 */
3330 if ((outgoing && (entry->cfe_flags & CFEF_SENT_DISCONNECT_OUT)) ||
3331 (!outgoing && (entry->cfe_flags & CFEF_SENT_DISCONNECT_IN))) {
3332 CFIL_LOG(LOG_INFO, "so %llx disconnect already sent",
3333 (uint64_t)VM_KERNEL_ADDRPERM(so));
3334 goto done;
3335 }
3336
3337 /*
3338 * We're not disconnected as long as some data is waiting
3339 * to be delivered to the filter
3340 */
3341 if (outgoing && cfil_queue_empty(&entrybuf->cfe_ctl_q) == 0) {
3342 CFIL_LOG(LOG_INFO, "so %llx control queue not empty",
3343 (uint64_t)VM_KERNEL_ADDRPERM(so));
3344 error = EBUSY;
3345 goto done;
3346 }
3347 /* Would be wasteful to try when flow controlled */
3348 if (cfc->cf_flags & CFF_FLOW_CONTROLLED) {
3349 error = ENOBUFS;
3350 goto done;
3351 }
3352
3353 if (cfil_info->cfi_debug) {
3354 cfil_info_log(LOG_ERR, cfil_info, "CFIL: SENDING DISCONNECT UP");
3355 }
3356
3357 #if LIFECYCLE_DEBUG
3358 cfil_info_log(LOG_ERR, cfil_info, outgoing ?
3359 "CFIL: LIFECYCLE: OUT - SENDING DISCONNECT UP":
3360 "CFIL: LIFECYCLE: IN - SENDING DISCONNECT UP");
3361 #endif
3362
3363 bzero(&msg_disconnected, sizeof(struct cfil_msg_hdr));
3364 msg_disconnected.cfm_len = sizeof(struct cfil_msg_hdr);
3365 msg_disconnected.cfm_version = CFM_VERSION_CURRENT;
3366 msg_disconnected.cfm_type = CFM_TYPE_EVENT;
3367 msg_disconnected.cfm_op = outgoing ? CFM_OP_DISCONNECT_OUT :
3368 CFM_OP_DISCONNECT_IN;
3369 msg_disconnected.cfm_sock_id = entry->cfe_cfil_info->cfi_sock_id;
3370 error = ctl_enqueuedata(entry->cfe_filter->cf_kcref,
3371 entry->cfe_filter->cf_kcunit,
3372 &msg_disconnected,
3373 sizeof(struct cfil_msg_hdr),
3374 CTL_DATA_EOR);
3375 if (error != 0) {
3376 CFIL_LOG(LOG_ERR, "ctl_enqueuedata() failed: %d", error);
3377 mbuf_freem(msg);
3378 goto done;
3379 }
3380 microuptime(&entry->cfe_last_event);
3381 CFI_ADD_TIME_LOG(cfil_info, &entry->cfe_last_event, &cfil_info->cfi_first_event, msg_disconnected.cfm_op);
3382
3383 /* Remember we have sent the disconnection message */
3384 if (outgoing) {
3385 entry->cfe_flags |= CFEF_SENT_DISCONNECT_OUT;
3386 OSIncrementAtomic(&cfil_stats.cfs_disconnect_out_event_ok);
3387 } else {
3388 entry->cfe_flags |= CFEF_SENT_DISCONNECT_IN;
3389 OSIncrementAtomic(&cfil_stats.cfs_disconnect_in_event_ok);
3390 }
3391 done:
3392 if (error == ENOBUFS) {
3393 entry->cfe_flags |= CFEF_FLOW_CONTROLLED;
3394 OSIncrementAtomic(
3395 &cfil_stats.cfs_disconnect_event_flow_control);
3396
3397 if (!cfil_rw_lock_shared_to_exclusive(&cfil_lck_rw)) {
3398 cfil_rw_lock_exclusive(&cfil_lck_rw);
3399 }
3400
3401 cfc->cf_flags |= CFF_FLOW_CONTROLLED;
3402
3403 cfil_rw_unlock_exclusive(&cfil_lck_rw);
3404 } else {
3405 if (error != 0) {
3406 OSIncrementAtomic(
3407 &cfil_stats.cfs_disconnect_event_fail);
3408 }
3409
3410 cfil_rw_unlock_shared(&cfil_lck_rw);
3411 }
3412 return error;
3413 }
3414
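/*
 * cfil_dispatch_closed_event()
 *
 * Send a single CFM_OP_SOCKET_CLOSED event per filter, including the
 * byte counts and the operation time log accumulated for the flow.
 */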
3415 int
3416 cfil_dispatch_closed_event(struct socket *so, struct cfil_info *cfil_info, int kcunit)
3417 {
3418 struct cfil_entry *entry;
3419 struct cfil_msg_sock_closed msg_closed;
3420 errno_t error = 0;
3421 struct content_filter *cfc;
3422
3423 socket_lock_assert_owned(so);
3424
3425 cfil_rw_lock_shared(&cfil_lck_rw);
3426
3427 entry = &cfil_info->cfi_entries[kcunit - 1];
3428 cfc = entry->cfe_filter;
3429 if (cfc == NULL) {
3430 goto done;
3431 }
3432
3433 CFIL_LOG(LOG_INFO, "so %llx kcunit %d",
3434 (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit);
3435
3436 /* Would be wasteful to try when flow controlled */
3437 if (cfc->cf_flags & CFF_FLOW_CONTROLLED) {
3438 error = ENOBUFS;
3439 goto done;
3440 }
3441 /*
3442 * Send a single closed message per filter
3443 */
3444 if ((entry->cfe_flags & CFEF_SENT_SOCK_CLOSED) != 0) {
3445 goto done;
3446 }
3447 if ((entry->cfe_flags & CFEF_SENT_SOCK_ATTACHED) == 0) {
3448 goto done;
3449 }
3450
3451 microuptime(&entry->cfe_last_event);
3452 CFI_ADD_TIME_LOG(cfil_info, &entry->cfe_last_event, &cfil_info->cfi_first_event, CFM_OP_SOCKET_CLOSED);
3453
3454 bzero(&msg_closed, sizeof(struct cfil_msg_sock_closed));
3455 msg_closed.cfc_msghdr.cfm_len = sizeof(struct cfil_msg_sock_closed);
3456 msg_closed.cfc_msghdr.cfm_version = CFM_VERSION_CURRENT;
3457 msg_closed.cfc_msghdr.cfm_type = CFM_TYPE_EVENT;
3458 msg_closed.cfc_msghdr.cfm_op = CFM_OP_SOCKET_CLOSED;
3459 msg_closed.cfc_msghdr.cfm_sock_id = entry->cfe_cfil_info->cfi_sock_id;
3460 msg_closed.cfc_first_event.tv_sec = cfil_info->cfi_first_event.tv_sec;
3461 msg_closed.cfc_first_event.tv_usec = cfil_info->cfi_first_event.tv_usec;
3462 memcpy(msg_closed.cfc_op_time, cfil_info->cfi_op_time, sizeof(uint32_t) * CFI_MAX_TIME_LOG_ENTRY);
3463 memcpy(msg_closed.cfc_op_list, cfil_info->cfi_op_list, sizeof(unsigned char) * CFI_MAX_TIME_LOG_ENTRY);
3464 msg_closed.cfc_op_list_ctr = cfil_info->cfi_op_list_ctr;
3465 msg_closed.cfc_byte_inbound_count = cfil_info->cfi_byte_inbound_count;
3466 msg_closed.cfc_byte_outbound_count = cfil_info->cfi_byte_outbound_count;
3467
3468 cfil_dispatch_closed_event_sign(entry->cfe_filter->cf_crypto_state, so, cfil_info, &msg_closed);
3469
3470 if (cfil_info->cfi_debug) {
3471 cfil_info_log(LOG_ERR, cfil_info, "CFIL: SENDING CLOSED UP");
3472 }
3473
3474 #if LIFECYCLE_DEBUG
3475 CFIL_LOG(LOG_ERR, "CFIL: LIFECYCLE: SENDING CLOSED UP: <sock id %llu> op ctr %d, start time %llu.%llu", msg_closed.cfc_msghdr.cfm_sock_id, cfil_info->cfi_op_list_ctr, cfil_info->cfi_first_event.tv_sec, cfil_info->cfi_first_event.tv_usec);
3476 #endif
3477 /* for debugging
3478 * if (msg_closed.cfc_op_list_ctr > CFI_MAX_TIME_LOG_ENTRY) {
3479 * msg_closed.cfc_op_list_ctr = CFI_MAX_TIME_LOG_ENTRY; // just in case
3480 * }
3481 * for (unsigned int i = 0; i < msg_closed.cfc_op_list_ctr ; i++) {
3482 * CFIL_LOG(LOG_ERR, "MD: socket %llu event %2u, time + %u msec", msg_closed.cfc_msghdr.cfm_sock_id, (unsigned short)msg_closed.cfc_op_list[i], msg_closed.cfc_op_time[i]);
3483 * }
3484 */
3485
3486 error = ctl_enqueuedata(entry->cfe_filter->cf_kcref,
3487 entry->cfe_filter->cf_kcunit,
3488 &msg_closed,
3489 sizeof(struct cfil_msg_sock_closed),
3490 CTL_DATA_EOR);
3491 if (error != 0) {
3492 CFIL_LOG(LOG_ERR, "ctl_enqueuedata() failed: %d",
3493 error);
3494 goto done;
3495 }
3496
3497 entry->cfe_flags |= CFEF_SENT_SOCK_CLOSED;
3498 OSIncrementAtomic(&cfil_stats.cfs_closed_event_ok);
3499 done:
3500 /* We can recover from flow control */
3501 if (error == ENOBUFS) {
3502 entry->cfe_flags |= CFEF_FLOW_CONTROLLED;
3503 OSIncrementAtomic(&cfil_stats.cfs_closed_event_flow_control);
3504
3505 if (!cfil_rw_lock_shared_to_exclusive(&cfil_lck_rw)) {
3506 cfil_rw_lock_exclusive(&cfil_lck_rw);
3507 }
3508
3509 cfc->cf_flags |= CFF_FLOW_CONTROLLED;
3510
3511 cfil_rw_unlock_exclusive(&cfil_lck_rw);
3512 } else {
3513 if (error != 0) {
3514 OSIncrementAtomic(&cfil_stats.cfs_closed_event_fail);
3515 }
3516
3517 cfil_rw_unlock_shared(&cfil_lck_rw);
3518 }
3519
3520 return error;
3521 }
3522
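/*
 * Helpers to fill the sockaddr_in_4_6 union of an event message from an
 * IPv6 or IPv4 address and port.
 */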
3523 static void
3524 fill_ip6_sockaddr_4_6(union sockaddr_in_4_6 *sin46,
3525 struct in6_addr *ip6, u_int16_t port)
3526 {
3527 if (sin46 == NULL) {
3528 return;
3529 }
3530
3531 struct sockaddr_in6 *sin6 = &sin46->sin6;
3532
3533 sin6->sin6_family = AF_INET6;
3534 sin6->sin6_len = sizeof(*sin6);
3535 sin6->sin6_port = port;
3536 sin6->sin6_addr = *ip6;
3537 if (IN6_IS_SCOPE_EMBED(&sin6->sin6_addr)) {
3538 sin6->sin6_scope_id = ntohs(sin6->sin6_addr.s6_addr16[1]);
3539 sin6->sin6_addr.s6_addr16[1] = 0;
3540 }
3541 }
3542
3543 static void
3544 fill_ip_sockaddr_4_6(union sockaddr_in_4_6 *sin46,
3545 struct in_addr ip, u_int16_t port)
3546 {
3547 if (sin46 == NULL) {
3548 return;
3549 }
3550
3551 struct sockaddr_in *sin = &sin46->sin;
3552
3553 sin->sin_family = AF_INET;
3554 sin->sin_len = sizeof(*sin);
3555 sin->sin_port = port;
3556 sin->sin_addr.s_addr = ip.s_addr;
3557 }
3558
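/*
 * Return the local and foreign addresses and ports for a flow, taken from
 * the content filter hash entry when present, otherwise from the inpcb.
 */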
3559 static void
3560 cfil_get_flow_address_v6(struct cfil_hash_entry *entry, struct inpcb *inp,
3561 struct in6_addr **laddr, struct in6_addr **faddr,
3562 u_int16_t *lport, u_int16_t *fport)
3563 {
3564 if (entry != NULL) {
3565 *laddr = &entry->cfentry_laddr.addr6;
3566 *faddr = &entry->cfentry_faddr.addr6;
3567 *lport = entry->cfentry_lport;
3568 *fport = entry->cfentry_fport;
3569 } else {
3570 *laddr = &inp->in6p_laddr;
3571 *faddr = &inp->in6p_faddr;
3572 *lport = inp->inp_lport;
3573 *fport = inp->inp_fport;
3574 }
3575 }
3576
3577 static void
3578 cfil_get_flow_address(struct cfil_hash_entry *entry, struct inpcb *inp,
3579 struct in_addr *laddr, struct in_addr *faddr,
3580 u_int16_t *lport, u_int16_t *fport)
3581 {
3582 if (entry != NULL) {
3583 *laddr = entry->cfentry_laddr.addr46.ia46_addr4;
3584 *faddr = entry->cfentry_faddr.addr46.ia46_addr4;
3585 *lport = entry->cfentry_lport;
3586 *fport = entry->cfentry_fport;
3587 } else {
3588 *laddr = inp->inp_laddr;
3589 *faddr = inp->inp_faddr;
3590 *lport = inp->inp_lport;
3591 *fport = inp->inp_fport;
3592 }
3593 }
3594
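/*
 * cfil_dispatch_data_event()
 *
 * Copy the span [copyoffset, copyoffset + copylen) of the mbuf chain into
 * a CFM_OP_DATA_OUT/CFM_OP_DATA_IN message and enqueue it on the filter's
 * kernel control socket. ENOBUFS from the enqueue marks the filter as
 * flow controlled.
 */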
3595 static int
3596 cfil_dispatch_data_event(struct socket *so, struct cfil_info *cfil_info, uint32_t kcunit, int outgoing,
3597 struct mbuf *data, unsigned int copyoffset, unsigned int copylen)
3598 {
3599 errno_t error = 0;
3600 struct mbuf *copy = NULL;
3601 struct mbuf *msg = NULL;
3602 unsigned int one = 1;
3603 struct cfil_msg_data_event *data_req;
3604 size_t hdrsize;
3605 struct inpcb *inp = (struct inpcb *)so->so_pcb;
3606 struct cfil_entry *entry;
3607 struct cfe_buf *entrybuf;
3608 struct content_filter *cfc;
3609 struct timeval tv;
3610 int inp_flags = 0;
3611
3612 cfil_rw_lock_shared(&cfil_lck_rw);
3613
3614 entry = &cfil_info->cfi_entries[kcunit - 1];
3615 if (outgoing) {
3616 entrybuf = &entry->cfe_snd;
3617 } else {
3618 entrybuf = &entry->cfe_rcv;
3619 }
3620
3621 cfc = entry->cfe_filter;
3622 if (cfc == NULL) {
3623 goto done;
3624 }
3625
3626 data = cfil_data_start(data);
3627 if (data == NULL || (data->m_flags & M_PKTHDR) == 0) {
3628 CFIL_LOG(LOG_ERR, "NOT PKTHDR");
3629 goto done;
3630 }
3631
3632 CFIL_LOG(LOG_INFO, "so %llx kcunit %u outgoing %d",
3633 (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit, outgoing);
3634
3635 socket_lock_assert_owned(so);
3636
3637 /* Would be wasteful to try */
3638 if (cfc->cf_flags & CFF_FLOW_CONTROLLED) {
3639 error = ENOBUFS;
3640 goto done;
3641 }
3642
3643 /* Make a copy of the data to pass to kernel control socket */
3644 copy = m_copym_mode(data, copyoffset, copylen, M_DONTWAIT,
3645 M_COPYM_NOOP_HDR);
3646 if (copy == NULL) {
3647 CFIL_LOG(LOG_ERR, "m_copym_mode() failed");
3648 error = ENOMEM;
3649 goto done;
3650 }
3651
3652 /* We need an mbuf packet for the message header */
3653 hdrsize = sizeof(struct cfil_msg_data_event);
3654 error = mbuf_allocpacket(MBUF_DONTWAIT, hdrsize, &one, &msg);
3655 if (error != 0) {
3656 CFIL_LOG(LOG_ERR, "mbuf_allocpacket() failed");
3657 m_freem(copy);
3658 /*
3659 * ENOBUFS is reserved to indicate flow control, so report ENOMEM instead
3660 */
3661 error = ENOMEM;
3662 goto done;
3663 }
3664 mbuf_setlen(msg, hdrsize);
3665 mbuf_pkthdr_setlen(msg, hdrsize + copylen);
3666 msg->m_next = copy;
3667 data_req = (struct cfil_msg_data_event *)mbuf_data(msg);
3668 bzero(data_req, hdrsize);
3669 data_req->cfd_msghdr.cfm_len = hdrsize + copylen;
3670 data_req->cfd_msghdr.cfm_version = CFM_VERSION_CURRENT;
3671 data_req->cfd_msghdr.cfm_type = CFM_TYPE_EVENT;
3672 data_req->cfd_msghdr.cfm_op =
3673 outgoing ? CFM_OP_DATA_OUT : CFM_OP_DATA_IN;
3674 data_req->cfd_msghdr.cfm_sock_id =
3675 entry->cfe_cfil_info->cfi_sock_id;
3676 data_req->cfd_start_offset = entrybuf->cfe_peeked;
3677 data_req->cfd_end_offset = entrybuf->cfe_peeked + copylen;
3678
3679 data_req->cfd_flags = 0;
3680 if (OPTIONAL_IP_HEADER(so)) {
3681 /*
3682 * For non-UDP/TCP traffic, indicate to filters if optional
3683 * IP header is present:
3684 * outgoing - indicate according to INP_HDRINCL flag
3685 * incoming - For IPv4 only, stripping of IP header is
3686 * optional. But for CFIL, we delay stripping
3687 * at rip_input. So CFIL always expects IP
3688 * frames. IP header will be stripped according
3689 * to INP_STRIPHDR flag later at reinjection.
3690 */
3691 if ((!outgoing && !IS_INP_V6(inp)) ||
3692 (outgoing && cfil_dgram_peek_socket_state(data, &inp_flags) && (inp_flags & INP_HDRINCL))) {
3693 data_req->cfd_flags |= CFD_DATA_FLAG_IP_HEADER;
3694 }
3695 }
3696
3697 /*
3698 * Copy address/port into event msg.
3699 * For non-connected sockets, the addresses must be copied from the
3700 * passed-in parameters
3701 */
3702 cfil_fill_event_msg_addresses(cfil_info->cfi_hash_entry, inp,
3703 &data_req->cfc_src, &data_req->cfc_dst,
3704 !IS_INP_V6(inp), outgoing);
3705
3706 if (cfil_info->cfi_debug) {
3707 cfil_info_log(LOG_ERR, cfil_info, "CFIL: SENDING DATA UP");
3708 }
3709
3710 if (cfil_info->cfi_isSignatureLatest == false) {
3711 cfil_dispatch_data_event_sign(entry->cfe_filter->cf_crypto_state, so, cfil_info, data_req);
3712 }
3713
3714 microuptime(&tv);
3715 CFI_ADD_TIME_LOG(cfil_info, &tv, &cfil_info->cfi_first_event, data_req->cfd_msghdr.cfm_op);
3716
3717 /* Pass the message to the content filter */
3718 error = ctl_enqueuembuf(entry->cfe_filter->cf_kcref,
3719 entry->cfe_filter->cf_kcunit,
3720 msg, CTL_DATA_EOR);
3721 if (error != 0) {
3722 CFIL_LOG(LOG_ERR, "ctl_enqueuembuf() failed: %d", error);
3723 mbuf_freem(msg);
3724 goto done;
3725 }
3726 entry->cfe_flags &= ~CFEF_FLOW_CONTROLLED;
3727 OSIncrementAtomic(&cfil_stats.cfs_data_event_ok);
3728
3729 #if VERDICT_DEBUG
3730 CFIL_LOG(LOG_ERR, "CFIL: VERDICT ACTION: so %llx sockID %llu outgoing %d: mbuf %llx copyoffset %u copylen %u",
3731 (uint64_t)VM_KERNEL_ADDRPERM(so), cfil_info->cfi_sock_id, outgoing, (uint64_t)VM_KERNEL_ADDRPERM(data), copyoffset, copylen);
3732 #endif
3733
3734 if (cfil_info->cfi_debug) {
3735 CFIL_LOG(LOG_ERR, "CFIL: VERDICT ACTION: so %llx sockID %llu outgoing %d: mbuf %llx copyoffset %u copylen %u (%s)",
3736 (uint64_t)VM_KERNEL_ADDRPERM(so), cfil_info->cfi_sock_id, outgoing, (uint64_t)VM_KERNEL_ADDRPERM(data), copyoffset, copylen,
3737 data_req->cfd_flags & CFD_DATA_FLAG_IP_HEADER ? "IP HDR" : "NO IP HDR");
3738 }
3739
3740 done:
3741 if (error == ENOBUFS) {
3742 entry->cfe_flags |= CFEF_FLOW_CONTROLLED;
3743 OSIncrementAtomic(
3744 &cfil_stats.cfs_data_event_flow_control);
3745
3746 if (!cfil_rw_lock_shared_to_exclusive(&cfil_lck_rw)) {
3747 cfil_rw_lock_exclusive(&cfil_lck_rw);
3748 }
3749
3750 cfc->cf_flags |= CFF_FLOW_CONTROLLED;
3751
3752 cfil_rw_unlock_exclusive(&cfil_lck_rw);
3753 } else {
3754 if (error != 0) {
3755 OSIncrementAtomic(&cfil_stats.cfs_data_event_fail);
3756 }
3757
3758 cfil_rw_unlock_shared(&cfil_lck_rw);
3759 }
3760 return error;
3761 }
3762
3763 /*
3764 * Process the queue of data waiting to be delivered to content filter
3765 */
3766 static int
3767 cfil_data_service_ctl_q(struct socket *so, struct cfil_info *cfil_info, uint32_t kcunit, int outgoing)
3768 {
3769 errno_t error = 0;
3770 struct mbuf *data, *tmp = NULL;
3771 unsigned int datalen = 0, copylen = 0, copyoffset = 0;
3772 struct cfil_entry *entry;
3773 struct cfe_buf *entrybuf;
3774 uint64_t currentoffset = 0;
3775
3776 if (cfil_info == NULL) {
3777 return 0;
3778 }
3779
3780 CFIL_LOG(LOG_INFO, "so %llx kcunit %u outgoing %d",
3781 (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit, outgoing);
3782
3783 socket_lock_assert_owned(so);
3784
3785 entry = &cfil_info->cfi_entries[kcunit - 1];
3786 if (outgoing) {
3787 entrybuf = &entry->cfe_snd;
3788 } else {
3789 entrybuf = &entry->cfe_rcv;
3790 }
3791
3792 /* Send attached message if not yet done */
3793 if ((entry->cfe_flags & CFEF_SENT_SOCK_ATTACHED) == 0) {
3794 error = cfil_dispatch_attach_event(so, cfil_info, CFI_ENTRY_KCUNIT(cfil_info, entry),
3795 outgoing ? CFS_CONNECTION_DIR_OUT : CFS_CONNECTION_DIR_IN);
3796 if (error != 0) {
3797 /* We can recover from flow control */
3798 if (error == ENOBUFS || error == ENOMEM) {
3799 error = 0;
3800 }
3801 goto done;
3802 }
3803 } else if ((entry->cfe_flags & CFEF_DATA_START) == 0) {
3804 OSIncrementAtomic(&cfil_stats.cfs_ctl_q_not_started);
3805 goto done;
3806 }
3807
3808 #if DATA_DEBUG
3809 CFIL_LOG(LOG_DEBUG, "CFIL: SERVICE CTL-Q: pass_offset %llu peeked %llu peek_offset %llu",
3810 entrybuf->cfe_pass_offset,
3811 entrybuf->cfe_peeked,
3812 entrybuf->cfe_peek_offset);
3813 #endif
3814
3815 /* Move all data that can pass */
3816 while ((data = cfil_queue_first(&entrybuf->cfe_ctl_q)) != NULL &&
3817 entrybuf->cfe_ctl_q.q_start < entrybuf->cfe_pass_offset) {
3818 datalen = cfil_data_length(data, NULL, NULL);
3819 tmp = data;
3820
3821 if (entrybuf->cfe_ctl_q.q_start + datalen <=
3822 entrybuf->cfe_pass_offset) {
3823 /*
3824 * The first mbuf can fully pass
3825 */
3826 copylen = datalen;
3827 } else {
3828 /*
3829 * The first mbuf can partially pass
3830 */
3831 copylen = entrybuf->cfe_pass_offset -
3832 entrybuf->cfe_ctl_q.q_start;
3833 }
3834 VERIFY(copylen <= datalen);
3835
3836 #if DATA_DEBUG
3837 CFIL_LOG(LOG_DEBUG,
3838 "CFIL: SERVICE CTL-Q PASSING: %llx first %llu peeked %llu pass %llu peek %llu"
3839 "datalen %u copylen %u",
3840 (uint64_t)VM_KERNEL_ADDRPERM(tmp),
3841 entrybuf->cfe_ctl_q.q_start,
3842 entrybuf->cfe_peeked,
3843 entrybuf->cfe_pass_offset,
3844 entrybuf->cfe_peek_offset,
3845 datalen, copylen);
3846 #endif
3847
3848 /*
3849 * Data that passes has been peeked at explicitly or
3850 * implicitly
3851 */
3852 if (entrybuf->cfe_ctl_q.q_start + copylen >
3853 entrybuf->cfe_peeked) {
3854 entrybuf->cfe_peeked =
3855 entrybuf->cfe_ctl_q.q_start + copylen;
3856 }
3857 /*
3858 * Stop on partial pass
3859 */
3860 if (copylen < datalen) {
3861 break;
3862 }
3863
3864 /* All good, move full data from ctl queue to pending queue */
3865 cfil_queue_remove(&entrybuf->cfe_ctl_q, data, datalen);
3866
3867 cfil_queue_enqueue(&entrybuf->cfe_pending_q, data, datalen);
3868 if (outgoing) {
3869 OSAddAtomic64(datalen,
3870 &cfil_stats.cfs_pending_q_out_enqueued);
3871 } else {
3872 OSAddAtomic64(datalen,
3873 &cfil_stats.cfs_pending_q_in_enqueued);
3874 }
3875 }
3876 CFIL_INFO_VERIFY(cfil_info);
3877 if (tmp != NULL) {
3878 CFIL_LOG(LOG_DEBUG,
3879 "%llx first %llu peeked %llu pass %llu peek %llu"
3880 "datalen %u copylen %u",
3881 (uint64_t)VM_KERNEL_ADDRPERM(tmp),
3882 entrybuf->cfe_ctl_q.q_start,
3883 entrybuf->cfe_peeked,
3884 entrybuf->cfe_pass_offset,
3885 entrybuf->cfe_peek_offset,
3886 datalen, copylen);
3887 }
3888 tmp = NULL;
3889
3890 /* Now deal with remaining data the filter wants to peek at */
3891 for (data = cfil_queue_first(&entrybuf->cfe_ctl_q),
3892 currentoffset = entrybuf->cfe_ctl_q.q_start;
3893 data != NULL && currentoffset < entrybuf->cfe_peek_offset;
3894 data = cfil_queue_next(&entrybuf->cfe_ctl_q, data),
3895 currentoffset += datalen) {
3896 datalen = cfil_data_length(data, NULL, NULL);
3897 tmp = data;
3898
3899 /* We've already peeked at this mbuf */
3900 if (currentoffset + datalen <= entrybuf->cfe_peeked) {
3901 continue;
3902 }
3903 /*
3904 * The data in the first mbuf may have been
3905 * partially peeked at
3906 */
3907 copyoffset = entrybuf->cfe_peeked - currentoffset;
3908 VERIFY(copyoffset < datalen);
3909 copylen = datalen - copyoffset;
3910 VERIFY(copylen <= datalen);
3911 /*
3912 * Do not copy more than needed
3913 */
3914 if (currentoffset + copyoffset + copylen >
3915 entrybuf->cfe_peek_offset) {
3916 copylen = entrybuf->cfe_peek_offset -
3917 (currentoffset + copyoffset);
3918 }
3919
3920 #if DATA_DEBUG
3921 CFIL_LOG(LOG_DEBUG,
3922 "CFIL: SERVICE CTL-Q PEEKING: %llx current %llu peeked %llu pass %llu peek %llu "
3923 "datalen %u copylen %u copyoffset %u",
3924 (uint64_t)VM_KERNEL_ADDRPERM(tmp),
3925 currentoffset,
3926 entrybuf->cfe_peeked,
3927 entrybuf->cfe_pass_offset,
3928 entrybuf->cfe_peek_offset,
3929 datalen, copylen, copyoffset);
3930 #endif
3931
3932 /*
3933 * Stop if there is nothing more to peek at
3934 */
3935 if (copylen == 0) {
3936 break;
3937 }
3938 /*
3939 * Let the filter get a peek at this span of data
3940 */
3941 error = cfil_dispatch_data_event(so, cfil_info, kcunit,
3942 outgoing, data, copyoffset, copylen);
3943 if (error != 0) {
3944 /* On error, leave data in ctl_q */
3945 break;
3946 }
3947 entrybuf->cfe_peeked += copylen;
3948 if (outgoing) {
3949 OSAddAtomic64(copylen,
3950 &cfil_stats.cfs_ctl_q_out_peeked);
3951 } else {
3952 OSAddAtomic64(copylen,
3953 &cfil_stats.cfs_ctl_q_in_peeked);
3954 }
3955
3956 /* Stop when data could not be fully peeked at */
3957 if (copylen + copyoffset < datalen) {
3958 break;
3959 }
3960 }
3961 CFIL_INFO_VERIFY(cfil_info);
3962 if (tmp != NULL) {
3963 CFIL_LOG(LOG_DEBUG,
3964 "%llx first %llu peeked %llu pass %llu peek %llu"
3965 "datalen %u copylen %u copyoffset %u",
3966 (uint64_t)VM_KERNEL_ADDRPERM(tmp),
3967 currentoffset,
3968 entrybuf->cfe_peeked,
3969 entrybuf->cfe_pass_offset,
3970 entrybuf->cfe_peek_offset,
3971 datalen, copylen, copyoffset);
3972 }
3973
3974 /*
3975 * Process data that has passed the filter
3976 */
3977 error = cfil_service_pending_queue(so, cfil_info, kcunit, outgoing);
3978 if (error != 0) {
3979 CFIL_LOG(LOG_ERR, "cfil_service_pending_queue() error %d",
3980 error);
3981 goto done;
3982 }
3983
3984 /*
3985 * Dispatch disconnect events that could not be sent
3986 */
3987 if (cfil_info == NULL) {
3988 goto done;
3989 } else if (outgoing) {
3990 if ((cfil_info->cfi_flags & CFIF_SHUT_WR) &&
3991 !(entry->cfe_flags & CFEF_SENT_DISCONNECT_OUT)) {
3992 cfil_dispatch_disconnect_event(so, cfil_info, kcunit, 1);
3993 }
3994 } else {
3995 if ((cfil_info->cfi_flags & CFIF_SHUT_RD) &&
3996 !(entry->cfe_flags & CFEF_SENT_DISCONNECT_IN)) {
3997 cfil_dispatch_disconnect_event(so, cfil_info, kcunit, 0);
3998 }
3999 }
4000
4001 done:
4002 CFIL_LOG(LOG_DEBUG,
4003 "first %llu peeked %llu pass %llu peek %llu",
4004 entrybuf->cfe_ctl_q.q_start,
4005 entrybuf->cfe_peeked,
4006 entrybuf->cfe_pass_offset,
4007 entrybuf->cfe_peek_offset);
4008
4009 CFIL_INFO_VERIFY(cfil_info);
4010 return error;
4011 }
4012
4013 /*
4014 * cfil_data_filter()
4015 *
4016 * Process data for a content filter installed on a socket
4017 */
4018 int
4019 cfil_data_filter(struct socket *so, struct cfil_info *cfil_info, uint32_t kcunit, int outgoing,
4020 struct mbuf *data, uint64_t datalen)
4021 {
4022 errno_t error = 0;
4023 struct cfil_entry *entry;
4024 struct cfe_buf *entrybuf;
4025
4026 CFIL_LOG(LOG_INFO, "so %llx kcunit %u outgoing %d",
4027 (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit, outgoing);
4028
4029 socket_lock_assert_owned(so);
4030
4031 entry = &cfil_info->cfi_entries[kcunit - 1];
4032 if (outgoing) {
4033 entrybuf = &entry->cfe_snd;
4034 } else {
4035 entrybuf = &entry->cfe_rcv;
4036 }
4037
4038 /* Are we attached to the filter? */
4039 if (entry->cfe_filter == NULL) {
4040 error = 0;
4041 goto done;
4042 }
4043
4044 /* Dispatch to filters */
4045 cfil_queue_enqueue(&entrybuf->cfe_ctl_q, data, datalen);
4046 if (outgoing) {
4047 OSAddAtomic64(datalen,
4048 &cfil_stats.cfs_ctl_q_out_enqueued);
4049 } else {
4050 OSAddAtomic64(datalen,
4051 &cfil_stats.cfs_ctl_q_in_enqueued);
4052 }
4053
4054 error = cfil_data_service_ctl_q(so, cfil_info, kcunit, outgoing);
4055 if (error != 0) {
4056 CFIL_LOG(LOG_ERR, "cfil_data_service_ctl_q() error %d",
4057 error);
4058 }
4059 /*
4060 * We have to return EJUSTRETURN in all cases to avoid double free
4061 * by socket layer
4062 */
4063 error = EJUSTRETURN;
4064 done:
4065 CFIL_INFO_VERIFY(cfil_info);
4066
4067 CFIL_LOG(LOG_INFO, "return %d", error);
4068 return error;
4069 }
4070
4071 /*
4072 * cfil_service_inject_queue() re-injects data that passed the
4073 * content filters
4074 */
4075 static int
4076 cfil_service_inject_queue(struct socket *so, struct cfil_info *cfil_info, int outgoing)
4077 {
4078 mbuf_t data;
4079 unsigned int datalen;
4080 int mbcnt = 0;
4081 int mbnum = 0;
4082 errno_t error = 0;
4083 struct cfi_buf *cfi_buf;
4084 struct cfil_queue *inject_q;
4085 int need_rwakeup = 0;
4086 int count = 0;
4087 struct inpcb *inp = NULL;
4088 struct ip *ip = NULL;
4089 unsigned int hlen;
4090
4091 if (cfil_info == NULL) {
4092 return 0;
4093 }
4094
4095 socket_lock_assert_owned(so);
4096
4097 if (outgoing) {
4098 cfi_buf = &cfil_info->cfi_snd;
4099 cfil_info->cfi_flags &= ~CFIF_RETRY_INJECT_OUT;
4100 } else {
4101 cfi_buf = &cfil_info->cfi_rcv;
4102 cfil_info->cfi_flags &= ~CFIF_RETRY_INJECT_IN;
4103 }
4104 inject_q = &cfi_buf->cfi_inject_q;
4105
4106 if (cfil_queue_empty(inject_q)) {
4107 return 0;
4108 }
4109
4110 #if DATA_DEBUG | VERDICT_DEBUG
4111 CFIL_LOG(LOG_ERR, "CFIL: SERVICE INJECT-Q: <so %llx> outgoing %d queue len %llu",
4112 (uint64_t)VM_KERNEL_ADDRPERM(so), outgoing, cfil_queue_len(inject_q));
4113 #endif
4114
4115 while ((data = cfil_queue_first(inject_q)) != NULL) {
4116 datalen = cfil_data_length(data, &mbcnt, &mbnum);
4117
4118 #if DATA_DEBUG
4119 CFIL_LOG(LOG_ERR, "CFIL: SERVICE INJECT-Q: <so %llx> data %llx datalen %u (mbcnt %u)",
4120 (uint64_t)VM_KERNEL_ADDRPERM(so), (uint64_t)VM_KERNEL_ADDRPERM(data), datalen, mbcnt);
4121 #endif
4122 if (cfil_info->cfi_debug) {
4123 CFIL_LOG(LOG_ERR, "CFIL: SERVICE INJECT-Q: <so %llx> data %llx datalen %u (mbcnt %u)",
4124 (uint64_t)VM_KERNEL_ADDRPERM(so), (uint64_t)VM_KERNEL_ADDRPERM(data), datalen, mbcnt);
4125 }
4126
4127 /* Remove data from queue and adjust stats */
4128 cfil_queue_remove(inject_q, data, datalen);
4129 cfi_buf->cfi_pending_first += datalen;
4130 cfi_buf->cfi_pending_mbcnt -= mbcnt;
4131 cfi_buf->cfi_pending_mbnum -= mbnum;
4132 cfil_info_buf_verify(cfi_buf);
4133
4134 if (outgoing) {
4135 error = sosend_reinject(so, NULL, data, NULL, 0);
4136 if (error != 0) {
4137 #if DATA_DEBUG
4138 cfil_info_log(LOG_ERR, cfil_info, "CFIL: Error: sosend_reinject() failed");
4139 CFIL_LOG(LOG_ERR, "### sosend() failed %d", error);
4140 #endif
4141 break;
4142 }
4143 // At least one injection succeeded, need to wake up pending threads.
4144 need_rwakeup = 1;
4145 } else {
4146 data->m_flags |= M_SKIPCFIL;
4147
4148 /*
4149 * NOTE: We currently only support TCP, UDP, ICMP,
4150 * ICMPv6 and RAWIP. For MPTCP and message TCP we'll
4151 * need to call the appropriate sbappendxxx()
4152 * or fix sock_inject_data_in()
4153 */
4154 if (IS_IP_DGRAM(so)) {
4155 if (OPTIONAL_IP_HEADER(so)) {
4156 inp = sotoinpcb(so);
4157 if (inp && (inp->inp_flags & INP_STRIPHDR)) {
4158 mbuf_t data_start = cfil_data_start(data);
4159 if (data_start != NULL && (data_start->m_flags & M_PKTHDR)) {
4160 ip = mtod(data_start, struct ip *);
4161 hlen = IP_VHL_HL(ip->ip_vhl) << 2;
4162 data_start->m_len -= hlen;
4163 data_start->m_pkthdr.len -= hlen;
4164 data_start->m_data += hlen;
4165 }
4166 }
4167 }
4168
4169 if (sbappendchain(&so->so_rcv, data, 0)) {
4170 need_rwakeup = 1;
4171 }
4172 } else {
4173 if (sbappendstream(&so->so_rcv, data)) {
4174 need_rwakeup = 1;
4175 }
4176 }
4177 }
4178
4179 if (outgoing) {
4180 OSAddAtomic64(datalen,
4181 &cfil_stats.cfs_inject_q_out_passed);
4182 } else {
4183 OSAddAtomic64(datalen,
4184 &cfil_stats.cfs_inject_q_in_passed);
4185 }
4186
4187 count++;
4188 }
4189
4190 #if DATA_DEBUG | VERDICT_DEBUG
4191 CFIL_LOG(LOG_ERR, "CFIL: SERVICE INJECT-Q: <so %llx> injected %d",
4192 (uint64_t)VM_KERNEL_ADDRPERM(so), count);
4193 #endif
4194 if (cfil_info->cfi_debug) {
4195 CFIL_LOG(LOG_ERR, "CFIL: SERVICE INJECT-Q: <so %llx> injected %d",
4196 (uint64_t)VM_KERNEL_ADDRPERM(so), count);
4197 }
4198
4199 /* A single wakeup for several packets is more efficient */
4200 if (need_rwakeup) {
4201 if (outgoing == TRUE) {
4202 sowwakeup(so);
4203 } else {
4204 sorwakeup(so);
4205 }
4206 }
4207
4208 if (error != 0 && cfil_info) {
4209 if (error == ENOBUFS) {
4210 OSIncrementAtomic(&cfil_stats.cfs_inject_q_nobufs);
4211 }
4212 if (error == ENOMEM) {
4213 OSIncrementAtomic(&cfil_stats.cfs_inject_q_nomem);
4214 }
4215
4216 if (outgoing) {
4217 cfil_info->cfi_flags |= CFIF_RETRY_INJECT_OUT;
4218 OSIncrementAtomic(&cfil_stats.cfs_inject_q_out_fail);
4219 } else {
4220 cfil_info->cfi_flags |= CFIF_RETRY_INJECT_IN;
4221 OSIncrementAtomic(&cfil_stats.cfs_inject_q_in_fail);
4222 }
4223 }
4224
4225 /*
4226 * Notify
4227 */
4228 if (cfil_info && (cfil_info->cfi_flags & CFIF_SHUT_WR)) {
4229 cfil_sock_notify_shutdown(so, SHUT_WR);
4230 if (cfil_sock_data_pending(&so->so_snd) == 0) {
4231 soshutdownlock_final(so, SHUT_WR);
4232 }
4233 }
4234 if (cfil_info && (cfil_info->cfi_flags & CFIF_CLOSE_WAIT)) {
4235 if (cfil_filters_attached(so) == 0) {
4236 CFIL_LOG(LOG_INFO, "so %llx waking",
4237 (uint64_t)VM_KERNEL_ADDRPERM(so));
4238 wakeup((caddr_t)cfil_info);
4239 }
4240 }
4241
4242 CFIL_INFO_VERIFY(cfil_info);
4243
4244 return error;
4245 }
4246
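/*
 * cfil_service_pending_queue()
 *
 * Move data below the filter's pass offset out of the pending queue,
 * hand it to the remaining filters in order and, once all filters have
 * passed it, place it on the inject queue for re-injection.
 */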
4247 static int
4248 cfil_service_pending_queue(struct socket *so, struct cfil_info *cfil_info, uint32_t kcunit, int outgoing)
4249 {
4250 uint64_t passlen, curlen;
4251 mbuf_t data;
4252 unsigned int datalen;
4253 errno_t error = 0;
4254 struct cfil_entry *entry;
4255 struct cfe_buf *entrybuf;
4256 struct cfil_queue *pending_q;
4257
4258 CFIL_LOG(LOG_INFO, "so %llx kcunit %u outgoing %d",
4259 (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit, outgoing);
4260
4261 socket_lock_assert_owned(so);
4262
4263 entry = &cfil_info->cfi_entries[kcunit - 1];
4264 if (outgoing) {
4265 entrybuf = &entry->cfe_snd;
4266 } else {
4267 entrybuf = &entry->cfe_rcv;
4268 }
4269
4270 pending_q = &entrybuf->cfe_pending_q;
4271
4272 passlen = entrybuf->cfe_pass_offset - pending_q->q_start;
4273
4274 /*
4275 * Locate the chunks of data that we can pass to the next filter
4276 * A data chunk must be on mbuf boundaries
4277 */
4278 curlen = 0;
4279 while ((data = cfil_queue_first(pending_q)) != NULL) {
4280 struct cfil_entry *iter_entry;
4281 datalen = cfil_data_length(data, NULL, NULL);
4282
4283 #if DATA_DEBUG
4284 CFIL_LOG(LOG_DEBUG,
4285 "CFIL: SERVICE PENDING-Q: data %llx datalen %u passlen %llu curlen %llu",
4286 (uint64_t)VM_KERNEL_ADDRPERM(data), datalen,
4287 passlen, curlen);
4288 #endif
4289
4290 if (curlen + datalen > passlen) {
4291 break;
4292 }
4293
4294 cfil_queue_remove(pending_q, data, datalen);
4295
4296 curlen += datalen;
4297
4298 for (iter_entry = SLIST_NEXT(entry, cfe_order_link);
4299 iter_entry != NULL;
4300 iter_entry = SLIST_NEXT(iter_entry, cfe_order_link)) {
4301 error = cfil_data_filter(so, cfil_info, CFI_ENTRY_KCUNIT(cfil_info, iter_entry), outgoing,
4302 data, datalen);
4303 /* 0 means passed so we can continue */
4304 if (error != 0) {
4305 break;
4306 }
4307 }
4308 /* When data has passed all filters, re-inject */
4309 if (error == 0) {
4310 if (outgoing) {
4311 cfil_queue_enqueue(
4312 &cfil_info->cfi_snd.cfi_inject_q,
4313 data, datalen);
4314 OSAddAtomic64(datalen,
4315 &cfil_stats.cfs_inject_q_out_enqueued);
4316 } else {
4317 cfil_queue_enqueue(
4318 &cfil_info->cfi_rcv.cfi_inject_q,
4319 data, datalen);
4320 OSAddAtomic64(datalen,
4321 &cfil_stats.cfs_inject_q_in_enqueued);
4322 }
4323 }
4324 }
4325
4326 CFIL_INFO_VERIFY(cfil_info);
4327
4328 return error;
4329 }
4330
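/*
 * cfil_update_data_offsets()
 *
 * Record the pass and peek offsets received from a filter and service the
 * control queue so newly passed data moves forward. Marks the entry
 * detached once both directions have passed up to CFM_MAX_OFFSET.
 */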
4331 int
4332 cfil_update_data_offsets(struct socket *so, struct cfil_info *cfil_info, uint32_t kcunit, int outgoing,
4333 uint64_t pass_offset, uint64_t peek_offset)
4334 {
4335 errno_t error = 0;
4336 struct cfil_entry *entry = NULL;
4337 struct cfe_buf *entrybuf;
4338 int updated = 0;
4339
4340 CFIL_LOG(LOG_INFO, "pass %llu peek %llu", pass_offset, peek_offset);
4341
4342 socket_lock_assert_owned(so);
4343
4344 if (cfil_info == NULL) {
4345 CFIL_LOG(LOG_ERR, "so %llx cfil detached",
4346 (uint64_t)VM_KERNEL_ADDRPERM(so));
4347 error = 0;
4348 goto done;
4349 } else if (cfil_info->cfi_flags & CFIF_DROP) {
4350 CFIL_LOG(LOG_ERR, "so %llx drop set",
4351 (uint64_t)VM_KERNEL_ADDRPERM(so));
4352 error = EPIPE;
4353 goto done;
4354 }
4355
4356 entry = &cfil_info->cfi_entries[kcunit - 1];
4357 if (outgoing) {
4358 entrybuf = &entry->cfe_snd;
4359 } else {
4360 entrybuf = &entry->cfe_rcv;
4361 }
4362
4363 /* Record updated offsets for this content filter */
4364 if (pass_offset > entrybuf->cfe_pass_offset) {
4365 entrybuf->cfe_pass_offset = pass_offset;
4366
4367 if (entrybuf->cfe_peek_offset < entrybuf->cfe_pass_offset) {
4368 entrybuf->cfe_peek_offset = entrybuf->cfe_pass_offset;
4369 }
4370 updated = 1;
4371 } else {
4372 CFIL_LOG(LOG_INFO, "pass_offset %llu <= cfe_pass_offset %llu",
4373 pass_offset, entrybuf->cfe_pass_offset);
4374 }
4375 /* Filter does not want or need to see data that's allowed to pass */
4376 if (peek_offset > entrybuf->cfe_pass_offset &&
4377 peek_offset > entrybuf->cfe_peek_offset) {
4378 entrybuf->cfe_peek_offset = peek_offset;
4379 updated = 1;
4380 }
4381 /* Nothing to do */
4382 if (updated == 0) {
4383 goto done;
4384 }
4385
4386 /* Move data held in control queue to pending queue if needed */
4387 error = cfil_data_service_ctl_q(so, cfil_info, kcunit, outgoing);
4388 if (error != 0) {
4389 CFIL_LOG(LOG_ERR, "cfil_data_service_ctl_q() error %d",
4390 error);
4391 goto done;
4392 }
4393 error = EJUSTRETURN;
4394
4395 done:
4396 /*
4397 * The filter is effectively detached when it has passed all data from both sides
4398 * or when the socket is closed and no more data is waiting
4399 * to be delivered to the filter
4400 */
4401 if (entry != NULL &&
4402 ((entry->cfe_snd.cfe_pass_offset == CFM_MAX_OFFSET &&
4403 entry->cfe_rcv.cfe_pass_offset == CFM_MAX_OFFSET) ||
4404 ((cfil_info->cfi_flags & CFIF_CLOSE_WAIT) &&
4405 cfil_queue_empty(&entry->cfe_snd.cfe_ctl_q) &&
4406 cfil_queue_empty(&entry->cfe_rcv.cfe_ctl_q)))) {
4407 entry->cfe_flags |= CFEF_CFIL_DETACHED;
4408 #if LIFECYCLE_DEBUG
4409 cfil_info_log(LOG_ERR, cfil_info, outgoing ?
4410 "CFIL: LIFECYCLE: OUT - PASSED ALL - DETACH":
4411 "CFIL: LIFECYCLE: IN - PASSED ALL - DETACH");
4412 #endif
4413 CFIL_LOG(LOG_INFO, "so %llx detached %u",
4414 (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit);
4415 if ((cfil_info->cfi_flags & CFIF_CLOSE_WAIT) &&
4416 cfil_filters_attached(so) == 0) {
4417 #if LIFECYCLE_DEBUG
4418 cfil_info_log(LOG_ERR, cfil_info, "CFIL: LIFECYCLE: WAKING");
4419 #endif
4420 CFIL_LOG(LOG_INFO, "so %llx waking",
4421 (uint64_t)VM_KERNEL_ADDRPERM(so));
4422 wakeup((caddr_t)cfil_info);
4423 }
4424 }
4425 CFIL_INFO_VERIFY(cfil_info);
4426 CFIL_LOG(LOG_INFO, "return %d", error);
4427 return error;
4428 }
4429
4430 /*
4431 * Update pass offset for socket when no data is pending
4432 */
4433 static int
4434 cfil_set_socket_pass_offset(struct socket *so, struct cfil_info *cfil_info, int outgoing)
4435 {
4436 struct cfi_buf *cfi_buf;
4437 struct cfil_entry *entry;
4438 struct cfe_buf *entrybuf;
4439 uint32_t kcunit;
4440 uint64_t pass_offset = 0;
4441
4442 if (cfil_info == NULL) {
4443 return 0;
4444 }
4445
4446 CFIL_LOG(LOG_INFO, "so %llx outgoing %d",
4447 (uint64_t)VM_KERNEL_ADDRPERM(so), outgoing);
4448
4449 socket_lock_assert_owned(so);
4450
4451 if (outgoing) {
4452 cfi_buf = &cfil_info->cfi_snd;
4453 } else {
4454 cfi_buf = &cfil_info->cfi_rcv;
4455 }
4456
4457 CFIL_LOG(LOG_DEBUG, "CFIL: <so %llx, sockID %llu> outgoing %d cfi_pending_first %llu cfi_pending_last %llu",
4458 (uint64_t)VM_KERNEL_ADDRPERM(so), cfil_info->cfi_sock_id, outgoing,
4459 cfi_buf->cfi_pending_first, cfi_buf->cfi_pending_last);
4460
4461 if (cfi_buf->cfi_pending_last - cfi_buf->cfi_pending_first == 0) {
4462 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
4463 entry = &cfil_info->cfi_entries[kcunit - 1];
4464
4465 /* Are we attached to a filter? */
4466 if (entry->cfe_filter == NULL) {
4467 continue;
4468 }
4469
4470 if (outgoing) {
4471 entrybuf = &entry->cfe_snd;
4472 } else {
4473 entrybuf = &entry->cfe_rcv;
4474 }
4475
4476 if (pass_offset == 0 ||
4477 entrybuf->cfe_pass_offset < pass_offset) {
4478 pass_offset = entrybuf->cfe_pass_offset;
4479 }
4480 }
4481 cfi_buf->cfi_pass_offset = pass_offset;
4482 }
4483
4484 CFIL_LOG(LOG_DEBUG, "CFIL: <so %llx, sockID %llu>, cfi_pass_offset %llu",
4485 (uint64_t)VM_KERNEL_ADDRPERM(so), cfil_info->cfi_sock_id, cfi_buf->cfi_pass_offset);
4486
4487 return 0;
4488 }
4489
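/*
 * cfil_action_data_pass()
 *
 * Handle a pass verdict from a filter: update the data offsets, re-inject
 * any data that may now pass and update the per-socket pass offset.
 */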
4490 int
4491 cfil_action_data_pass(struct socket *so, struct cfil_info *cfil_info, uint32_t kcunit, int outgoing,
4492 uint64_t pass_offset, uint64_t peek_offset)
4493 {
4494 errno_t error = 0;
4495
4496 CFIL_LOG(LOG_INFO, "");
4497
4498 socket_lock_assert_owned(so);
4499
4500 error = cfil_acquire_sockbuf(so, cfil_info, outgoing);
4501 if (error != 0) {
4502 CFIL_LOG(LOG_INFO, "so %llx %s dropped",
4503 (uint64_t)VM_KERNEL_ADDRPERM(so),
4504 outgoing ? "out" : "in");
4505 goto release;
4506 }
4507
4508 error = cfil_update_data_offsets(so, cfil_info, kcunit, outgoing,
4509 pass_offset, peek_offset);
4510
4511 cfil_service_inject_queue(so, cfil_info, outgoing);
4512
4513 cfil_set_socket_pass_offset(so, cfil_info, outgoing);
4514 release:
4515 CFIL_INFO_VERIFY(cfil_info);
4516 cfil_release_sockbuf(so, outgoing);
4517
4518 return error;
4519 }
4520
4521
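/*
 * cfil_flush_queues()
 *
 * Drain the control, pending and inject queues in both directions,
 * typically when the flow is dropped or closed.
 */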
4522 static void
4523 cfil_flush_queues(struct socket *so, struct cfil_info *cfil_info)
4524 {
4525 struct cfil_entry *entry;
4526 int kcunit;
4527 uint64_t drained;
4528
4529 if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || cfil_info == NULL) {
4530 goto done;
4531 }
4532
4533 socket_lock_assert_owned(so);
4534
4535 /*
4536 * Flush the output queues and ignore errors as long as
4537 * we are attached
4538 */
4539 (void) cfil_acquire_sockbuf(so, cfil_info, 1);
4540 if (cfil_info != NULL) {
4541 drained = 0;
4542 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
4543 entry = &cfil_info->cfi_entries[kcunit - 1];
4544
4545 drained += cfil_queue_drain(&entry->cfe_snd.cfe_ctl_q);
4546 drained += cfil_queue_drain(&entry->cfe_snd.cfe_pending_q);
4547 }
4548 drained += cfil_queue_drain(&cfil_info->cfi_snd.cfi_inject_q);
4549
4550 if (drained) {
4551 if (cfil_info->cfi_flags & CFIF_DROP) {
4552 OSIncrementAtomic(
4553 &cfil_stats.cfs_flush_out_drop);
4554 } else {
4555 OSIncrementAtomic(
4556 &cfil_stats.cfs_flush_out_close);
4557 }
4558 }
4559 }
4560 cfil_release_sockbuf(so, 1);
4561
4562 /*
4563 * Flush the input queues
4564 */
4565 (void) cfil_acquire_sockbuf(so, cfil_info, 0);
4566 if (cfil_info != NULL) {
4567 drained = 0;
4568 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
4569 entry = &cfil_info->cfi_entries[kcunit - 1];
4570
4571 drained += cfil_queue_drain(
4572 &entry->cfe_rcv.cfe_ctl_q);
4573 drained += cfil_queue_drain(
4574 &entry->cfe_rcv.cfe_pending_q);
4575 }
4576 drained += cfil_queue_drain(&cfil_info->cfi_rcv.cfi_inject_q);
4577
4578 if (drained) {
4579 if (cfil_info->cfi_flags & CFIF_DROP) {
4580 OSIncrementAtomic(
4581 &cfil_stats.cfs_flush_in_drop);
4582 } else {
4583 OSIncrementAtomic(
4584 &cfil_stats.cfs_flush_in_close);
4585 }
4586 }
4587 }
4588 cfil_release_sockbuf(so, 0);
4589 done:
4590 CFIL_INFO_VERIFY(cfil_info);
4591 }
4592
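/*
 * cfil_action_drop()
 *
 * Handle a drop verdict from a filter: mark the flow CFIF_DROP, defunct
 * the socket for non-datagram flows, detach the filter entry and flush
 * any pending data.
 */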
4593 int
4594 cfil_action_drop(struct socket *so, struct cfil_info *cfil_info, uint32_t kcunit)
4595 {
4596 errno_t error = 0;
4597 struct cfil_entry *entry;
4598 struct proc *p;
4599
4600 if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || cfil_info == NULL) {
4601 goto done;
4602 }
4603
4604 socket_lock_assert_owned(so);
4605
4606 entry = &cfil_info->cfi_entries[kcunit - 1];
4607
4608 /* Are we attached to the filter? */
4609 if (entry->cfe_filter == NULL) {
4610 goto done;
4611 }
4612
4613 cfil_info->cfi_flags |= CFIF_DROP;
4614
4615 p = current_proc();
4616
4617 /*
4618 * Force the socket to be marked defunct
4619 * (forcing fixed along with rdar://19391339)
4620 */
4621 if (so->so_cfil_db == NULL) {
4622 error = sosetdefunct(p, so,
4623 SHUTDOWN_SOCKET_LEVEL_CONTENT_FILTER | SHUTDOWN_SOCKET_LEVEL_DISCONNECT_ALL,
4624 FALSE);
4625
4626 /* Flush the socket buffer and disconnect */
4627 if (error == 0) {
4628 error = sodefunct(p, so,
4629 SHUTDOWN_SOCKET_LEVEL_CONTENT_FILTER | SHUTDOWN_SOCKET_LEVEL_DISCONNECT_ALL);
4630 }
4631 }
4632
4633 /* The filter is done, mark as detached */
4634 entry->cfe_flags |= CFEF_CFIL_DETACHED;
4635 #if LIFECYCLE_DEBUG
4636 cfil_info_log(LOG_ERR, cfil_info, "CFIL: LIFECYCLE: DROP - DETACH");
4637 #endif
4638 CFIL_LOG(LOG_INFO, "so %llx detached %u",
4639 (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit);
4640
4641 /* Pending data needs to go */
4642 cfil_flush_queues(so, cfil_info);
4643
4644 if (cfil_info && (cfil_info->cfi_flags & CFIF_CLOSE_WAIT)) {
4645 if (cfil_filters_attached(so) == 0) {
4646 CFIL_LOG(LOG_INFO, "so %llx waking",
4647 (uint64_t)VM_KERNEL_ADDRPERM(so));
4648 wakeup((caddr_t)cfil_info);
4649 }
4650 }
4651 done:
4652 return error;
4653 }
4654
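/*
 * cfil_action_bless_client()
 *
 * Give the socket identified by the client UUID an automatic pass in both
 * directions; if no filter is attached, mark the socket to skip content
 * filtering.
 */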
4655 int
4656 cfil_action_bless_client(uint32_t kcunit, struct cfil_msg_hdr *msghdr)
4657 {
4658 errno_t error = 0;
4659 struct cfil_info *cfil_info = NULL;
4660
4661 bool cfil_attached = false;
4662 struct cfil_msg_bless_client *blessmsg = (struct cfil_msg_bless_client *)msghdr;
4663
4664 // Search and lock socket
4665 struct socket *so = cfil_socket_from_client_uuid(blessmsg->cfb_client_uuid, &cfil_attached);
4666 if (so == NULL) {
4667 error = ENOENT;
4668 } else {
4669 // The client gets a pass automatically
4670 cfil_info = (so->so_cfil_db != NULL) ?
4671 cfil_db_get_cfil_info(so->so_cfil_db, msghdr->cfm_sock_id) : so->so_cfil;
4672
4673 if (cfil_attached) {
4674 #if VERDICT_DEBUG
4675 if (cfil_info != NULL) {
4676 CFIL_LOG(LOG_ERR, "CFIL: VERDICT RECEIVED: BLESS %s <so %llx sockID %llu>",
4677 cfil_info->cfi_hash_entry ? "UDP" : "TCP",
4678 (uint64_t)VM_KERNEL_ADDRPERM(so),
4679 cfil_info->cfi_sock_id);
4680 }
4681 #endif
4682 cfil_sock_received_verdict(so);
4683 (void)cfil_action_data_pass(so, cfil_info, kcunit, 1, CFM_MAX_OFFSET, CFM_MAX_OFFSET);
4684 (void)cfil_action_data_pass(so, cfil_info, kcunit, 0, CFM_MAX_OFFSET, CFM_MAX_OFFSET);
4685 } else {
4686 so->so_flags1 |= SOF1_CONTENT_FILTER_SKIP;
4687 }
4688 socket_unlock(so, 1);
4689 }
4690
4691 return error;
4692 }
4693
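/*
 * cfil_action_set_crypto_key()
 *
 * Install the crypto state derived from the key supplied by the filter
 * agent; it is used to sign events dispatched to user space (see
 * cfil_dispatch_data_event_sign() and cfil_dispatch_closed_event_sign()).
 */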
4694 int
4695 cfil_action_set_crypto_key(uint32_t kcunit, struct cfil_msg_hdr *msghdr)
4696 {
4697 struct content_filter *cfc = NULL;
4698 cfil_crypto_state_t crypto_state = NULL;
4699 struct cfil_msg_set_crypto_key *keymsg = (struct cfil_msg_set_crypto_key *)msghdr;
4700
4701 CFIL_LOG(LOG_NOTICE, "");
4702
4703 if (content_filters == NULL) {
4704 CFIL_LOG(LOG_ERR, "no content filter");
4705 return EINVAL;
4706 }
4707 if (kcunit > MAX_CONTENT_FILTER) {
4708 CFIL_LOG(LOG_ERR, "kcunit %u > MAX_CONTENT_FILTER (%d)",
4709 kcunit, MAX_CONTENT_FILTER);
4710 return EINVAL;
4711 }
4712 crypto_state = cfil_crypto_init_client((uint8_t *)keymsg->crypto_key);
4713 if (crypto_state == NULL) {
4714 CFIL_LOG(LOG_ERR, "failed to initialize crypto state for unit %u",
4715 kcunit);
4716 return EINVAL;
4717 }
4718
4719 cfil_rw_lock_exclusive(&cfil_lck_rw);
4720
4721 cfc = content_filters[kcunit - 1];
4722 if (cfc == NULL || cfc->cf_kcunit != kcunit) {
4723 CFIL_LOG(LOG_ERR, "bad unit info %u",
4724 kcunit);
4725 cfil_rw_unlock_exclusive(&cfil_lck_rw);
4726 cfil_crypto_cleanup_state(crypto_state);
4727 return EINVAL;
4728 }
4729 if (cfc->cf_crypto_state != NULL) {
4730 cfil_crypto_cleanup_state(cfc->cf_crypto_state);
4731 cfc->cf_crypto_state = NULL;
4732 }
4733 cfc->cf_crypto_state = crypto_state;
4734
4735 cfil_rw_unlock_exclusive(&cfil_lck_rw);
4736 return 0;
4737 }
4738
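/*
 * cfil_update_entry_offsets()
 *
 * Fast path used when data is already below every pass offset: advance
 * the per-entry queue offsets by datalen without queueing the data.
 */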
4739 static int
4740 cfil_update_entry_offsets(struct socket *so, struct cfil_info *cfil_info, int outgoing, unsigned int datalen)
4741 {
4742 struct cfil_entry *entry;
4743 struct cfe_buf *entrybuf;
4744 uint32_t kcunit;
4745
4746 CFIL_LOG(LOG_INFO, "so %llx outgoing %d datalen %u",
4747 (uint64_t)VM_KERNEL_ADDRPERM(so), outgoing, datalen);
4748
4749 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
4750 entry = &cfil_info->cfi_entries[kcunit - 1];
4751
4752 /* Are we attached to the filter? */
4753 if (entry->cfe_filter == NULL) {
4754 continue;
4755 }
4756
4757 if (outgoing) {
4758 entrybuf = &entry->cfe_snd;
4759 } else {
4760 entrybuf = &entry->cfe_rcv;
4761 }
4762
4763 entrybuf->cfe_ctl_q.q_start += datalen;
4764 entrybuf->cfe_pass_offset = entrybuf->cfe_ctl_q.q_start;
4765 entrybuf->cfe_peeked = entrybuf->cfe_ctl_q.q_start;
4766 if (entrybuf->cfe_peek_offset < entrybuf->cfe_pass_offset) {
4767 entrybuf->cfe_peek_offset = entrybuf->cfe_pass_offset;
4768 }
4769
4770 entrybuf->cfe_ctl_q.q_end += datalen;
4771
4772 entrybuf->cfe_pending_q.q_start += datalen;
4773 entrybuf->cfe_pending_q.q_end += datalen;
4774 }
4775 CFIL_INFO_VERIFY(cfil_info);
4776 return 0;
4777 }
4778
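/*
 * cfil_data_common()
 *
 * Common data path for the sosendxxx() and sbappendxxx() callbacks:
 * account for the new data and dispatch it to the attached filters in
 * order, unless it is already below the socket pass offset.
 */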
4779 int
4780 cfil_data_common(struct socket *so, struct cfil_info *cfil_info, int outgoing, struct sockaddr *to,
4781 struct mbuf *data, struct mbuf *control, uint32_t flags)
4782 {
4783 #pragma unused(to, control, flags)
4784 errno_t error = 0;
4785 unsigned int datalen;
4786 int mbcnt = 0;
4787 int mbnum = 0;
4788 int kcunit;
4789 struct cfi_buf *cfi_buf;
4790 struct mbuf *chain = NULL;
4791
4792 if (cfil_info == NULL) {
4793 CFIL_LOG(LOG_ERR, "so %llx cfil detached",
4794 (uint64_t)VM_KERNEL_ADDRPERM(so));
4795 error = 0;
4796 goto done;
4797 } else if (cfil_info->cfi_flags & CFIF_DROP) {
4798 CFIL_LOG(LOG_ERR, "so %llx drop set",
4799 (uint64_t)VM_KERNEL_ADDRPERM(so));
4800 error = EPIPE;
4801 goto done;
4802 }
4803
4804 datalen = cfil_data_length(data, &mbcnt, &mbnum);
4805
4806 if (outgoing) {
4807 cfi_buf = &cfil_info->cfi_snd;
4808 cfil_info->cfi_byte_outbound_count += datalen;
4809 } else {
4810 cfi_buf = &cfil_info->cfi_rcv;
4811 cfil_info->cfi_byte_inbound_count += datalen;
4812 }
4813
4814 cfi_buf->cfi_pending_last += datalen;
4815 cfi_buf->cfi_pending_mbcnt += mbcnt;
4816 cfi_buf->cfi_pending_mbnum += mbnum;
4817
4818 if (IS_IP_DGRAM(so)) {
4819 if (cfi_buf->cfi_pending_mbnum > cfil_udp_gc_mbuf_num_max ||
4820 cfi_buf->cfi_pending_mbcnt > cfil_udp_gc_mbuf_cnt_max) {
4821 cfi_buf->cfi_tail_drop_cnt++;
4822 cfi_buf->cfi_pending_mbcnt -= mbcnt;
4823 cfi_buf->cfi_pending_mbnum -= mbnum;
4824 return EPIPE;
4825 }
4826 }
4827
4828 cfil_info_buf_verify(cfi_buf);
4829
4830 #if DATA_DEBUG
4831 CFIL_LOG(LOG_DEBUG, "CFIL: QUEUEING DATA: <so %llx> %s: data %llx len %u flags 0x%x nextpkt %llx - cfi_pending_last %llu cfi_pending_mbcnt %u cfi_pass_offset %llu",
4832 (uint64_t)VM_KERNEL_ADDRPERM(so),
4833 outgoing ? "OUT" : "IN",
4834 (uint64_t)VM_KERNEL_ADDRPERM(data), datalen, data->m_flags,
4835 (uint64_t)VM_KERNEL_ADDRPERM(data->m_nextpkt),
4836 cfi_buf->cfi_pending_last,
4837 cfi_buf->cfi_pending_mbcnt,
4838 cfi_buf->cfi_pass_offset);
4839 #endif
4840
4841 /* Fast path when below pass offset */
4842 if (cfi_buf->cfi_pending_last <= cfi_buf->cfi_pass_offset) {
4843 cfil_update_entry_offsets(so, cfil_info, outgoing, datalen);
4844 #if DATA_DEBUG
4845 CFIL_LOG(LOG_DEBUG, "CFIL: QUEUEING DATA: FAST PATH");
4846 #endif
4847 } else {
4848 struct cfil_entry *iter_entry;
4849 SLIST_FOREACH(iter_entry, &cfil_info->cfi_ordered_entries, cfe_order_link) {
4850 // Is cfil attached to this filter?
4851 kcunit = CFI_ENTRY_KCUNIT(cfil_info, iter_entry);
4852 if (IS_ENTRY_ATTACHED(cfil_info, kcunit)) {
4853 if (IS_IP_DGRAM(so) && chain == NULL) {
4854 /* Datagrams only:
4855 * Chain addr (incoming only TDB), control (optional) and data into one chain.
4856 * This full chain will be reinjected into the socket after receiving the verdict.
4857 */
4858 (void) cfil_dgram_save_socket_state(cfil_info, data);
4859 chain = sbconcat_mbufs(NULL, outgoing ? NULL : to, data, control);
4860 if (chain == NULL) {
4861 return ENOBUFS;
4862 }
4863 data = chain;
4864 }
4865 error = cfil_data_filter(so, cfil_info, kcunit, outgoing, data,
4866 datalen);
4867 }
4868 /* 0 means passed so continue with next filter */
4869 if (error != 0) {
4870 break;
4871 }
4872 }
4873 }
4874
4875 /* Move cursor if no filter claimed the data */
4876 if (error == 0) {
4877 cfi_buf->cfi_pending_first += datalen;
4878 cfi_buf->cfi_pending_mbcnt -= mbcnt;
4879 cfi_buf->cfi_pending_mbnum -= mbnum;
4880 cfil_info_buf_verify(cfi_buf);
4881 }
4882 done:
4883 CFIL_INFO_VERIFY(cfil_info);
4884
4885 return error;
4886 }
4887
4888 /*
4889 * Callback from socket layer sosendxxx()
4890 */
4891 int
4892 cfil_sock_data_out(struct socket *so, struct sockaddr *to,
4893 struct mbuf *data, struct mbuf *control, uint32_t flags)
4894 {
4895 int error = 0;
4896
4897 if (IS_IP_DGRAM(so)) {
4898 return cfil_sock_udp_handle_data(TRUE, so, NULL, to, data, control, flags);
4899 }
4900
4901 if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL) {
4902 return 0;
4903 }
4904
4905 /*
4906 * Pass initial data for TFO.
4907 */
4908 if (IS_INITIAL_TFO_DATA(so)) {
4909 return 0;
4910 }
4911
4912 socket_lock_assert_owned(so);
4913
4914 if (so->so_cfil->cfi_flags & CFIF_DROP) {
4915 CFIL_LOG(LOG_ERR, "so %llx drop set",
4916 (uint64_t)VM_KERNEL_ADDRPERM(so));
4917 return EPIPE;
4918 }
4919 if (control != NULL) {
4920 CFIL_LOG(LOG_ERR, "so %llx control",
4921 (uint64_t)VM_KERNEL_ADDRPERM(so));
4922 OSIncrementAtomic(&cfil_stats.cfs_data_out_control);
4923 }
4924 if ((flags & MSG_OOB)) {
4925 CFIL_LOG(LOG_ERR, "so %llx MSG_OOB",
4926 (uint64_t)VM_KERNEL_ADDRPERM(so));
4927 OSIncrementAtomic(&cfil_stats.cfs_data_out_oob);
4928 }
4929 if ((so->so_snd.sb_flags & SB_LOCK) == 0) {
4930 panic("so %p SB_LOCK not set", so);
4931 }
4932
4933 if (so->so_snd.sb_cfil_thread != NULL) {
4934 panic("%s sb_cfil_thread %p not NULL", __func__,
4935 so->so_snd.sb_cfil_thread);
4936 }
4937
4938 error = cfil_data_common(so, so->so_cfil, 1, to, data, control, flags);
4939
4940 return error;
4941 }
4942
4943 /*
4944 * Callback from socket layer sbappendxxx()
4945 */
4946 int
4947 cfil_sock_data_in(struct socket *so, struct sockaddr *from,
4948 struct mbuf *data, struct mbuf *control, uint32_t flags)
4949 {
4950 int error = 0;
4951
4952 if (IS_IP_DGRAM(so)) {
4953 return cfil_sock_udp_handle_data(FALSE, so, NULL, from, data, control, flags);
4954 }
4955
4956 if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL) {
4957 return 0;
4958 }
4959
4960 /*
4961 * Pass initial data for TFO.
4962 */
4963 if (IS_INITIAL_TFO_DATA(so)) {
4964 return 0;
4965 }
4966
4967 socket_lock_assert_owned(so);
4968
4969 if (so->so_cfil->cfi_flags & CFIF_DROP) {
4970 CFIL_LOG(LOG_ERR, "so %llx drop set",
4971 (uint64_t)VM_KERNEL_ADDRPERM(so));
4972 return EPIPE;
4973 }
4974 if (control != NULL) {
4975 CFIL_LOG(LOG_ERR, "so %llx control",
4976 (uint64_t)VM_KERNEL_ADDRPERM(so));
4977 OSIncrementAtomic(&cfil_stats.cfs_data_in_control);
4978 }
4979 if (data->m_type == MT_OOBDATA) {
4980 CFIL_LOG(LOG_ERR, "so %llx MSG_OOB",
4981 (uint64_t)VM_KERNEL_ADDRPERM(so));
4982 OSIncrementAtomic(&cfil_stats.cfs_data_in_oob);
4983 }
4984 error = cfil_data_common(so, so->so_cfil, 0, from, data, control, flags);
4985
4986 return error;
4987 }
4988
4989 /*
4990 * Callback from socket layer soshutdownxxx()
4991 *
4992 * We may delay the shutdown write if outgoing data is still being processed.
4993 *
4994 * There is no point in delaying the shutdown read because the process
4995 * indicated that it does not want to read any more data.
4996 */
4997 int
4998 cfil_sock_shutdown(struct socket *so, int *how)
4999 {
5000 int error = 0;
5001
5002 if (IS_IP_DGRAM(so)) {
5003 return cfil_sock_udp_shutdown(so, how);
5004 }
5005
5006 if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL) {
5007 goto done;
5008 }
5009
5010 socket_lock_assert_owned(so);
5011
5012 CFIL_LOG(LOG_INFO, "so %llx how %d",
5013 (uint64_t)VM_KERNEL_ADDRPERM(so), *how);
5014
5015 /*
5016 * Check the state of the socket before the content filter
5017 */
5018 if (*how != SHUT_WR && (so->so_state & SS_CANTRCVMORE) != 0) {
5019 /* read already shut down */
5020 error = ENOTCONN;
5021 goto done;
5022 }
5023 if (*how != SHUT_RD && (so->so_state & SS_CANTSENDMORE) != 0) {
5024 /* write already shut down */
5025 error = ENOTCONN;
5026 goto done;
5027 }
5028
5029 if ((so->so_cfil->cfi_flags & CFIF_DROP) != 0) {
5030 CFIL_LOG(LOG_ERR, "so %llx drop set",
5031 (uint64_t)VM_KERNEL_ADDRPERM(so));
5032 goto done;
5033 }
5034
5035 /*
5036 * shutdown read: SHUT_RD or SHUT_RDWR
5037 */
5038 if (*how != SHUT_WR) {
5039 if (so->so_cfil->cfi_flags & CFIF_SHUT_RD) {
5040 error = ENOTCONN;
5041 goto done;
5042 }
5043 so->so_cfil->cfi_flags |= CFIF_SHUT_RD;
5044 cfil_sock_notify_shutdown(so, SHUT_RD);
5045 }
5046 /*
5047 * shutdown write: SHUT_WR or SHUT_RDWR
5048 */
5049 if (*how != SHUT_RD) {
5050 if (so->so_cfil->cfi_flags & CFIF_SHUT_WR) {
5051 error = ENOTCONN;
5052 goto done;
5053 }
5054 so->so_cfil->cfi_flags |= CFIF_SHUT_WR;
5055 cfil_sock_notify_shutdown(so, SHUT_WR);
5056 /*
5057 * When outgoing data is pending, we delay the shutdown at the
5058 * protocol level until the content filters give the final
5059 * verdict on the pending data.
5060 */
5061 if (cfil_sock_data_pending(&so->so_snd) != 0) {
5062 /*
5063 * When shutting down the read and write sides at once
5064 * we can proceed to the final shutdown of the read
5065 * side. Otherwise, we just return.
5066 */
5067 if (*how == SHUT_WR) {
5068 error = EJUSTRETURN;
5069 } else if (*how == SHUT_RDWR) {
5070 *how = SHUT_RD;
5071 }
5072 }
5073 }
5074 done:
5075 return error;
5076 }
5077
5078 /*
5079 * This is called when the socket is closed and there is no more
5080 * opportunity for filtering
5081 */
5082 void
5083 cfil_sock_is_closed(struct socket *so)
5084 {
5085 errno_t error = 0;
5086 int kcunit;
5087
5088 if (IS_IP_DGRAM(so)) {
5089 cfil_sock_udp_is_closed(so);
5090 return;
5091 }
5092
5093 if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL) {
5094 return;
5095 }
5096
5097 CFIL_LOG(LOG_INFO, "so %llx", (uint64_t)VM_KERNEL_ADDRPERM(so));
5098
5099 socket_lock_assert_owned(so);
5100
5101 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
5102 /* Let the filters know of the closing */
5103 error = cfil_dispatch_closed_event(so, so->so_cfil, kcunit);
5104 }
5105
5106 /* Last chance to push passed data out */
5107 error = cfil_acquire_sockbuf(so, so->so_cfil, 1);
5108 if (error == 0) {
5109 cfil_service_inject_queue(so, so->so_cfil, 1);
5110 }
5111 cfil_release_sockbuf(so, 1);
5112
5113 so->so_cfil->cfi_flags |= CFIF_SOCK_CLOSED;
5114
5115 /* Pending data needs to go */
5116 cfil_flush_queues(so, so->so_cfil);
5117
5118 CFIL_INFO_VERIFY(so->so_cfil);
5119 }
5120
5121 /*
5122 * This is called when the socket is disconnected so let the filters
5123 * know about the disconnection and that no more data will come
5124 *
5125 * The how parameter has the same values as soshutdown()
5126 */
5127 void
5128 cfil_sock_notify_shutdown(struct socket *so, int how)
5129 {
5130 errno_t error = 0;
5131 int kcunit;
5132
5133 if (IS_IP_DGRAM(so)) {
5134 cfil_sock_udp_notify_shutdown(so, how, 0, 0);
5135 return;
5136 }
5137
5138 if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL) {
5139 return;
5140 }
5141
5142 CFIL_LOG(LOG_INFO, "so %llx how %d",
5143 (uint64_t)VM_KERNEL_ADDRPERM(so), how);
5144
5145 socket_lock_assert_owned(so);
5146
5147 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
5148 /* Disconnect incoming side */
5149 if (how != SHUT_WR) {
5150 error = cfil_dispatch_disconnect_event(so, so->so_cfil, kcunit, 0);
5151 }
5152 /* Disconnect outgoing side */
5153 if (how != SHUT_RD) {
5154 error = cfil_dispatch_disconnect_event(so, so->so_cfil, kcunit, 1);
5155 }
5156 }
5157 }
5158
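/*
 * Return 1 if at least one content filter is still attached to the socket,
 * i.e. it was sent an attach event and has not yet been detached.
 */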
5159 static int
5160 cfil_filters_attached(struct socket *so)
5161 {
5162 struct cfil_entry *entry;
5163 uint32_t kcunit;
5164 int attached = 0;
5165
5166 if (IS_IP_DGRAM(so)) {
5167 return cfil_filters_udp_attached(so, FALSE);
5168 }
5169
5170 if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL) {
5171 return 0;
5172 }
5173
5174 socket_lock_assert_owned(so);
5175
5176 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
5177 entry = &so->so_cfil->cfi_entries[kcunit - 1];
5178
5179 /* Are we attached to the filter? */
5180 if (entry->cfe_filter == NULL) {
5181 continue;
5182 }
5183 if ((entry->cfe_flags & CFEF_SENT_SOCK_ATTACHED) == 0) {
5184 continue;
5185 }
5186 if ((entry->cfe_flags & CFEF_CFIL_DETACHED) != 0) {
5187 continue;
5188 }
5189 attached = 1;
5190 break;
5191 }
5192
5193 return attached;
5194 }
5195
5196 /*
5197 * This is called when the socket is closed and we are waiting for
5198 * the filters to give the final pass or drop
5199 */
5200 void
5201 cfil_sock_close_wait(struct socket *so)
5202 {
5203 lck_mtx_t *mutex_held;
5204 struct timespec ts;
5205 int error;
5206
5207 if (IS_IP_DGRAM(so)) {
5208 cfil_sock_udp_close_wait(so);
5209 return;
5210 }
5211
5212 if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL) {
5213 return;
5214 }
5215
5216 CFIL_LOG(LOG_INFO, "so %llx", (uint64_t)VM_KERNEL_ADDRPERM(so));
5217
5218 if (so->so_proto->pr_getlock != NULL) {
5219 mutex_held = (*so->so_proto->pr_getlock)(so, PR_F_WILLUNLOCK);
5220 } else {
5221 mutex_held = so->so_proto->pr_domain->dom_mtx;
5222 }
5223 LCK_MTX_ASSERT(mutex_held, LCK_MTX_ASSERT_OWNED);
5224
5225 while (cfil_filters_attached(so)) {
5226 /*
5227 * Notify the filters we are going away so they can detach
5228 */
5229 cfil_sock_notify_shutdown(so, SHUT_RDWR);
5230
5231 /*
5232 * Check that we still need to wait after the filters are notified
5233 * of the disconnection
5234 */
5235 if (cfil_filters_attached(so) == 0) {
5236 break;
5237 }
5238
5239 CFIL_LOG(LOG_INFO, "so %llx waiting",
5240 (uint64_t)VM_KERNEL_ADDRPERM(so));
5241
5242 ts.tv_sec = cfil_close_wait_timeout / 1000;
5243 ts.tv_nsec = (cfil_close_wait_timeout % 1000) *
5244 NSEC_PER_USEC * 1000;
5245
5246 OSIncrementAtomic(&cfil_stats.cfs_close_wait);
5247 so->so_cfil->cfi_flags |= CFIF_CLOSE_WAIT;
5248 error = msleep((caddr_t)so->so_cfil, mutex_held,
5249 PSOCK | PCATCH, "cfil_sock_close_wait", &ts);
5250 so->so_cfil->cfi_flags &= ~CFIF_CLOSE_WAIT;
5251
5252 CFIL_LOG(LOG_NOTICE, "so %llx timed out %d",
5253 (uint64_t)VM_KERNEL_ADDRPERM(so), (error != 0));
5254
5255 /*
5256 * Force close in case of timeout
5257 */
5258 if (error != 0) {
5259 OSIncrementAtomic(&cfil_stats.cfs_close_wait_timeout);
5260 break;
5261 }
5262 }
5263 }
5264
5265 /*
5266 * Returns the size of the data held by the content filters for this socket buffer
5267 */
5268 int32_t
5269 cfil_sock_data_pending(struct sockbuf *sb)
5270 {
5271 struct socket *so = sb->sb_so;
5272 uint64_t pending = 0;
5273
5274 if (IS_IP_DGRAM(so)) {
5275 return cfil_sock_udp_data_pending(sb, FALSE);
5276 }
5277
5278 if ((so->so_flags & SOF_CONTENT_FILTER) != 0 && so->so_cfil != NULL) {
5279 struct cfi_buf *cfi_buf;
5280
5281 socket_lock_assert_owned(so);
5282
5283 if ((sb->sb_flags & SB_RECV) == 0) {
5284 cfi_buf = &so->so_cfil->cfi_snd;
5285 } else {
5286 cfi_buf = &so->so_cfil->cfi_rcv;
5287 }
5288
5289 pending = cfi_buf->cfi_pending_last -
5290 cfi_buf->cfi_pending_first;
5291
5292 /*
5293 * If we are limited by the "chars of mbufs used" roughly
5294 * adjust so we won't overcommit
5295 */
5296 if (pending > (uint64_t)cfi_buf->cfi_pending_mbcnt) {
5297 pending = cfi_buf->cfi_pending_mbcnt;
5298 }
5299 }
5300
5301 VERIFY(pending < INT32_MAX);
5302
5303 return (int32_t)(pending);
5304 }
5305
5306 /*
5307 * Return the socket buffer space used by data being held by content filters
5308 * so processes won't clog the socket buffer
5309 */
5310 int32_t
5311 cfil_sock_data_space(struct sockbuf *sb)
5312 {
5313 struct socket *so = sb->sb_so;
5314 uint64_t pending = 0;
5315
5316 if (IS_IP_DGRAM(so)) {
5317 return cfil_sock_udp_data_pending(sb, TRUE);
5318 }
5319
5320 if ((so->so_flags & SOF_CONTENT_FILTER) != 0 && so->so_cfil != NULL &&
5321 so->so_snd.sb_cfil_thread != current_thread()) {
5322 struct cfi_buf *cfi_buf;
5323
5324 socket_lock_assert_owned(so);
5325
5326 if ((sb->sb_flags & SB_RECV) == 0) {
5327 cfi_buf = &so->so_cfil->cfi_snd;
5328 } else {
5329 cfi_buf = &so->so_cfil->cfi_rcv;
5330 }
5331
5332 pending = cfi_buf->cfi_pending_last -
5333 cfi_buf->cfi_pending_first;
5334
5335 /*
5336 * If we are limited by the "chars of mbufs used" count, adjust the
5337 * estimate so we won't overcommit
5338 */
5339 if ((uint64_t)cfi_buf->cfi_pending_mbcnt > pending) {
5340 pending = cfi_buf->cfi_pending_mbcnt;
5341 }
5342 }
5343
5344 VERIFY(pending < INT32_MAX);
5345
5346 return (int32_t)(pending);
5347 }
5348
5349 /*
5350 * A callback from the socket and protocol layer when data becomes
5351 * available in the socket buffer to give a chance for the content filter
5352 * to re-inject data that was held back
5353 */
5354 void
5355 cfil_sock_buf_update(struct sockbuf *sb)
5356 {
5357 int outgoing;
5358 int error;
5359 struct socket *so = sb->sb_so;
5360
5361 if (IS_IP_DGRAM(so)) {
5362 cfil_sock_udp_buf_update(sb);
5363 return;
5364 }
5365
5366 if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL) {
5367 return;
5368 }
5369
5370 if (!cfil_sbtrim) {
5371 return;
5372 }
5373
5374 socket_lock_assert_owned(so);
5375
5376 if ((sb->sb_flags & SB_RECV) == 0) {
5377 if ((so->so_cfil->cfi_flags & CFIF_RETRY_INJECT_OUT) == 0) {
5378 return;
5379 }
5380 outgoing = 1;
5381 OSIncrementAtomic(&cfil_stats.cfs_inject_q_out_retry);
5382 } else {
5383 if ((so->so_cfil->cfi_flags & CFIF_RETRY_INJECT_IN) == 0) {
5384 return;
5385 }
5386 outgoing = 0;
5387 OSIncrementAtomic(&cfil_stats.cfs_inject_q_in_retry);
5388 }
5389
5390 CFIL_LOG(LOG_NOTICE, "so %llx outgoing %d",
5391 (uint64_t)VM_KERNEL_ADDRPERM(so), outgoing);
5392
5393 error = cfil_acquire_sockbuf(so, so->so_cfil, outgoing);
5394 if (error == 0) {
5395 cfil_service_inject_queue(so, so->so_cfil, outgoing);
5396 }
5397 cfil_release_sockbuf(so, outgoing);
5398 }
5399
5400 int
5401 sysctl_cfil_filter_list(struct sysctl_oid *oidp, void *arg1, int arg2,
5402 struct sysctl_req *req)
5403 {
5404 #pragma unused(oidp, arg1, arg2)
5405 int error = 0;
5406 size_t len = 0;
5407 u_int32_t i;
5408
5409 /* Read only */
5410 if (req->newptr != USER_ADDR_NULL) {
5411 return EPERM;
5412 }
5413
5414 cfil_rw_lock_shared(&cfil_lck_rw);
5415
5416 for (i = 0; content_filters != NULL && i < MAX_CONTENT_FILTER; i++) {
5417 struct cfil_filter_stat filter_stat;
5418 struct content_filter *cfc = content_filters[i];
5419
5420 if (cfc == NULL) {
5421 continue;
5422 }
5423
5424 /* If just asking for the size */
5425 if (req->oldptr == USER_ADDR_NULL) {
5426 len += sizeof(struct cfil_filter_stat);
5427 continue;
5428 }
5429
5430 bzero(&filter_stat, sizeof(struct cfil_filter_stat));
5431 filter_stat.cfs_len = sizeof(struct cfil_filter_stat);
5432 filter_stat.cfs_filter_id = cfc->cf_kcunit;
5433 filter_stat.cfs_flags = cfc->cf_flags;
5434 filter_stat.cfs_sock_count = cfc->cf_sock_count;
5435 filter_stat.cfs_necp_control_unit = cfc->cf_necp_control_unit;
5436
5437 error = SYSCTL_OUT(req, &filter_stat,
5438 sizeof(struct cfil_filter_stat));
5439 if (error != 0) {
5440 break;
5441 }
5442 }
5443 /* If just asking for the size */
5444 if (req->oldptr == USER_ADDR_NULL) {
5445 req->oldidx = len;
5446 }
5447
5448 cfil_rw_unlock_shared(&cfil_lck_rw);
5449
5450 #if SHOW_DEBUG
5451 if (req->oldptr != USER_ADDR_NULL) {
5452 for (i = 1; content_filters != NULL && i <= MAX_CONTENT_FILTER; i++) {
5453 cfil_filter_show(i);
5454 }
5455 }
5456 #endif
5457
5458 return error;
5459 }
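
/*
 * User-space sketch (not compiled): how an agent could read the filter list
 * through the two-pass protocol implemented by sysctl_cfil_filter_list()
 * above -- first call with a NULL buffer to learn the size, then call again
 * with an allocated buffer.  The OID string "net.cfil.filter_list" is an
 * assumption about how this handler is registered; adjust it to the actual
 * registration if it differs.  struct cfil_filter_stat comes from the
 * content filter headers.
 */
#if 0
#include <sys/types.h>
#include <sys/sysctl.h>
#include <stdio.h>
#include <stdlib.h>

static void
dump_cfil_filter_list(void)
{
	size_t len = 0;
	void *buf;

	/* First pass: oldptr == NULL, the handler only reports the size. */
	if (sysctlbyname("net.cfil.filter_list", NULL, &len, NULL, 0) != 0 || len == 0) {
		return;
	}

	buf = malloc(len);
	if (buf == NULL) {
		return;
	}

	/* Second pass: the handler copies one cfil_filter_stat per active filter. */
	if (sysctlbyname("net.cfil.filter_list", buf, &len, NULL, 0) == 0) {
		printf("filter list: %zu bytes (%zu entries)\n",
		    len, len / sizeof(struct cfil_filter_stat));
	}
	free(buf);
}
#endif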
5460
5461 static int
5462 sysctl_cfil_sock_list(struct sysctl_oid *oidp, void *arg1, int arg2,
5463 struct sysctl_req *req)
5464 {
5465 #pragma unused(oidp, arg1, arg2)
5466 int error = 0;
5467 u_int32_t i;
5468 struct cfil_info *cfi;
5469
5470 /* Read only */
5471 if (req->newptr != USER_ADDR_NULL) {
5472 return EPERM;
5473 }
5474
5475 cfil_rw_lock_shared(&cfil_lck_rw);
5476
5477 /*
5478 * If just asking for the size
5479 */
5480 if (req->oldptr == USER_ADDR_NULL) {
5481 req->oldidx = cfil_sock_attached_count *
5482 sizeof(struct cfil_sock_stat);
5483 /* Bump the length in case new sockets get attached */
5484 req->oldidx += req->oldidx >> 3;
5485 goto done;
5486 }
5487
5488 TAILQ_FOREACH(cfi, &cfil_sock_head, cfi_link) {
5489 struct cfil_entry *entry;
5490 struct cfil_sock_stat stat;
5491 struct socket *so = cfi->cfi_so;
5492
5493 bzero(&stat, sizeof(struct cfil_sock_stat));
5494 stat.cfs_len = sizeof(struct cfil_sock_stat);
5495 stat.cfs_sock_id = cfi->cfi_sock_id;
5496 stat.cfs_flags = cfi->cfi_flags;
5497
5498 if (so != NULL) {
5499 stat.cfs_pid = so->last_pid;
5500 memcpy(stat.cfs_uuid, so->last_uuid,
5501 sizeof(uuid_t));
5502 if (so->so_flags & SOF_DELEGATED) {
5503 stat.cfs_e_pid = so->e_pid;
5504 memcpy(stat.cfs_e_uuid, so->e_uuid,
5505 sizeof(uuid_t));
5506 } else {
5507 stat.cfs_e_pid = so->last_pid;
5508 memcpy(stat.cfs_e_uuid, so->last_uuid,
5509 sizeof(uuid_t));
5510 }
5511
5512 stat.cfs_sock_family = so->so_proto->pr_domain->dom_family;
5513 stat.cfs_sock_type = so->so_proto->pr_type;
5514 stat.cfs_sock_protocol = so->so_proto->pr_protocol;
5515 }
5516
5517 stat.cfs_snd.cbs_pending_first =
5518 cfi->cfi_snd.cfi_pending_first;
5519 stat.cfs_snd.cbs_pending_last =
5520 cfi->cfi_snd.cfi_pending_last;
5521 stat.cfs_snd.cbs_inject_q_len =
5522 cfil_queue_len(&cfi->cfi_snd.cfi_inject_q);
5523 stat.cfs_snd.cbs_pass_offset =
5524 cfi->cfi_snd.cfi_pass_offset;
5525
5526 stat.cfs_rcv.cbs_pending_first =
5527 cfi->cfi_rcv.cfi_pending_first;
5528 stat.cfs_rcv.cbs_pending_last =
5529 cfi->cfi_rcv.cfi_pending_last;
5530 stat.cfs_rcv.cbs_inject_q_len =
5531 cfil_queue_len(&cfi->cfi_rcv.cfi_inject_q);
5532 stat.cfs_rcv.cbs_pass_offset =
5533 cfi->cfi_rcv.cfi_pass_offset;
5534
5535 for (i = 0; i < MAX_CONTENT_FILTER; i++) {
5536 struct cfil_entry_stat *estat;
5537 struct cfe_buf *ebuf;
5538 struct cfe_buf_stat *sbuf;
5539
5540 entry = &cfi->cfi_entries[i];
5541
5542 estat = &stat.ces_entries[i];
5543
5544 estat->ces_len = sizeof(struct cfil_entry_stat);
5545 estat->ces_filter_id = entry->cfe_filter ?
5546 entry->cfe_filter->cf_kcunit : 0;
5547 estat->ces_flags = entry->cfe_flags;
5548 estat->ces_necp_control_unit =
5549 entry->cfe_necp_control_unit;
5550
5551 estat->ces_last_event.tv_sec =
5552 (int64_t)entry->cfe_last_event.tv_sec;
5553 estat->ces_last_event.tv_usec =
5554 (int64_t)entry->cfe_last_event.tv_usec;
5555
5556 estat->ces_last_action.tv_sec =
5557 (int64_t)entry->cfe_last_action.tv_sec;
5558 estat->ces_last_action.tv_usec =
5559 (int64_t)entry->cfe_last_action.tv_usec;
5560
5561 ebuf = &entry->cfe_snd;
5562 sbuf = &estat->ces_snd;
5563 sbuf->cbs_pending_first =
5564 cfil_queue_offset_first(&ebuf->cfe_pending_q);
5565 sbuf->cbs_pending_last =
5566 cfil_queue_offset_last(&ebuf->cfe_pending_q);
5567 sbuf->cbs_ctl_first =
5568 cfil_queue_offset_first(&ebuf->cfe_ctl_q);
5569 sbuf->cbs_ctl_last =
5570 cfil_queue_offset_last(&ebuf->cfe_ctl_q);
5571 sbuf->cbs_pass_offset = ebuf->cfe_pass_offset;
5572 sbuf->cbs_peek_offset = ebuf->cfe_peek_offset;
5573 sbuf->cbs_peeked = ebuf->cfe_peeked;
5574
5575 ebuf = &entry->cfe_rcv;
5576 sbuf = &estat->ces_rcv;
5577 sbuf->cbs_pending_first =
5578 cfil_queue_offset_first(&ebuf->cfe_pending_q);
5579 sbuf->cbs_pending_last =
5580 cfil_queue_offset_last(&ebuf->cfe_pending_q);
5581 sbuf->cbs_ctl_first =
5582 cfil_queue_offset_first(&ebuf->cfe_ctl_q);
5583 sbuf->cbs_ctl_last =
5584 cfil_queue_offset_last(&ebuf->cfe_ctl_q);
5585 sbuf->cbs_pass_offset = ebuf->cfe_pass_offset;
5586 sbuf->cbs_peek_offset = ebuf->cfe_peek_offset;
5587 sbuf->cbs_peeked = ebuf->cfe_peeked;
5588 }
5589 error = SYSCTL_OUT(req, &stat,
5590 sizeof(struct cfil_sock_stat));
5591 if (error != 0) {
5592 break;
5593 }
5594 }
5595 done:
5596 cfil_rw_unlock_shared(&cfil_lck_rw);
5597
5598 #if SHOW_DEBUG
5599 if (req->oldptr != USER_ADDR_NULL) {
5600 cfil_info_show();
5601 }
5602 #endif
5603
5604 return error;
5605 }
5606
5607 /*
5608 * UDP Socket Support
5609 */
5610 static void
5611 cfil_hash_entry_log(int level, struct socket *so, struct cfil_hash_entry *entry, uint64_t sockId, const char* msg)
5612 {
5613 char local[MAX_IPv6_STR_LEN + 6];
5614 char remote[MAX_IPv6_STR_LEN + 6];
5615 const void *addr;
5616
5617 // No sock or no entry, no-op
5618 if (so == NULL || entry == NULL) {
5619 return;
5620 }
5621
5622 local[0] = remote[0] = 0x0;
5623
5624 switch (entry->cfentry_family) {
5625 case AF_INET6:
5626 addr = &entry->cfentry_laddr.addr6;
5627 inet_ntop(AF_INET6, addr, local, sizeof(local));
5628 addr = &entry->cfentry_faddr.addr6;
5629 inet_ntop(AF_INET6, addr, remote, sizeof(remote));
5630 break;
5631 case AF_INET:
5632 addr = &entry->cfentry_laddr.addr46.ia46_addr4.s_addr;
5633 inet_ntop(AF_INET, addr, local, sizeof(local));
5634 addr = &entry->cfentry_faddr.addr46.ia46_addr4.s_addr;
5635 inet_ntop(AF_INET, addr, remote, sizeof(remote));
5636 break;
5637 default:
5638 return;
5639 }
5640
5641 CFIL_LOG(level, "<%s>: <%s(%d) so %llx, entry %p, sockID %llu> lport %d fport %d laddr %s faddr %s",
5642 msg,
5643 IS_UDP(so) ? "UDP" : "proto", GET_SO_PROTO(so),
5644 (uint64_t)VM_KERNEL_ADDRPERM(so), entry, sockId,
5645 ntohs(entry->cfentry_lport), ntohs(entry->cfentry_fport), local, remote);
5646 }
5647
5648 static void
5649 cfil_inp_log(int level, struct socket *so, const char* msg)
5650 {
5651 struct inpcb *inp = NULL;
5652 char local[MAX_IPv6_STR_LEN + 6];
5653 char remote[MAX_IPv6_STR_LEN + 6];
5654 const void *addr;
5655
5656 if (so == NULL) {
5657 return;
5658 }
5659
5660 inp = sotoinpcb(so);
5661 if (inp == NULL) {
5662 return;
5663 }
5664
5665 local[0] = remote[0] = 0x0;
5666
5667 #if INET6
5668 if (inp->inp_vflag & INP_IPV6) {
5669 addr = &inp->in6p_laddr.s6_addr32;
5670 inet_ntop(AF_INET6, addr, local, sizeof(local));
5671 addr = &inp->in6p_faddr.s6_addr32;
5672 inet_ntop(AF_INET6, addr, remote, sizeof(remote));
5673 } else
5674 #endif /* INET6 */
5675 {
5676 addr = &inp->inp_laddr.s_addr;
5677 inet_ntop(AF_INET, addr, local, sizeof(local));
5678 addr = &inp->inp_faddr.s_addr;
5679 inet_ntop(AF_INET, addr, remote, sizeof(remote));
5680 }
5681
5682 if (so->so_cfil != NULL) {
5683 CFIL_LOG(level, "<%s>: <%s so %llx - flags 0x%x 0x%x, sockID %llu> lport %d fport %d laddr %s faddr %s",
5684 msg, IS_UDP(so) ? "UDP" : "TCP",
5685 (uint64_t)VM_KERNEL_ADDRPERM(so), inp->inp_flags, inp->inp_socket->so_flags, so->so_cfil->cfi_sock_id,
5686 ntohs(inp->inp_lport), ntohs(inp->inp_fport), local, remote);
5687 } else {
5688 CFIL_LOG(level, "<%s>: <%s so %llx - flags 0x%x 0x%x> lport %d fport %d laddr %s faddr %s",
5689 msg, IS_UDP(so) ? "UDP" : "TCP",
5690 (uint64_t)VM_KERNEL_ADDRPERM(so), inp->inp_flags, inp->inp_socket->so_flags,
5691 ntohs(inp->inp_lport), ntohs(inp->inp_fport), local, remote);
5692 }
5693 }
5694
5695 static void
5696 cfil_info_log(int level, struct cfil_info *cfil_info, const char* msg)
5697 {
5698 if (cfil_info == NULL) {
5699 return;
5700 }
5701
5702 if (cfil_info->cfi_hash_entry != NULL) {
5703 cfil_hash_entry_log(level, cfil_info->cfi_so, cfil_info->cfi_hash_entry, cfil_info->cfi_sock_id, msg);
5704 } else {
5705 cfil_inp_log(level, cfil_info->cfi_so, msg);
5706 }
5707 }
5708
5709 errno_t
5710 cfil_db_init(struct socket *so)
5711 {
5712 errno_t error = 0;
5713 struct cfil_db *db = NULL;
5714
5715 CFIL_LOG(LOG_INFO, "");
5716
5717 db = zalloc(cfil_db_zone);
5718 if (db == NULL) {
5719 error = ENOMEM;
5720 goto done;
5721 }
5722 bzero(db, sizeof(struct cfil_db));
5723 db->cfdb_so = so;
5724 db->cfdb_hashbase = hashinit(CFILHASHSIZE, M_CFIL, &db->cfdb_hashmask);
5725 if (db->cfdb_hashbase == NULL) {
5726 zfree(cfil_db_zone, db);
5727 db = NULL;
5728 error = ENOMEM;
5729 goto done;
5730 }
5731
5732 so->so_cfil_db = db;
5733
5734 done:
5735 return error;
5736 }
5737
5738 void
5739 cfil_db_free(struct socket *so)
5740 {
5741 struct cfil_hash_entry *entry = NULL;
5742 struct cfil_hash_entry *temp_entry = NULL;
5743 struct cfilhashhead *cfilhash = NULL;
5744 struct cfil_db *db = NULL;
5745
5746 CFIL_LOG(LOG_INFO, "");
5747
5748 if (so == NULL || so->so_cfil_db == NULL) {
5749 return;
5750 }
5751 db = so->so_cfil_db;
5752
5753 #if LIFECYCLE_DEBUG
5754 CFIL_LOG(LOG_ERR, "CFIL: LIFECYCLE: <so %llx, db %p> freeing db (count == %d)",
5755 (uint64_t)VM_KERNEL_ADDRPERM(so), db, db->cfdb_count);
5756 #endif
5757
5758 for (int i = 0; i < CFILHASHSIZE; i++) {
5759 cfilhash = &db->cfdb_hashbase[i];
5760 LIST_FOREACH_SAFE(entry, cfilhash, cfentry_link, temp_entry) {
5761 if (entry->cfentry_cfil != NULL) {
5762 #if LIFECYCLE_DEBUG
5763 cfil_info_log(LOG_ERR, entry->cfentry_cfil, "CFIL: LIFECYCLE: DB FREE CLEAN UP");
5764 #endif
5765 cfil_info_free(entry->cfentry_cfil);
5766 OSIncrementAtomic(&cfil_stats.cfs_sock_detached);
5767 entry->cfentry_cfil = NULL;
5768 }
5769
5770 cfil_db_delete_entry(db, entry);
5771 if (so->so_flags & SOF_CONTENT_FILTER) {
5772 if (db->cfdb_count == 0) {
5773 so->so_flags &= ~SOF_CONTENT_FILTER;
5774 }
5775 VERIFY(so->so_usecount > 0);
5776 so->so_usecount--;
5777 }
5778 }
5779 }
5780
5781 // Make sure all entries are cleaned up!
5782 VERIFY(db->cfdb_count == 0);
5783 #if LIFECYCLE_DEBUG
5784 CFIL_LOG(LOG_ERR, "CFIL: LIFECYCLE: so usecount %d", so->so_usecount);
5785 #endif
5786
5787 FREE(db->cfdb_hashbase, M_CFIL);
5788 zfree(cfil_db_zone, db);
5789 so->so_cfil_db = NULL;
5790 }
5791
5792 static bool
5793 fill_cfil_hash_entry_from_address(struct cfil_hash_entry *entry, bool isLocal, struct sockaddr *addr)
5794 {
5795 struct sockaddr_in *sin = NULL;
5796 struct sockaddr_in6 *sin6 = NULL;
5797
5798 if (entry == NULL || addr == NULL) {
5799 return FALSE;
5800 }
5801
5802 switch (addr->sa_family) {
5803 case AF_INET:
5804 sin = satosin(addr);
5805 if (sin->sin_len != sizeof(*sin)) {
5806 return FALSE;
5807 }
5808 if (isLocal == TRUE) {
5809 entry->cfentry_lport = sin->sin_port;
5810 entry->cfentry_laddr.addr46.ia46_addr4.s_addr = sin->sin_addr.s_addr;
5811 } else {
5812 entry->cfentry_fport = sin->sin_port;
5813 entry->cfentry_faddr.addr46.ia46_addr4.s_addr = sin->sin_addr.s_addr;
5814 }
5815 entry->cfentry_family = AF_INET;
5816 return TRUE;
5817 case AF_INET6:
5818 sin6 = satosin6(addr);
5819 if (sin6->sin6_len != sizeof(*sin6)) {
5820 return FALSE;
5821 }
5822 if (isLocal == TRUE) {
5823 entry->cfentry_lport = sin6->sin6_port;
5824 entry->cfentry_laddr.addr6 = sin6->sin6_addr;
5825 } else {
5826 entry->cfentry_fport = sin6->sin6_port;
5827 entry->cfentry_faddr.addr6 = sin6->sin6_addr;
5828 }
5829 entry->cfentry_family = AF_INET6;
5830 return TRUE;
5831 default:
5832 return FALSE;
5833 }
5834 }
5835
5836 static bool
5837 fill_cfil_hash_entry_from_inp(struct cfil_hash_entry *entry, bool isLocal, struct inpcb *inp)
5838 {
5839 if (entry == NULL || inp == NULL) {
5840 return FALSE;
5841 }
5842
5843 if (inp->inp_vflag & INP_IPV6) {
5844 if (isLocal == TRUE) {
5845 entry->cfentry_lport = inp->inp_lport;
5846 entry->cfentry_laddr.addr6 = inp->in6p_laddr;
5847 } else {
5848 entry->cfentry_fport = inp->inp_fport;
5849 entry->cfentry_faddr.addr6 = inp->in6p_faddr;
5850 }
5851 entry->cfentry_family = AF_INET6;
5852 return TRUE;
5853 } else if (inp->inp_vflag & INP_IPV4) {
5854 if (isLocal == TRUE) {
5855 entry->cfentry_lport = inp->inp_lport;
5856 entry->cfentry_laddr.addr46.ia46_addr4.s_addr = inp->inp_laddr.s_addr;
5857 } else {
5858 entry->cfentry_fport = inp->inp_fport;
5859 entry->cfentry_faddr.addr46.ia46_addr4.s_addr = inp->inp_faddr.s_addr;
5860 }
5861 entry->cfentry_family = AF_INET;
5862 return TRUE;
5863 }
5864 return FALSE;
5865 }
5866
5867 bool
5868 check_port(struct sockaddr *addr, u_short port)
5869 {
5870 struct sockaddr_in *sin = NULL;
5871 struct sockaddr_in6 *sin6 = NULL;
5872
5873 if (addr == NULL || port == 0) {
5874 return FALSE;
5875 }
5876
5877 switch (addr->sa_family) {
5878 case AF_INET:
5879 sin = satosin(addr);
5880 if (sin->sin_len != sizeof(*sin)) {
5881 return FALSE;
5882 }
5883 if (port == ntohs(sin->sin_port)) {
5884 return TRUE;
5885 }
5886 break;
5887 case AF_INET6:
5888 sin6 = satosin6(addr);
5889 if (sin6->sin6_len != sizeof(*sin6)) {
5890 return FALSE;
5891 }
5892 if (port == ntohs(sin6->sin6_port)) {
5893 return TRUE;
5894 }
5895 break;
5896 default:
5897 break;
5898 }
5899 return FALSE;
5900 }
5901
5902 struct cfil_hash_entry *
5903 cfil_db_lookup_entry_with_sockid(struct cfil_db *db, u_int64_t sock_id)
5904 {
5905 struct cfilhashhead *cfilhash = NULL;
5906 u_int32_t flowhash = (u_int32_t)(sock_id & 0x0ffffffff);
5907 struct cfil_hash_entry *nextentry;
5908
5909 if (db == NULL || db->cfdb_hashbase == NULL || sock_id == 0) {
5910 return NULL;
5911 }
5912
5913 flowhash &= db->cfdb_hashmask;
5914 cfilhash = &db->cfdb_hashbase[flowhash];
5915
5916 LIST_FOREACH(nextentry, cfilhash, cfentry_link) {
5917 if (nextentry->cfentry_cfil != NULL &&
5918 nextentry->cfentry_cfil->cfi_sock_id == sock_id) {
5919 CFIL_LOG(LOG_DEBUG, "CFIL: UDP <so %llx> matched <id %llu, hash %u>",
5920 (uint64_t)VM_KERNEL_ADDRPERM(db->cfdb_so), nextentry->cfentry_cfil->cfi_sock_id, flowhash);
5921 cfil_hash_entry_log(LOG_DEBUG, db->cfdb_so, nextentry, 0, "CFIL: UDP found entry");
5922 return nextentry;
5923 }
5924 }
5925
5926 CFIL_LOG(LOG_DEBUG, "CFIL: UDP <so %llx> NOT matched <id %llu, hash %u>",
5927 (uint64_t)VM_KERNEL_ADDRPERM(db->cfdb_so), sock_id, flowhash);
5928 return NULL;
5929 }
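
/*
 * Sketch (not compiled): the bucket selection performed by the lookup above.
 * The low 32 bits of a cfil sock id carry the flow hash, which is then masked
 * down to the bucket range.  "hashmask" is assumed to be the mask produced by
 * hashinit() in cfil_db_init().
 */
#if 0
static u_int32_t
cfil_sockid_to_bucket(u_int64_t sock_id, u_long hashmask)
{
	u_int32_t flowhash = (u_int32_t)(sock_id & 0x0ffffffff);

	return flowhash & (u_int32_t)hashmask;
}
#endif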
5930
5931 struct cfil_hash_entry *
5932 cfil_db_lookup_entry(struct cfil_db *db, struct sockaddr *local, struct sockaddr *remote, boolean_t remoteOnly)
5933 {
5934 struct cfil_hash_entry matchentry = { };
5935 struct cfil_hash_entry *nextentry = NULL;
5936 struct inpcb *inp = sotoinpcb(db->cfdb_so);
5937 u_int32_t hashkey_faddr = 0, hashkey_laddr = 0;
5938 u_int16_t hashkey_fport = 0, hashkey_lport = 0;
5939 int inp_hash_element = 0;
5940 struct cfilhashhead *cfilhash = NULL;
5941
5942 CFIL_LOG(LOG_INFO, "");
5943
5944 if (inp == NULL) {
5945 goto done;
5946 }
5947
5948 if (remoteOnly == false) {
5949 if (local != NULL) {
5950 fill_cfil_hash_entry_from_address(&matchentry, TRUE, local);
5951 } else {
5952 fill_cfil_hash_entry_from_inp(&matchentry, TRUE, inp);
5953 }
5954 }
5955 if (remote != NULL) {
5956 fill_cfil_hash_entry_from_address(&matchentry, FALSE, remote);
5957 } else {
5958 fill_cfil_hash_entry_from_inp(&matchentry, FALSE, inp);
5959 }
5960
5961 #if INET6
5962 if (inp->inp_vflag & INP_IPV6) {
5963 hashkey_faddr = matchentry.cfentry_faddr.addr6.s6_addr32[3];
5964 hashkey_laddr = (remoteOnly == false) ? matchentry.cfentry_laddr.addr6.s6_addr32[3] : 0;
5965 } else
5966 #endif /* INET6 */
5967 {
5968 hashkey_faddr = matchentry.cfentry_faddr.addr46.ia46_addr4.s_addr;
5969 hashkey_laddr = (remoteOnly == false) ? matchentry.cfentry_laddr.addr46.ia46_addr4.s_addr : 0;
5970 }
5971
5972 hashkey_fport = matchentry.cfentry_fport;
5973 hashkey_lport = (remoteOnly == false) ? matchentry.cfentry_lport : 0;
5974
5975 inp_hash_element = CFIL_HASH(hashkey_laddr, hashkey_faddr, hashkey_lport, hashkey_fport);
5976 inp_hash_element &= db->cfdb_hashmask;
5977
5978 cfilhash = &db->cfdb_hashbase[inp_hash_element];
5979
5980 LIST_FOREACH(nextentry, cfilhash, cfentry_link) {
5981 #if INET6
5982 if ((inp->inp_vflag & INP_IPV6) &&
5983 (remoteOnly || nextentry->cfentry_lport == matchentry.cfentry_lport) &&
5984 nextentry->cfentry_fport == matchentry.cfentry_fport &&
5985 (remoteOnly || IN6_ARE_ADDR_EQUAL(&nextentry->cfentry_laddr.addr6, &matchentry.cfentry_laddr.addr6)) &&
5986 IN6_ARE_ADDR_EQUAL(&nextentry->cfentry_faddr.addr6, &matchentry.cfentry_faddr.addr6)) {
5987 #if DATA_DEBUG
5988 cfil_hash_entry_log(LOG_DEBUG, db->cfdb_so, &matchentry, 0, "CFIL LOOKUP ENTRY: UDP V6 found entry");
5989 #endif
5990 return nextentry;
5991 } else
5992 #endif /* INET6 */
5993 if ((remoteOnly || nextentry->cfentry_lport == matchentry.cfentry_lport) &&
5994 nextentry->cfentry_fport == matchentry.cfentry_fport &&
5995 (remoteOnly || nextentry->cfentry_laddr.addr46.ia46_addr4.s_addr == matchentry.cfentry_laddr.addr46.ia46_addr4.s_addr) &&
5996 nextentry->cfentry_faddr.addr46.ia46_addr4.s_addr == matchentry.cfentry_faddr.addr46.ia46_addr4.s_addr) {
5997 #if DATA_DEBUG
5998 cfil_hash_entry_log(LOG_DEBUG, db->cfdb_so, &matchentry, 0, "CFIL LOOKUP ENTRY: UDP V4 found entry");
5999 #endif
6000 return nextentry;
6001 }
6002 }
6003
6004 done:
6005 #if DATA_DEBUG
6006 cfil_hash_entry_log(LOG_DEBUG, db->cfdb_so, &matchentry, 0, "CFIL LOOKUP ENTRY: UDP no entry found");
6007 #endif
6008 return NULL;
6009 }
6010
6011 void
6012 cfil_db_delete_entry(struct cfil_db *db, struct cfil_hash_entry *hash_entry)
6013 {
6014 if (hash_entry == NULL) {
6015 return;
6016 }
6017 if (db == NULL || db->cfdb_count == 0) {
6018 return;
6019 }
6020 db->cfdb_count--;
6021 if (db->cfdb_only_entry == hash_entry) {
6022 db->cfdb_only_entry = NULL;
6023 }
6024 LIST_REMOVE(hash_entry, cfentry_link);
6025 zfree(cfil_hash_entry_zone, hash_entry);
6026 }
6027
6028 struct cfil_hash_entry *
6029 cfil_db_add_entry(struct cfil_db *db, struct sockaddr *local, struct sockaddr *remote)
6030 {
6031 struct cfil_hash_entry *entry = NULL;
6032 struct inpcb *inp = sotoinpcb(db->cfdb_so);
6033 u_int32_t hashkey_faddr = 0, hashkey_laddr = 0;
6034 int inp_hash_element = 0;
6035 struct cfilhashhead *cfilhash = NULL;
6036
6037 CFIL_LOG(LOG_INFO, "");
6038
6039 if (inp == NULL) {
6040 goto done;
6041 }
6042
6043 entry = zalloc(cfil_hash_entry_zone);
6044 if (entry == NULL) {
6045 goto done;
6046 }
6047 bzero(entry, sizeof(struct cfil_hash_entry));
6048
6049 if (local != NULL) {
6050 fill_cfil_hash_entry_from_address(entry, TRUE, local);
6051 } else {
6052 fill_cfil_hash_entry_from_inp(entry, TRUE, inp);
6053 }
6054 if (remote != NULL) {
6055 fill_cfil_hash_entry_from_address(entry, FALSE, remote);
6056 } else {
6057 fill_cfil_hash_entry_from_inp(entry, FALSE, inp);
6058 }
6059 entry->cfentry_lastused = net_uptime();
6060
6061 #if INET6
6062 if (inp->inp_vflag & INP_IPV6) {
6063 hashkey_faddr = entry->cfentry_faddr.addr6.s6_addr32[3];
6064 hashkey_laddr = entry->cfentry_laddr.addr6.s6_addr32[3];
6065 } else
6066 #endif /* INET6 */
6067 {
6068 hashkey_faddr = entry->cfentry_faddr.addr46.ia46_addr4.s_addr;
6069 hashkey_laddr = entry->cfentry_laddr.addr46.ia46_addr4.s_addr;
6070 }
6071 entry->cfentry_flowhash = CFIL_HASH(hashkey_laddr, hashkey_faddr,
6072 entry->cfentry_lport, entry->cfentry_fport);
6073 inp_hash_element = entry->cfentry_flowhash & db->cfdb_hashmask;
6074
6075 cfilhash = &db->cfdb_hashbase[inp_hash_element];
6076
6077 LIST_INSERT_HEAD(cfilhash, entry, cfentry_link);
6078 db->cfdb_count++;
6079 db->cfdb_only_entry = entry;
6080 cfil_hash_entry_log(LOG_DEBUG, db->cfdb_so, entry, 0, "CFIL: cfil_db_add_entry: ADDED");
6081
6082 done:
6083 CFIL_LOG(LOG_DEBUG, "CFIL: UDP <so %llx> total count %d", (uint64_t)VM_KERNEL_ADDRPERM(db->cfdb_so), db->cfdb_count);
6084 return entry;
6085 }
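
/*
 * Sketch (not compiled): the keys cfil_db_add_entry() above feeds to
 * CFIL_HASH().  Only the last 32 bits of an IPv6 address participate in the
 * flow hash; an IPv4 address is used whole.  "entry" is assumed to be an
 * already populated struct cfil_hash_entry.
 */
#if 0
static void
cfil_hash_keys(const struct cfil_hash_entry *entry,
    u_int32_t *laddr_key, u_int32_t *faddr_key)
{
	if (entry->cfentry_family == AF_INET6) {
		*laddr_key = entry->cfentry_laddr.addr6.s6_addr32[3];
		*faddr_key = entry->cfentry_faddr.addr6.s6_addr32[3];
	} else {
		*laddr_key = entry->cfentry_laddr.addr46.ia46_addr4.s_addr;
		*faddr_key = entry->cfentry_faddr.addr46.ia46_addr4.s_addr;
	}
}
#endif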
6086
6087 void
6088 cfil_db_update_entry_local(struct cfil_db *db, struct cfil_hash_entry *entry, struct sockaddr *local)
6089 {
6090 struct inpcb *inp = sotoinpcb(db->cfdb_so);
6091
6092 CFIL_LOG(LOG_INFO, "");
6093
6094 if (inp == NULL || entry == NULL) {
6095 return;
6096 }
6097
6098 if (local != NULL) {
6099 fill_cfil_hash_entry_from_address(entry, TRUE, local);
6100 } else {
6101 fill_cfil_hash_entry_from_inp(entry, TRUE, inp);
6102 }
6103 cfil_hash_entry_log(LOG_DEBUG, db->cfdb_so, entry, 0, "CFIL: cfil_db_add_entry: local updated");
6104
6105 return;
6106 }
6107
6108 struct cfil_info *
6109 cfil_db_get_cfil_info(struct cfil_db *db, cfil_sock_id_t id)
6110 {
6111 struct cfil_hash_entry *hash_entry = NULL;
6112
6113 CFIL_LOG(LOG_INFO, "");
6114
6115 if (db == NULL || id == 0) {
6116 CFIL_LOG(LOG_DEBUG, "CFIL: UDP <so %llx> NULL DB <id %llu>",
6117 db ? (uint64_t)VM_KERNEL_ADDRPERM(db->cfdb_so) : 0, id);
6118 return NULL;
6119 }
6120
6121 // This is an optimization for a connected UDP socket, which has only one flow.
6122 // No need to do the hash lookup.
6123 if (db->cfdb_count == 1) {
6124 if (db->cfdb_only_entry && db->cfdb_only_entry->cfentry_cfil &&
6125 db->cfdb_only_entry->cfentry_cfil->cfi_sock_id == id) {
6126 return db->cfdb_only_entry->cfentry_cfil;
6127 }
6128 }
6129
6130 hash_entry = cfil_db_lookup_entry_with_sockid(db, id);
6131 return hash_entry != NULL ? hash_entry->cfentry_cfil : NULL;
6132 }
6133
6134 struct cfil_hash_entry *
6135 cfil_sock_udp_get_flow(struct socket *so, uint32_t filter_control_unit, bool outgoing, struct sockaddr *local, struct sockaddr *remote, int debug)
6136 {
6137 struct cfil_hash_entry *hash_entry = NULL;
6138
6139 errno_t error = 0;
6140 socket_lock_assert_owned(so);
6141
6142 // If new socket, allocate cfil db
6143 if (so->so_cfil_db == NULL) {
6144 if (cfil_db_init(so) != 0) {
6145 return NULL;
6146 }
6147 }
6148
6149 // See if flow already exists.
6150 hash_entry = cfil_db_lookup_entry(so->so_cfil_db, local, remote, false);
6151 if (hash_entry == NULL) {
6152 // No match with both local and remote, try match with remote only
6153 hash_entry = cfil_db_lookup_entry(so->so_cfil_db, local, remote, true);
6154 if (hash_entry != NULL) {
6155 // Simply update the local address into the original flow, keeping
6156 // its sockId and flow_hash unchanged.
6157 cfil_db_update_entry_local(so->so_cfil_db, hash_entry, local);
6158 }
6159 }
6160 if (hash_entry != NULL) {
6161 return hash_entry;
6162 }
6163
6164 hash_entry = cfil_db_add_entry(so->so_cfil_db, local, remote);
6165 if (hash_entry == NULL) {
6166 OSIncrementAtomic(&cfil_stats.cfs_sock_attach_no_mem);
6167 CFIL_LOG(LOG_ERR, "CFIL: UDP failed to add entry");
6168 return NULL;
6169 }
6170
6171 if (cfil_info_alloc(so, hash_entry) == NULL ||
6172 hash_entry->cfentry_cfil == NULL) {
6173 cfil_db_delete_entry(so->so_cfil_db, hash_entry);
6174 CFIL_LOG(LOG_ERR, "CFIL: UDP failed to alloc cfil_info");
6175 OSIncrementAtomic(&cfil_stats.cfs_sock_attach_no_mem);
6176 return NULL;
6177 }
6178 hash_entry->cfentry_cfil->cfi_dir = outgoing ? CFS_CONNECTION_DIR_OUT : CFS_CONNECTION_DIR_IN;
6179 hash_entry->cfentry_cfil->cfi_debug = debug;
6180
6181 #if LIFECYCLE_DEBUG
6182 cfil_info_log(LOG_ERR, hash_entry->cfentry_cfil, "CFIL: LIFECYCLE: ADDED");
6183 #endif
6184
6185 if (cfil_info_attach_unit(so, filter_control_unit, hash_entry->cfentry_cfil) == 0) {
6186 cfil_info_free(hash_entry->cfentry_cfil);
6187 cfil_db_delete_entry(so->so_cfil_db, hash_entry);
6188 CFIL_LOG(LOG_ERR, "CFIL: UDP cfil_info_attach_unit(%u) failed",
6189 filter_control_unit);
6190 OSIncrementAtomic(&cfil_stats.cfs_sock_attach_failed);
6191 return NULL;
6192 }
6193 CFIL_LOG(LOG_DEBUG, "CFIL: UDP <so %llx> filter_control_unit %u sockID %llu attached",
6194 (uint64_t)VM_KERNEL_ADDRPERM(so),
6195 filter_control_unit, hash_entry->cfentry_cfil->cfi_sock_id);
6196
6197 so->so_flags |= SOF_CONTENT_FILTER;
6198 OSIncrementAtomic(&cfil_stats.cfs_sock_attached);
6199
6200 /* Hold a reference on the socket for each flow */
6201 so->so_usecount++;
6202
6203 if (debug) {
6204 cfil_info_log(LOG_ERR, hash_entry->cfentry_cfil, "CFIL: LIFECYCLE: ADDED");
6205 }
6206
6207 error = cfil_dispatch_attach_event(so, hash_entry->cfentry_cfil, 0,
6208 outgoing ? CFS_CONNECTION_DIR_OUT : CFS_CONNECTION_DIR_IN);
6209 /* We can recover from flow control or out of memory errors */
6210 if (error != 0 && error != ENOBUFS && error != ENOMEM) {
6211 return NULL;
6212 }
6213
6214 CFIL_INFO_VERIFY(hash_entry->cfentry_cfil);
6215 return hash_entry;
6216 }
6217
6218 errno_t
6219 cfil_sock_udp_handle_data(bool outgoing, struct socket *so,
6220 struct sockaddr *local, struct sockaddr *remote,
6221 struct mbuf *data, struct mbuf *control, uint32_t flags)
6222 {
6223 #pragma unused(outgoing, so, local, remote, data, control, flags)
6224 errno_t error = 0;
6225 uint32_t filter_control_unit;
6226 struct cfil_hash_entry *hash_entry = NULL;
6227 struct cfil_info *cfil_info = NULL;
6228 int debug = 0;
6229
6230 socket_lock_assert_owned(so);
6231
6232 if (cfil_active_count == 0) {
6233 CFIL_LOG(LOG_DEBUG, "CFIL: UDP no active filter");
6234 OSIncrementAtomic(&cfil_stats.cfs_sock_attach_in_vain);
6235 return error;
6236 }
6237
6238 // Socket has been blessed (marked to skip content filtering)
6239 if ((so->so_flags1 & SOF1_CONTENT_FILTER_SKIP) != 0) {
6240 return error;
6241 }
6242
6243 filter_control_unit = necp_socket_get_content_filter_control_unit(so);
6244 if (filter_control_unit == 0) {
6245 CFIL_LOG(LOG_DEBUG, "CFIL: UDP failed to get control unit");
6246 return error;
6247 }
6248
6249 if (filter_control_unit == NECP_FILTER_UNIT_NO_FILTER) {
6250 return error;
6251 }
6252
6253 if ((filter_control_unit & NECP_MASK_USERSPACE_ONLY) != 0) {
6254 CFIL_LOG(LOG_DEBUG, "CFIL: UDP user space only");
6255 OSIncrementAtomic(&cfil_stats.cfs_sock_userspace_only);
6256 return error;
6257 }
6258
6259 hash_entry = cfil_sock_udp_get_flow(so, filter_control_unit, outgoing, local, remote, debug);
6260 if (hash_entry == NULL || hash_entry->cfentry_cfil == NULL) {
6261 CFIL_LOG(LOG_ERR, "CFIL: Falied to create UDP flow");
6262 return EPIPE;
6263 }
6264 // Update last used timestamp, this drives the flow idle timeout
6265 hash_entry->cfentry_lastused = net_uptime();
6266 cfil_info = hash_entry->cfentry_cfil;
6267
6268 if (cfil_info->cfi_flags & CFIF_DROP) {
6269 #if DATA_DEBUG
6270 cfil_hash_entry_log(LOG_DEBUG, so, hash_entry, 0, "CFIL: UDP DROP");
6271 #endif
6272 return EPIPE;
6273 }
6274 if (control != NULL) {
6275 OSIncrementAtomic(&cfil_stats.cfs_data_in_control);
6276 }
6277 if (data->m_type == MT_OOBDATA) {
6278 CFIL_LOG(LOG_ERR, "so %llx MSG_OOB",
6279 (uint64_t)VM_KERNEL_ADDRPERM(so));
6280 OSIncrementAtomic(&cfil_stats.cfs_data_in_oob);
6281 }
6282
6283 error = cfil_data_common(so, cfil_info, outgoing, remote, data, control, flags);
6284
6285 return error;
6286 }
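
/*
 * Summary of the checks above: a datagram is only diverted to a content
 * filter when every earlier test falls through.
 *
 *   - no active filter (cfil_active_count == 0)          -> pass through
 *   - socket flagged SOF1_CONTENT_FILTER_SKIP            -> pass through
 *   - NECP returns no filter control unit (0)            -> pass through
 *   - control unit is NECP_FILTER_UNIT_NO_FILTER         -> pass through
 *   - control unit is user-space only                    -> pass through
 *   - otherwise a flow is looked up or created and the
 *     data is run through cfil_data_common()
 */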
6287
6288 /*
6289 * Go through all UDP flows for the specified socket and return TRUE if
6290 * any flow is still attached. If need_wait is TRUE, wait on the first
6291 * attached flow.
6292 */
6293 static int
6294 cfil_filters_udp_attached(struct socket *so, bool need_wait)
6295 {
6296 struct timespec ts;
6297 lck_mtx_t *mutex_held;
6298 struct cfilhashhead *cfilhash = NULL;
6299 struct cfil_db *db = NULL;
6300 struct cfil_hash_entry *hash_entry = NULL;
6301 struct cfil_hash_entry *temp_hash_entry = NULL;
6302 struct cfil_info *cfil_info = NULL;
6303 struct cfil_entry *entry = NULL;
6304 errno_t error = 0;
6305 int kcunit;
6306 int attached = 0;
6307 uint64_t sock_flow_id = 0;
6308
6309 socket_lock_assert_owned(so);
6310
6311 if ((so->so_flags & SOF_CONTENT_FILTER) != 0 && so->so_cfil_db != NULL) {
6312 if (so->so_proto->pr_getlock != NULL) {
6313 mutex_held = (*so->so_proto->pr_getlock)(so, PR_F_WILLUNLOCK);
6314 } else {
6315 mutex_held = so->so_proto->pr_domain->dom_mtx;
6316 }
6317 LCK_MTX_ASSERT(mutex_held, LCK_MTX_ASSERT_OWNED);
6318
6319 db = so->so_cfil_db;
6320
6321 for (int i = 0; i < CFILHASHSIZE; i++) {
6322 cfilhash = &db->cfdb_hashbase[i];
6323
6324 LIST_FOREACH_SAFE(hash_entry, cfilhash, cfentry_link, temp_hash_entry) {
6325 if (hash_entry->cfentry_cfil != NULL) {
6326 cfil_info = hash_entry->cfentry_cfil;
6327 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
6328 entry = &cfil_info->cfi_entries[kcunit - 1];
6329
6330 /* Are we attached to the filter? */
6331 if (entry->cfe_filter == NULL) {
6332 continue;
6333 }
6334
6335 if ((entry->cfe_flags & CFEF_SENT_SOCK_ATTACHED) == 0) {
6336 continue;
6337 }
6338 if ((entry->cfe_flags & CFEF_CFIL_DETACHED) != 0) {
6339 continue;
6340 }
6341
6342 attached = 1;
6343
6344 if (need_wait == TRUE) {
6345 #if LIFECYCLE_DEBUG
6346 cfil_info_log(LOG_ERR, cfil_info, "CFIL: LIFECYCLE: WAIT FOR FLOW TO FINISH");
6347 #endif
6348
6349 ts.tv_sec = cfil_close_wait_timeout / 1000;
6350 ts.tv_nsec = (cfil_close_wait_timeout % 1000) *
6351 NSEC_PER_USEC * 1000;
6352
6353 OSIncrementAtomic(&cfil_stats.cfs_close_wait);
6354 cfil_info->cfi_flags |= CFIF_CLOSE_WAIT;
6355 sock_flow_id = cfil_info->cfi_sock_id;
6356
6357 error = msleep((caddr_t)cfil_info, mutex_held,
6358 PSOCK | PCATCH, "cfil_filters_udp_attached", &ts);
6359
6360 // Woke up from sleep, validate if cfil_info is still valid
6361 if (so->so_cfil_db == NULL ||
6362 (cfil_info != cfil_db_get_cfil_info(so->so_cfil_db, sock_flow_id))) {
6363 // cfil_info is not valid, do not continue
6364 goto done;
6365 }
6366
6367 cfil_info->cfi_flags &= ~CFIF_CLOSE_WAIT;
6368
6369 #if LIFECYCLE_DEBUG
6370 cfil_info_log(LOG_ERR, cfil_info, "CFIL: LIFECYCLE: WAIT FOR FLOW DONE");
6371 #endif
6372
6373 /*
6374 * Force close in case of timeout
6375 */
6376 if (error != 0) {
6377 OSIncrementAtomic(&cfil_stats.cfs_close_wait_timeout);
6378 #if LIFECYCLE_DEBUG
6379 cfil_info_log(LOG_ERR, cfil_info, "CFIL: LIFECYCLE: WAIT FOR FLOW TIMED OUT, FORCE DETACH");
6380 #endif
6381 entry->cfe_flags |= CFEF_CFIL_DETACHED;
6382 }
6383 }
6384 goto done;
6385 }
6386 }
6387 }
6388 }
6389 }
6390
6391 done:
6392 return attached;
6393 }
6394
6395 int32_t
6396 cfil_sock_udp_data_pending(struct sockbuf *sb, bool check_thread)
6397 {
6398 struct socket *so = sb->sb_so;
6399 struct cfi_buf *cfi_buf;
6400 uint64_t pending = 0;
6401 uint64_t total_pending = 0;
6402 struct cfilhashhead *cfilhash = NULL;
6403 struct cfil_db *db = NULL;
6404 struct cfil_hash_entry *hash_entry = NULL;
6405 struct cfil_hash_entry *temp_hash_entry = NULL;
6406
6407 socket_lock_assert_owned(so);
6408
6409 if ((so->so_flags & SOF_CONTENT_FILTER) != 0 && so->so_cfil_db != NULL &&
6410 (check_thread == FALSE || so->so_snd.sb_cfil_thread != current_thread())) {
6411 db = so->so_cfil_db;
6412
6413 for (int i = 0; i < CFILHASHSIZE; i++) {
6414 cfilhash = &db->cfdb_hashbase[i];
6415
6416 LIST_FOREACH_SAFE(hash_entry, cfilhash, cfentry_link, temp_hash_entry) {
6417 if (hash_entry->cfentry_cfil != NULL) {
6418 if ((sb->sb_flags & SB_RECV) == 0) {
6419 cfi_buf = &hash_entry->cfentry_cfil->cfi_snd;
6420 } else {
6421 cfi_buf = &hash_entry->cfentry_cfil->cfi_rcv;
6422 }
6423
6424 pending = cfi_buf->cfi_pending_last - cfi_buf->cfi_pending_first;
6425 /*
6426 * If we are limited by the "chars of mbufs used" count, adjust the
6427 * estimate so we won't overcommit
6428 */
6429 if ((uint64_t)cfi_buf->cfi_pending_mbcnt > pending) {
6430 pending = cfi_buf->cfi_pending_mbcnt;
6431 }
6432
6433 total_pending += pending;
6434 }
6435 }
6436 }
6437
6438 VERIFY(total_pending < INT32_MAX);
6439 #if DATA_DEBUG
6440 CFIL_LOG(LOG_DEBUG, "CFIL: <so %llx> total pending %llu <check_thread %d>",
6441 (uint64_t)VM_KERNEL_ADDRPERM(so),
6442 total_pending, check_thread);
6443 #endif
6444 }
6445
6446 return (int32_t)(total_pending);
6447 }
6448
6449 int
6450 cfil_sock_udp_notify_shutdown(struct socket *so, int how, int drop_flag, int shut_flag)
6451 {
6452 struct cfil_info *cfil_info = NULL;
6453 struct cfilhashhead *cfilhash = NULL;
6454 struct cfil_db *db = NULL;
6455 struct cfil_hash_entry *hash_entry = NULL;
6456 struct cfil_hash_entry *temp_hash_entry = NULL;
6457 errno_t error = 0;
6458 int done_count = 0;
6459 int kcunit;
6460
6461 socket_lock_assert_owned(so);
6462
6463 if ((so->so_flags & SOF_CONTENT_FILTER) != 0 && so->so_cfil_db != NULL) {
6464 db = so->so_cfil_db;
6465
6466 for (int i = 0; i < CFILHASHSIZE; i++) {
6467 cfilhash = &db->cfdb_hashbase[i];
6468
6469 LIST_FOREACH_SAFE(hash_entry, cfilhash, cfentry_link, temp_hash_entry) {
6470 if (hash_entry->cfentry_cfil != NULL) {
6471 cfil_info = hash_entry->cfentry_cfil;
6472
6473 // This flow is marked as DROP
6474 if (cfil_info->cfi_flags & drop_flag) {
6475 done_count++;
6476 continue;
6477 }
6478
6479 // This flow has been shut already, skip
6480 if (cfil_info->cfi_flags & shut_flag) {
6481 continue;
6482 }
6483 // Mark flow as shut
6484 cfil_info->cfi_flags |= shut_flag;
6485 done_count++;
6486
6487 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
6488 /* Disconnect incoming side */
6489 if (how != SHUT_WR) {
6490 error = cfil_dispatch_disconnect_event(so, cfil_info, kcunit, 0);
6491 }
6492 /* Disconnect outgoing side */
6493 if (how != SHUT_RD) {
6494 error = cfil_dispatch_disconnect_event(so, cfil_info, kcunit, 1);
6495 }
6496 }
6497 }
6498 }
6499 }
6500 }
6501
6502 if (done_count == 0) {
6503 error = ENOTCONN;
6504 }
6505 return error;
6506 }
6507
6508 int
6509 cfil_sock_udp_shutdown(struct socket *so, int *how)
6510 {
6511 int error = 0;
6512
6513 if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || (so->so_cfil_db == NULL)) {
6514 goto done;
6515 }
6516
6517 socket_lock_assert_owned(so);
6518
6519 CFIL_LOG(LOG_INFO, "so %llx how %d",
6520 (uint64_t)VM_KERNEL_ADDRPERM(so), *how);
6521
6522 /*
6523 * Check the state of the socket before the content filter
6524 */
6525 if (*how != SHUT_WR && (so->so_state & SS_CANTRCVMORE) != 0) {
6526 /* read already shut down */
6527 error = ENOTCONN;
6528 goto done;
6529 }
6530 if (*how != SHUT_RD && (so->so_state & SS_CANTSENDMORE) != 0) {
6531 /* write already shut down */
6532 error = ENOTCONN;
6533 goto done;
6534 }
6535
6536 /*
6537 * shutdown read: SHUT_RD or SHUT_RDWR
6538 */
6539 if (*how != SHUT_WR) {
6540 error = cfil_sock_udp_notify_shutdown(so, SHUT_RD, CFIF_DROP, CFIF_SHUT_RD);
6541 if (error != 0) {
6542 goto done;
6543 }
6544 }
6545 /*
6546 * shutdown write: SHUT_WR or SHUT_RDWR
6547 */
6548 if (*how != SHUT_RD) {
6549 error = cfil_sock_udp_notify_shutdown(so, SHUT_WR, CFIF_DROP, CFIF_SHUT_WR);
6550 if (error != 0) {
6551 goto done;
6552 }
6553
6554 /*
6555 * When outgoing data is pending, we delay the shutdown at the
6556 * protocol level until the content filters give the final
6557 * verdict on the pending data.
6558 */
6559 if (cfil_sock_data_pending(&so->so_snd) != 0) {
6560 /*
6561 * When shutting down the read and write sides at once
6562 * we can proceed to the final shutdown of the read
6563 * side. Otherwise, we just return.
6564 */
6565 if (*how == SHUT_WR) {
6566 error = EJUSTRETURN;
6567 } else if (*how == SHUT_RDWR) {
6568 *how = SHUT_RD;
6569 }
6570 }
6571 }
6572 done:
6573 return error;
6574 }
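
/*
 * Summary of how cfil_sock_udp_shutdown() above defers the protocol-level
 * shutdown while filtered outgoing data is still pending (assuming neither
 * side was already shut down):
 *
 *   *how on entry   pending send data   outcome
 *   -------------   -----------------   -----------------------------------
 *   SHUT_RD         (not checked)       0, read side shuts down normally
 *   SHUT_WR         yes                 EJUSTRETURN, caller returns early
 *   SHUT_RDWR       yes                 0, *how downgraded to SHUT_RD
 *   SHUT_WR/RDWR    no                  0, shutdown proceeds normally
 */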
6575
6576 void
6577 cfil_sock_udp_close_wait(struct socket *so)
6578 {
6579 socket_lock_assert_owned(so);
6580
6581 while (cfil_filters_udp_attached(so, FALSE)) {
6582 /*
6583 * Notify the filters we are going away so they can detach
6584 */
6585 cfil_sock_udp_notify_shutdown(so, SHUT_RDWR, 0, 0);
6586
6587 /*
6588 * Make sure we still need to wait after the filters are notified
6589 * of the disconnection
6590 */
6591 if (cfil_filters_udp_attached(so, TRUE) == 0) {
6592 break;
6593 }
6594 }
6595 }
6596
6597 void
6598 cfil_sock_udp_is_closed(struct socket *so)
6599 {
6600 struct cfil_info *cfil_info = NULL;
6601 struct cfilhashhead *cfilhash = NULL;
6602 struct cfil_db *db = NULL;
6603 struct cfil_hash_entry *hash_entry = NULL;
6604 struct cfil_hash_entry *temp_hash_entry = NULL;
6605 errno_t error = 0;
6606 int kcunit;
6607
6608 socket_lock_assert_owned(so);
6609
6610 if ((so->so_flags & SOF_CONTENT_FILTER) != 0 && so->so_cfil_db != NULL) {
6611 db = so->so_cfil_db;
6612
6613 for (int i = 0; i < CFILHASHSIZE; i++) {
6614 cfilhash = &db->cfdb_hashbase[i];
6615
6616 LIST_FOREACH_SAFE(hash_entry, cfilhash, cfentry_link, temp_hash_entry) {
6617 if (hash_entry->cfentry_cfil != NULL) {
6618 cfil_info = hash_entry->cfentry_cfil;
6619
6620 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
6621 /* Let the filters know of the closing */
6622 error = cfil_dispatch_closed_event(so, cfil_info, kcunit);
6623 }
6624
6625 /* Last chance to push passed data out */
6626 error = cfil_acquire_sockbuf(so, cfil_info, 1);
6627 if (error == 0) {
6628 cfil_service_inject_queue(so, cfil_info, 1);
6629 }
6630 cfil_release_sockbuf(so, 1);
6631
6632 cfil_info->cfi_flags |= CFIF_SOCK_CLOSED;
6633
6634 /* Pending data needs to go */
6635 cfil_flush_queues(so, cfil_info);
6636
6637 CFIL_INFO_VERIFY(cfil_info);
6638 }
6639 }
6640 }
6641 }
6642 }
6643
6644 void
6645 cfil_sock_udp_buf_update(struct sockbuf *sb)
6646 {
6647 struct cfil_info *cfil_info = NULL;
6648 struct cfilhashhead *cfilhash = NULL;
6649 struct cfil_db *db = NULL;
6650 struct cfil_hash_entry *hash_entry = NULL;
6651 struct cfil_hash_entry *temp_hash_entry = NULL;
6652 errno_t error = 0;
6653 int outgoing;
6654 struct socket *so = sb->sb_so;
6655
6656 socket_lock_assert_owned(so);
6657
6658 if ((so->so_flags & SOF_CONTENT_FILTER) != 0 && so->so_cfil_db != NULL) {
6659 if (!cfil_sbtrim) {
6660 return;
6661 }
6662
6663 db = so->so_cfil_db;
6664
6665 for (int i = 0; i < CFILHASHSIZE; i++) {
6666 cfilhash = &db->cfdb_hashbase[i];
6667
6668 LIST_FOREACH_SAFE(hash_entry, cfilhash, cfentry_link, temp_hash_entry) {
6669 if (hash_entry->cfentry_cfil != NULL) {
6670 cfil_info = hash_entry->cfentry_cfil;
6671
6672 if ((sb->sb_flags & SB_RECV) == 0) {
6673 if ((cfil_info->cfi_flags & CFIF_RETRY_INJECT_OUT) == 0) {
6674 return;
6675 }
6676 outgoing = 1;
6677 OSIncrementAtomic(&cfil_stats.cfs_inject_q_out_retry);
6678 } else {
6679 if ((cfil_info->cfi_flags & CFIF_RETRY_INJECT_IN) == 0) {
6680 return;
6681 }
6682 outgoing = 0;
6683 OSIncrementAtomic(&cfil_stats.cfs_inject_q_in_retry);
6684 }
6685
6686 CFIL_LOG(LOG_NOTICE, "so %llx outgoing %d",
6687 (uint64_t)VM_KERNEL_ADDRPERM(so), outgoing);
6688
6689 error = cfil_acquire_sockbuf(so, cfil_info, outgoing);
6690 if (error == 0) {
6691 cfil_service_inject_queue(so, cfil_info, outgoing);
6692 }
6693 cfil_release_sockbuf(so, outgoing);
6694 }
6695 }
6696 }
6697 }
6698 }
6699
6700 void
6701 cfil_filter_show(u_int32_t kcunit)
6702 {
6703 struct content_filter *cfc = NULL;
6704 struct cfil_entry *entry;
6705 int count = 0;
6706
6707 if (content_filters == NULL) {
6708 return;
6709 }
6710 if (kcunit > MAX_CONTENT_FILTER) {
6711 return;
6712 }
6713
6714 cfil_rw_lock_shared(&cfil_lck_rw);
6715
6716 if (content_filters[kcunit - 1] == NULL) {
6717 cfil_rw_unlock_shared(&cfil_lck_rw);
6718 return;
6719 }
6720 cfc = content_filters[kcunit - 1];
6721
6722 CFIL_LOG(LOG_ERR, "CFIL: FILTER SHOW: Filter <unit %d, entry count %d> flags <%lx>:",
6723 kcunit, cfc->cf_sock_count, (unsigned long)cfc->cf_flags);
6724 if (cfc->cf_flags & CFF_DETACHING) {
6725 CFIL_LOG(LOG_ERR, "CFIL: FILTER SHOW: - DETACHING");
6726 }
6727 if (cfc->cf_flags & CFF_ACTIVE) {
6728 CFIL_LOG(LOG_ERR, "CFIL: FILTER SHOW: - ACTIVE");
6729 }
6730 if (cfc->cf_flags & CFF_FLOW_CONTROLLED) {
6731 CFIL_LOG(LOG_ERR, "CFIL: FILTER SHOW: - FLOW CONTROLLED");
6732 }
6733
6734 TAILQ_FOREACH(entry, &cfc->cf_sock_entries, cfe_link) {
6735 if (entry->cfe_cfil_info && entry->cfe_cfil_info->cfi_so) {
6736 struct cfil_info *cfil_info = entry->cfe_cfil_info;
6737
6738 count++;
6739
6740 if (entry->cfe_flags & CFEF_CFIL_DETACHED) {
6741 cfil_info_log(LOG_ERR, cfil_info, "CFIL: FILTER SHOW: - DETACHED");
6742 } else {
6743 cfil_info_log(LOG_ERR, cfil_info, "CFIL: FILTER SHOW: - ATTACHED");
6744 }
6745 }
6746 }
6747
6748 CFIL_LOG(LOG_ERR, "CFIL: FILTER SHOW: Filter - total entries shown: %d", count);
6749
6750 cfil_rw_unlock_shared(&cfil_lck_rw);
6751 }
6752
6753 void
6754 cfil_info_show(void)
6755 {
6756 struct cfil_info *cfil_info;
6757 int count = 0;
6758
6759 cfil_rw_lock_shared(&cfil_lck_rw);
6760
6761 CFIL_LOG(LOG_ERR, "CFIL: INFO SHOW: count %d", cfil_sock_attached_count);
6762
6763 TAILQ_FOREACH(cfil_info, &cfil_sock_head, cfi_link) {
6764 count++;
6765
6766 cfil_info_log(LOG_ERR, cfil_info, "CFIL: INFO SHOW");
6767
6768 if (cfil_info->cfi_flags & CFIF_DROP) {
6769 CFIL_LOG(LOG_ERR, "CFIL: INFO FLAG - DROP");
6770 }
6771 if (cfil_info->cfi_flags & CFIF_CLOSE_WAIT) {
6772 CFIL_LOG(LOG_ERR, "CFIL: INFO FLAG - CLOSE_WAIT");
6773 }
6774 if (cfil_info->cfi_flags & CFIF_SOCK_CLOSED) {
6775 CFIL_LOG(LOG_ERR, "CFIL: INFO FLAG - SOCK_CLOSED");
6776 }
6777 if (cfil_info->cfi_flags & CFIF_RETRY_INJECT_IN) {
6778 CFIL_LOG(LOG_ERR, "CFIL: INFO FLAG - RETRY_INJECT_IN");
6779 }
6780 if (cfil_info->cfi_flags & CFIF_RETRY_INJECT_OUT) {
6781 CFIL_LOG(LOG_ERR, "CFIL: INFO FLAG - RETRY_INJECT_OUT");
6782 }
6783 if (cfil_info->cfi_flags & CFIF_SHUT_WR) {
6784 CFIL_LOG(LOG_ERR, "CFIL: INFO FLAG - SHUT_WR");
6785 }
6786 if (cfil_info->cfi_flags & CFIF_SHUT_RD) {
6787 CFIL_LOG(LOG_ERR, "CFIL: INFO FLAG - SHUT_RD");
6788 }
6789 }
6790
6791 CFIL_LOG(LOG_ERR, "CFIL: INFO SHOW: total cfil_info shown: %d", count);
6792
6793 cfil_rw_unlock_shared(&cfil_lck_rw);
6794 }
6795
6796 bool
6797 cfil_info_idle_timed_out(struct cfil_info *cfil_info, int timeout, u_int32_t current_time)
6798 {
6799 if (cfil_info && cfil_info->cfi_hash_entry &&
6800 (current_time - cfil_info->cfi_hash_entry->cfentry_lastused >= (u_int32_t)timeout)) {
6801 #if GC_DEBUG
6802 cfil_info_log(LOG_ERR, cfil_info, "CFIL: flow IDLE timeout expired");
6803 #endif
6804 return true;
6805 }
6806 return false;
6807 }
6808
6809 bool
6810 cfil_info_action_timed_out(struct cfil_info *cfil_info, int timeout)
6811 {
6812 struct cfil_entry *entry;
6813 struct timeval current_tv;
6814 struct timeval diff_time;
6815
6816 if (cfil_info == NULL) {
6817 return false;
6818 }
6819
6820 /*
6821 * If we have queued up more data than the pass offset and we haven't received
6822 * an action from user space for a while (the user space filter might have crashed),
6823 * return action timed out.
6824 */
6825 if (cfil_info->cfi_snd.cfi_pending_last > cfil_info->cfi_snd.cfi_pass_offset ||
6826 cfil_info->cfi_rcv.cfi_pending_last > cfil_info->cfi_rcv.cfi_pass_offset) {
6827 microuptime(&current_tv);
6828
6829 for (int kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
6830 entry = &cfil_info->cfi_entries[kcunit - 1];
6831
6832 if (entry->cfe_filter == NULL) {
6833 continue;
6834 }
6835
6836 if (cfil_info->cfi_snd.cfi_pending_last > entry->cfe_snd.cfe_pass_offset ||
6837 cfil_info->cfi_rcv.cfi_pending_last > entry->cfe_rcv.cfe_pass_offset) {
6838 // haven't gotten an action from this filter, check timeout
6839 timersub(&current_tv, &entry->cfe_last_action, &diff_time);
6840 if (diff_time.tv_sec >= timeout) {
6841 #if GC_DEBUG
6842 cfil_info_log(LOG_ERR, cfil_info, "CFIL: flow ACTION timeout expired");
6843 #endif
6844 return true;
6845 }
6846 }
6847 }
6848 }
6849 return false;
6850 }
6851
6852 bool
6853 cfil_info_buffer_threshold_exceeded(struct cfil_info *cfil_info)
6854 {
6855 if (cfil_info == NULL) {
6856 return false;
6857 }
6858
6859 /*
6860 * Clean up flow if it exceeded queue thresholds
6861 */
6862 if (cfil_info->cfi_snd.cfi_tail_drop_cnt ||
6863 cfil_info->cfi_rcv.cfi_tail_drop_cnt) {
6864 #if GC_DEBUG
6865 CFIL_LOG(LOG_ERR, "CFIL: queue threshold exceeded: mbuf max <count: %d bytes: %d> tail drop count <OUT: %d IN: %d>",
6866 cfil_udp_gc_mbuf_num_max,
6867 cfil_udp_gc_mbuf_cnt_max,
6868 cfil_info->cfi_snd.cfi_tail_drop_cnt,
6869 cfil_info->cfi_rcv.cfi_tail_drop_cnt);
6870 cfil_info_log(LOG_ERR, cfil_info, "CFIL: queue threshold exceeded");
6871 #endif
6872 return true;
6873 }
6874
6875 return false;
6876 }
6877
6878 static void
6879 cfil_udp_gc_thread_sleep(bool forever)
6880 {
6881 if (forever) {
6882 (void) assert_wait((event_t) &cfil_sock_udp_attached_count,
6883 THREAD_INTERRUPTIBLE);
6884 } else {
6885 uint64_t deadline = 0;
6886 nanoseconds_to_absolutetime(UDP_FLOW_GC_RUN_INTERVAL_NSEC, &deadline);
6887 clock_absolutetime_interval_to_deadline(deadline, &deadline);
6888
6889 (void) assert_wait_deadline(&cfil_sock_udp_attached_count,
6890 THREAD_INTERRUPTIBLE, deadline);
6891 }
6892 }
6893
6894 static void
6895 cfil_udp_gc_thread_func(void *v, wait_result_t w)
6896 {
6897 #pragma unused(v, w)
6898
6899 ASSERT(cfil_udp_gc_thread == current_thread());
6900 thread_set_thread_name(current_thread(), "CFIL_UDP_GC");
6901
6902 // Kick off gc shortly
6903 cfil_udp_gc_thread_sleep(false);
6904 thread_block_parameter((thread_continue_t) cfil_info_udp_expire, NULL);
6905 /* NOTREACHED */
6906 }
6907
6908 static void
6909 cfil_info_udp_expire(void *v, wait_result_t w)
6910 {
6911 #pragma unused(v, w)
6912
6913 static uint64_t expired_array[UDP_FLOW_GC_MAX_COUNT];
6914 static uint32_t expired_count = 0;
6915
6916 struct cfil_info *cfil_info;
6917 struct cfil_hash_entry *hash_entry;
6918 struct cfil_db *db;
6919 struct socket *so;
6920 u_int64_t current_time = 0;
6921
6922 current_time = net_uptime();
6923
6924 // Get all expired UDP flow ids
6925 cfil_rw_lock_shared(&cfil_lck_rw);
6926
6927 if (cfil_sock_udp_attached_count == 0) {
6928 cfil_rw_unlock_shared(&cfil_lck_rw);
6929 goto go_sleep;
6930 }
6931
6932 TAILQ_FOREACH(cfil_info, &cfil_sock_head, cfi_link) {
6933 if (expired_count >= UDP_FLOW_GC_MAX_COUNT) {
6934 break;
6935 }
6936
6937 if (IS_IP_DGRAM(cfil_info->cfi_so)) {
6938 if (cfil_info_idle_timed_out(cfil_info, UDP_FLOW_GC_IDLE_TO, current_time) ||
6939 cfil_info_action_timed_out(cfil_info, UDP_FLOW_GC_ACTION_TO) ||
6940 cfil_info_buffer_threshold_exceeded(cfil_info)) {
6941 expired_array[expired_count] = cfil_info->cfi_sock_id;
6942 expired_count++;
6943 }
6944 }
6945 }
6946 cfil_rw_unlock_shared(&cfil_lck_rw);
6947
6948 if (expired_count == 0) {
6949 goto go_sleep;
6950 }
6951
6952 for (uint32_t i = 0; i < expired_count; i++) {
6953 // Search for socket (UDP only and lock so)
6954 so = cfil_socket_from_sock_id(expired_array[i], true);
6955 if (so == NULL) {
6956 continue;
6957 }
6958
6959 cfil_info = cfil_db_get_cfil_info(so->so_cfil_db, expired_array[i]);
6960 if (cfil_info == NULL) {
6961 goto unlock;
6962 }
6963
6964 db = so->so_cfil_db;
6965 hash_entry = cfil_info->cfi_hash_entry;
6966
6967 if (db == NULL || hash_entry == NULL) {
6968 goto unlock;
6969 }
6970
6971 #if GC_DEBUG || LIFECYCLE_DEBUG
6972 cfil_info_log(LOG_ERR, cfil_info, "CFIL: LIFECYCLE: GC CLEAN UP");
6973 #endif
6974
6975 cfil_db_delete_entry(db, hash_entry);
6976 cfil_info_free(cfil_info);
6977 OSIncrementAtomic(&cfil_stats.cfs_sock_detached);
6978
6979 if (so->so_flags & SOF_CONTENT_FILTER) {
6980 if (db->cfdb_count == 0) {
6981 so->so_flags &= ~SOF_CONTENT_FILTER;
6982 }
6983 VERIFY(so->so_usecount > 0);
6984 so->so_usecount--;
6985 }
6986 unlock:
6987 socket_unlock(so, 1);
6988 }
6989
6990 #if GC_DEBUG
6991 CFIL_LOG(LOG_ERR, "CFIL: UDP flow idle timeout check: expired %d idle flows", expired_count);
6992 #endif
6993 expired_count = 0;
6994
6995 go_sleep:
6996
6997 // Sleep forever (until woken up) if there are no more UDP flows to clean
6998 cfil_rw_lock_shared(&cfil_lck_rw);
6999 cfil_udp_gc_thread_sleep(cfil_sock_udp_attached_count == 0 ? true : false);
7000 cfil_rw_unlock_shared(&cfil_lck_rw);
7001 thread_block_parameter((thread_continue_t)cfil_info_udp_expire, NULL);
7002 /* NOTREACHED */
7003 }
7004
7005 struct m_tag *
7006 cfil_dgram_save_socket_state(struct cfil_info *cfil_info, struct mbuf *m)
7007 {
7008 struct m_tag *tag = NULL;
7009 struct cfil_tag *ctag = NULL;
7010 struct cfil_hash_entry *hash_entry = NULL;
7011 struct inpcb *inp = NULL;
7012
7013 if (cfil_info == NULL || cfil_info->cfi_so == NULL ||
7014 cfil_info->cfi_hash_entry == NULL || m == NULL || !(m->m_flags & M_PKTHDR)) {
7015 return NULL;
7016 }
7017
7018 inp = sotoinpcb(cfil_info->cfi_so);
7019
7020 /* Allocate a tag */
7021 tag = m_tag_create(KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_CFIL_UDP,
7022 sizeof(struct cfil_tag), M_DONTWAIT, m);
7023
7024 if (tag) {
7025 ctag = (struct cfil_tag*)(tag + 1);
7026 ctag->cfil_so_state_change_cnt = cfil_info->cfi_so->so_state_change_cnt;
7027 ctag->cfil_so_options = cfil_info->cfi_so->so_options;
7028 ctag->cfil_inp_flags = inp ? inp->inp_flags : 0;
7029
7030 hash_entry = cfil_info->cfi_hash_entry;
7031 if (hash_entry->cfentry_family == AF_INET6) {
7032 fill_ip6_sockaddr_4_6(&ctag->cfil_faddr,
7033 &hash_entry->cfentry_faddr.addr6,
7034 hash_entry->cfentry_fport);
7035 } else if (hash_entry->cfentry_family == AF_INET) {
7036 fill_ip_sockaddr_4_6(&ctag->cfil_faddr,
7037 hash_entry->cfentry_faddr.addr46.ia46_addr4,
7038 hash_entry->cfentry_fport);
7039 }
7040 m_tag_prepend(m, tag);
7041 return tag;
7042 }
7043 return NULL;
7044 }
7045
7046 struct m_tag *
7047 cfil_dgram_get_socket_state(struct mbuf *m, uint32_t *state_change_cnt, short *options,
7048 struct sockaddr **faddr, int *inp_flags)
7049 {
7050 struct m_tag *tag = NULL;
7051 struct cfil_tag *ctag = NULL;
7052
7053 tag = m_tag_locate(m, KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_CFIL_UDP, NULL);
7054 if (tag) {
7055 ctag = (struct cfil_tag *)(tag + 1);
7056 if (state_change_cnt) {
7057 *state_change_cnt = ctag->cfil_so_state_change_cnt;
7058 }
7059 if (options) {
7060 *options = ctag->cfil_so_options;
7061 }
7062 if (faddr) {
7063 *faddr = (struct sockaddr *) &ctag->cfil_faddr;
7064 }
7065 if (inp_flags) {
7066 *inp_flags = ctag->cfil_inp_flags;
7067 }
7068
7069 /*
7070 * Unlink tag and hand it over to caller.
7071 * Note that the caller is responsible for freeing it.
7072 */
7073 m_tag_unlink(m, tag);
7074 return tag;
7075 }
7076 return NULL;
7077 }
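
/*
 * Caller-side sketch (not compiled): consuming the state saved by
 * cfil_dgram_save_socket_state().  The tag returned by
 * cfil_dgram_get_socket_state() has already been unlinked from the mbuf, so
 * the caller must free it with m_tag_free() once the values are no longer
 * needed.  The surrounding output-path function is hypothetical.
 */
#if 0
static void
example_consume_cfil_state(struct mbuf *m)
{
	uint32_t state_change_cnt = 0;
	short so_options = 0;
	int inp_flags = 0;
	struct sockaddr *faddr = NULL;
	struct m_tag *tag;

	tag = cfil_dgram_get_socket_state(m, &state_change_cnt, &so_options,
	    &faddr, &inp_flags);
	if (tag != NULL) {
		/* ... use state_change_cnt, so_options, faddr, inp_flags ... */
		m_tag_free(tag);    /* tag is unlinked, caller owns it */
	}
}
#endif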
7078
7079 boolean_t
7080 cfil_dgram_peek_socket_state(struct mbuf *m, int *inp_flags)
7081 {
7082 struct m_tag *tag = NULL;
7083 struct cfil_tag *ctag = NULL;
7084
7085 tag = m_tag_locate(m, KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_CFIL_UDP, NULL);
7086 if (tag) {
7087 ctag = (struct cfil_tag *)(tag + 1);
7088 if (inp_flags) {
7089 *inp_flags = ctag->cfil_inp_flags;
7090 }
7091 return true;
7092 }
7093 return false;
7094 }
7095
7096 static int
7097 cfil_dispatch_stats_event_locked(int kcunit, struct cfil_stats_report_buffer *buffer, uint32_t stats_count)
7098 {
7099 struct content_filter *cfc = NULL;
7100 errno_t error = 0;
7101 size_t msgsize = 0;
7102
7103 if (buffer == NULL || stats_count == 0) {
7104 return error;
7105 }
7106
7107 if (content_filters == NULL || kcunit > MAX_CONTENT_FILTER) {
7108 return error;
7109 }
7110
7111 cfc = content_filters[kcunit - 1];
7112 if (cfc == NULL) {
7113 return error;
7114 }
7115
7116 /* Would be wasteful to try */
7117 if (cfc->cf_flags & CFF_FLOW_CONTROLLED) {
7118 error = ENOBUFS;
7119 goto done;
7120 }
7121
7122 msgsize = sizeof(struct cfil_msg_stats_report) + (sizeof(struct cfil_msg_sock_stats) * stats_count);
7123 buffer->msghdr.cfm_len = msgsize;
7124 buffer->msghdr.cfm_version = 1;
7125 buffer->msghdr.cfm_type = CFM_TYPE_EVENT;
7126 buffer->msghdr.cfm_op = CFM_OP_STATS;
7127 buffer->msghdr.cfm_sock_id = 0;
7128 buffer->count = stats_count;
7129
7130 #if STATS_DEBUG
7131 CFIL_LOG(LOG_ERR, "STATS (kcunit %d): msg size %lu - %lu %lu %lu",
7132 kcunit,
7133 (unsigned long)msgsize,
7134 (unsigned long)sizeof(struct cfil_msg_stats_report),
7135 (unsigned long)sizeof(struct cfil_msg_sock_stats),
7136 (unsigned long)stats_count);
7137 #endif
7138
7139 error = ctl_enqueuedata(cfc->cf_kcref, cfc->cf_kcunit,
7140 buffer,
7141 msgsize,
7142 CTL_DATA_EOR);
7143 if (error != 0) {
7144 CFIL_LOG(LOG_ERR, "ctl_enqueuedata() failed: %d", error);
7145 goto done;
7146 }
7147 OSIncrementAtomic(&cfil_stats.cfs_stats_event_ok);
7148
7149 #if STATS_DEBUG
7150 CFIL_LOG(LOG_ERR, "CFIL: STATS REPORT: send msg to %d", kcunit);
7151 #endif
7152
7153 done:
7154
7155 if (error == ENOBUFS) {
7156 OSIncrementAtomic(
7157 &cfil_stats.cfs_stats_event_flow_control);
7158
7159 if (!cfil_rw_lock_shared_to_exclusive(&cfil_lck_rw)) {
7160 cfil_rw_lock_exclusive(&cfil_lck_rw);
7161 }
7162
7163 cfc->cf_flags |= CFF_FLOW_CONTROLLED;
7164
7165 cfil_rw_unlock_exclusive(&cfil_lck_rw);
7166 } else if (error != 0) {
7167 OSIncrementAtomic(&cfil_stats.cfs_stats_event_fail);
7168 }
7169
7170 return error;
7171 }
7172
7173 static void
7174 cfil_stats_report_thread_sleep(bool forever)
7175 {
7176 #if STATS_DEBUG
7177 CFIL_LOG(LOG_ERR, "CFIL: STATS COLLECTION SLEEP");
7178 #endif
7179
7180 if (forever) {
7181 (void) assert_wait((event_t) &cfil_sock_attached_stats_count,
7182 THREAD_INTERRUPTIBLE);
7183 } else {
7184 uint64_t deadline = 0;
7185 nanoseconds_to_absolutetime(CFIL_STATS_REPORT_RUN_INTERVAL_NSEC, &deadline);
7186 clock_absolutetime_interval_to_deadline(deadline, &deadline);
7187
7188 (void) assert_wait_deadline(&cfil_sock_attached_stats_count,
7189 THREAD_INTERRUPTIBLE, deadline);
7190 }
7191 }
7192
7193 static void
7194 cfil_stats_report_thread_func(void *v, wait_result_t w)
7195 {
7196 #pragma unused(v, w)
7197
7198 ASSERT(cfil_stats_report_thread == current_thread());
7199 thread_set_thread_name(current_thread(), "CFIL_STATS_REPORT");
7200
7201 // Kick off the first stats report shortly
7202 cfil_stats_report_thread_sleep(false);
7203 thread_block_parameter((thread_continue_t) cfil_stats_report, NULL);
7204 /* NOTREACHED */
7205 }
7206
7207 static bool
7208 cfil_stats_collect_flow_stats_for_filter(int kcunit,
7209 struct cfil_info *cfil_info,
7210 struct cfil_entry *entry,
7211 struct timeval current_tv)
7212 {
7213 struct cfil_stats_report_buffer *buffer = NULL;
7214 struct cfil_msg_sock_stats *flow_array = NULL;
7215 struct cfil_msg_sock_stats *stats = NULL;
7216 struct inpcb *inp = NULL;
7217 struct timeval diff_time;
7218 uint64_t diff_time_usecs;
7219 int index = 0;
7220
7221 if (entry->cfe_stats_report_frequency == 0) {
7222 return false;
7223 }
7224
7225 buffer = global_cfil_stats_report_buffers[kcunit - 1];
7226 if (buffer == NULL) {
7227 #if STATS_DEBUG
7228 CFIL_LOG(LOG_ERR, "CFIL: STATS: no buffer");
7229 #endif
7230 return false;
7231 }
7232
7233 timersub(&current_tv, &entry->cfe_stats_report_ts, &diff_time);
7234 diff_time_usecs = (diff_time.tv_sec * USEC_PER_SEC) + diff_time.tv_usec;
7235
7236 #if STATS_DEBUG
7237 CFIL_LOG(LOG_ERR, "CFIL: STATS REPORT - elapsed time - ts %llu %llu cur ts %llu %llu diff %llu %llu (usecs %llu) @freq %llu usecs sockID %llu",
7238 (unsigned long long)entry->cfe_stats_report_ts.tv_sec,
7239 (unsigned long long)entry->cfe_stats_report_ts.tv_usec,
7240 (unsigned long long)current_tv.tv_sec,
7241 (unsigned long long)current_tv.tv_usec,
7242 (unsigned long long)diff_time.tv_sec,
7243 (unsigned long long)diff_time.tv_usec,
7244 (unsigned long long)diff_time_usecs,
7245 (unsigned long long)((entry->cfe_stats_report_frequency * NSEC_PER_MSEC) / NSEC_PER_USEC),
7246 cfil_info->cfi_sock_id);
7247 #endif
7248
7249 // Compare elapsed time in usecs
7250 if (diff_time_usecs >= (entry->cfe_stats_report_frequency * NSEC_PER_MSEC) / NSEC_PER_USEC) {
7251 #if STATS_DEBUG
7252 CFIL_LOG(LOG_ERR, "CFIL: STATS REPORT - in %llu reported %llu",
7253 cfil_info->cfi_byte_inbound_count,
7254 entry->cfe_byte_inbound_count_reported);
7255 CFIL_LOG(LOG_ERR, "CFIL: STATS REPORT - out %llu reported %llu",
7256 cfil_info->cfi_byte_outbound_count,
7257 entry->cfe_byte_outbound_count_reported);
7258 #endif
7259 // Check if the flow has new bytes that have not yet been reported
7260 if (entry->cfe_byte_inbound_count_reported < cfil_info->cfi_byte_inbound_count ||
7261 entry->cfe_byte_outbound_count_reported < cfil_info->cfi_byte_outbound_count) {
7262 flow_array = (struct cfil_msg_sock_stats *)&buffer->stats;
7263 index = global_cfil_stats_counts[kcunit - 1];
7264
7265 stats = &flow_array[index];
7266 stats->cfs_sock_id = cfil_info->cfi_sock_id;
7267 stats->cfs_byte_inbound_count = cfil_info->cfi_byte_inbound_count;
7268 stats->cfs_byte_outbound_count = cfil_info->cfi_byte_outbound_count;
7269
7270 if (entry->cfe_laddr_sent == false) {
7271 /* Cache the attach-time local address if we do not have it yet */
7272 if (cfil_info->cfi_so_attach_laddr.sa.sa_len == 0) {
7273 inp = cfil_info->cfi_so ? sotoinpcb(cfil_info->cfi_so) : NULL;
7274 if (inp != NULL) {
7275 boolean_t outgoing = (cfil_info->cfi_dir == CFS_CONNECTION_DIR_OUT);
7276 union sockaddr_in_4_6 *src = outgoing ? &cfil_info->cfi_so_attach_laddr : NULL;
7277 union sockaddr_in_4_6 *dst = outgoing ? NULL : &cfil_info->cfi_so_attach_laddr;
7278 cfil_fill_event_msg_addresses(cfil_info->cfi_hash_entry, inp,
7279 src, dst, !IS_INP_V6(inp), outgoing);
7280 }
7281 }
7282
7283 if (cfil_info->cfi_so_attach_laddr.sa.sa_len != 0) {
7284 stats->cfs_laddr.sin6 = cfil_info->cfi_so_attach_laddr.sin6;
7285 entry->cfe_laddr_sent = true;
7286 }
7287 }
7288
7289 global_cfil_stats_counts[kcunit - 1]++;
7290
7291 entry->cfe_stats_report_ts = current_tv;
7292 entry->cfe_byte_inbound_count_reported = cfil_info->cfi_byte_inbound_count;
7293 entry->cfe_byte_outbound_count_reported = cfil_info->cfi_byte_outbound_count;
7294 #if STATS_DEBUG
7295 cfil_info_log(LOG_ERR, cfil_info, "CFIL: LIFECYCLE: STATS COLLECTED");
7296 #endif
7297 CFI_ADD_TIME_LOG(cfil_info, &current_tv, &cfil_info->cfi_first_event, CFM_OP_STATS);
7298 return true;
7299 }
7300 }
7301 return false;
7302 }
7303
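/*
 * Continuation of the stats report thread: walk the flows with attached
 * stats in batches of CFIL_STATS_REPORT_MAX_COUNT, fill the per-filter
 * report buffers, dispatch one CFM_OP_STATS event per filter and then go
 * back to sleep.
 */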
7304 static void
7305 cfil_stats_report(void *v, wait_result_t w)
7306 {
7307 #pragma unused(v, w)
7308
7309 struct cfil_info *cfil_info = NULL;
7310 struct cfil_entry *entry = NULL;
7311 struct timeval current_tv;
7312 uint32_t flow_count = 0;
7313 uint64_t saved_next_sock_id = 0; // Sock id to resume reporting from on the next pass
7314 bool flow_reported = false;
7315
7316 #if STATS_DEBUG
7317 CFIL_LOG(LOG_ERR, "CFIL: STATS COLLECTION RUNNING");
7318 #endif
7319
7320 do {
7321 // Collect the sock ids of all flows that have new stats
7322 cfil_rw_lock_shared(&cfil_lck_rw);
7323
7324 if (cfil_sock_attached_stats_count == 0) {
7325 #if STATS_DEBUG
7326 CFIL_LOG(LOG_ERR, "CFIL: STATS: no flow");
7327 #endif
7328 cfil_rw_unlock_shared(&cfil_lck_rw);
7329 goto go_sleep;
7330 }
7331
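		// Reset the per-filter report buffers and flow counts before collecting this pass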
7332 for (int kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
7333 if (global_cfil_stats_report_buffers[kcunit - 1] != NULL) {
7334 memset(global_cfil_stats_report_buffers[kcunit - 1], 0, sizeof(struct cfil_stats_report_buffer));
7335 }
7336 global_cfil_stats_counts[kcunit - 1] = 0;
7337 }
7338
7339 microuptime(&current_tv);
7340 flow_count = 0;
7341
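		// Walk the flows with attached stats; if a previous pass stopped early, skip ahead to where it left off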
7342 TAILQ_FOREACH(cfil_info, &cfil_sock_head_stats, cfi_link_stats) {
7343 if (saved_next_sock_id != 0 &&
7344 saved_next_sock_id == cfil_info->cfi_sock_id) {
7345 // This is where we left off previously; start accumulating from here
7346 saved_next_sock_id = 0;
7347 }
7348
7349 if (saved_next_sock_id == 0) {
7350 if (flow_count >= CFIL_STATS_REPORT_MAX_COUNT) {
7351 // Examine a fixed number of flows each round. Remember the current flow
7352 // so we can start from here on the next pass
7353 saved_next_sock_id = cfil_info->cfi_sock_id;
7354 break;
7355 }
7356
7357 flow_reported = false;
7358 for (int kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
7359 entry = &cfil_info->cfi_entries[kcunit - 1];
7360 if (entry->cfe_filter == NULL) {
7361 #if STATS_DEBUG
7362 CFIL_LOG(LOG_NOTICE, "CFIL: STATS REPORT - so %llx no filter",
7363 cfil_info->cfi_so ? (uint64_t)VM_KERNEL_ADDRPERM(cfil_info->cfi_so) : 0);
7364 #endif
7365 continue;
7366 }
7367
7368 if ((entry->cfe_stats_report_frequency > 0) &&
7369 cfil_stats_collect_flow_stats_for_filter(kcunit, cfil_info, entry, current_tv) == true) {
7370 flow_reported = true;
7371 }
7372 }
7373 if (flow_reported == true) {
7374 flow_count++;
7375 }
7376 }
7377 }
7378
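		// Send one stats event per filter that accumulated flow records during this pass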
7379 if (flow_count > 0) {
7380 #if STATS_DEBUG
7381 CFIL_LOG(LOG_ERR, "CFIL: STATS reporting for %d flows", flow_count);
7382 #endif
7383 for (int kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
7384 if (global_cfil_stats_report_buffers[kcunit - 1] != NULL &&
7385 global_cfil_stats_counts[kcunit - 1] > 0) {
7386 cfil_dispatch_stats_event_locked(kcunit,
7387 global_cfil_stats_report_buffers[kcunit - 1],
7388 global_cfil_stats_counts[kcunit - 1]);
7389 }
7390 }
7391 } else {
7392 cfil_rw_unlock_shared(&cfil_lck_rw);
7393 goto go_sleep;
7394 }
7395
7396 cfil_rw_unlock_shared(&cfil_lck_rw);
7397
7398 // Loop again if we haven't finished the whole cfil_info list
7399 } while (saved_next_sock_id != 0);
7400
7401 go_sleep:
7402
7403 // Sleep forever (until woken up) if there are no more flows to report
7404 cfil_rw_lock_shared(&cfil_lck_rw);
7405 cfil_stats_report_thread_sleep(cfil_sock_attached_stats_count == 0);
7406 cfil_rw_unlock_shared(&cfil_lck_rw);
7407 thread_block_parameter((thread_continue_t) cfil_stats_report, NULL);
7408 /* NOTREACHED */
7409 }