1 /*
2 * Copyright (c) 2013-2020 Apple Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. Please obtain a copy of the License at
10 * http://www.opensource.apple.com/apsl/ and read it before using this
11 * file.
12 *
13 * The Original Code and all software distributed under the License are
14 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
18 * Please see the License for the specific language governing rights and
19 * limitations under the License.
20 *
21 * @APPLE_LICENSE_HEADER_END@
22 */
23
24 /*
25 * THEORY OF OPERATION
26 *
27 * The socket content filter subsystem provides a way for user space agents to
28 * make filtering decisions based on the content of the data being sent and
29 * received by INET/INET6 sockets.
30 *
31 * A content filter user space agent gets a copy of the data, and the data is
32 * also kept in a kernel buffer until the user space agent makes a pass or drop
33 * decision. This unidirectional flow of content avoids unnecessary data copies
34 * back to the kernel.
35 *
36 * A user space filter agent opens a kernel control socket with the name
37 * CONTENT_FILTER_CONTROL_NAME to attach to the socket content filter subsystem.
38 * When connected, a "struct content_filter" is created and set as the
39 * "unitinfo" of the corresponding kernel control socket instance.
40 *
41 * The socket content filter subsystem exchanges messages with the user space
42 * filter agent until an ultimate pass or drop decision is made by the
43 * user space filter agent.
44 *
45 * It should be noted that messages about many INET/INET6 sockets can be multiplexed
46 * over a single kernel control socket.
47 *
48 * Notes:
49 * - The current implementation supports all INET/INET6 sockets (TCP,
50 *   UDP, ICMP, etc.).
51 * - The current implementation supports up to two simultaneous content filters
52 * for iOS devices and eight simultaneous content filters for OSX.
53 *
54 *
55 * NECP FILTER CONTROL UNIT
56 *
57 * A user space filter agent uses the Network Extension Control Policy (NECP)
58 * database to specify which INET/INET6 sockets need to be filtered. The NECP
59 * criteria may be based on a variety of properties like user ID or proc UUID.
60 *
61 * The NECP "filter control unit" is used by the socket content filter subsystem
62 * to deliver the relevant INET/INET6 content information to the appropriate
63 * user space filter agent via its kernel control socket instance.
64 * This works as follows:
65 *
66 * 1) The user space filter agent specifies an NECP filter control unit when
67 * it adds its filtering rules to the NECP database.
68 *
69 * 2) The user space filter agent also sets its NECP filter control unit on the
70 * content filter kernel control socket via the socket option
71 * CFIL_OPT_NECP_CONTROL_UNIT.
72 *
73 * 3) The NECP database is consulted to find out if a given INET/INET6 socket
74 * needs to be subjected to content filtering and returns the corresponding
75 * NECP filter control unit -- the NECP filter control unit is actually
76 * stored in the INET/INET6 socket structure so the NECP lookup is really simple.
77 *
78 * 4) The NECP filter control unit is then used to find the corresponding
79 * kernel control socket instance.
80 *
81 * Note: NECP currently supports a single filter control unit per INET/INET6 socket
82 * but this restriction may soon be lifted.
83 *
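 * For illustration only, here is a hedged sketch of how a user space agent
 * might perform steps 1) and 2) with the standard kernel control socket
 * interfaces from <sys/kern_control.h> (privilege checks and error handling
 * omitted; the chosen necp_control_unit is an example value and must match
 * the unit used in the agent's NECP filtering rules):
 *
 *	int fd = socket(PF_SYSTEM, SOCK_DGRAM, SYSPROTO_CONTROL);
 *	struct ctl_info info = { 0 };
 *	strlcpy(info.ctl_name, CONTENT_FILTER_CONTROL_NAME, sizeof(info.ctl_name));
 *	ioctl(fd, CTLIOCGINFO, &info);          // resolve control name to ctl_id
 *	struct sockaddr_ctl sc = { 0 };
 *	sc.sc_len = sizeof(sc);
 *	sc.sc_family = AF_SYSTEM;
 *	sc.ss_sysaddr = AF_SYS_CONTROL;
 *	sc.sc_id = info.ctl_id;
 *	sc.sc_unit = 0;                         // unit selection elided in this sketch
 *	connect(fd, (struct sockaddr *)&sc, sizeof(sc));
 *	uint32_t necp_control_unit = 1;         // must match the agent's NECP rule
 *	setsockopt(fd, SYSPROTO_CONTROL, CFIL_OPT_NECP_CONTROL_UNIT,
 *	    &necp_control_unit, sizeof(necp_control_unit));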
84 *
85 * THE MESSAGING PROTOCOL
86 *
87 * The socket content filter subsystem and a user space filter agent
88 * communicate over the kernel control socket via an asynchronous
89 * messaging protocol (this is not a request-response protocol).
90 * The socket content filter subsystem sends event messages to the user
91 * space filter agent about the INET/INET6 sockets it is interested in filtering.
92 * The user space filter agent sends action messages to either allow
93 * data to pass or to disallow the data flow (and drop the connection).
94 *
95 * All messages over a content filter kernel control socket share the same
96 * common header of type "struct cfil_msg_hdr". The message type tells if
97 * it's an event message "CFM_TYPE_EVENT" or an action message "CFM_TYPE_ACTION".
98 * The message header field "cfm_sock_id" identifies a given INET/INET6 flow.
99 * For TCP, flows are per-socket. For UDP and other datagram protocols, there
100 * could be multiple flows per socket.
101 *
102 * Note the message header length field may be padded for alignment and can
103 * be larger than the actual content of the message.
104 * The field "cfm_op" describes the kind of event or action.
105 *
106 * Here are the kinds of content filter events:
107 * - CFM_OP_SOCKET_ATTACHED: a new INET/INET6 socket is being filtered
108 * - CFM_OP_SOCKET_CLOSED: An INET/INET6 socket is closed
109 * - CFM_OP_DATA_OUT: A span of data is being sent on an INET/INET6 socket
110 * - CFM_OP_DATA_IN: A span of data is being received on an INET/INET6 socket
111 *
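 * A hedged sketch of the event-receiving side of a user space agent (not
 * part of this file; buffer sizing, partial reads and error handling are
 * omitted):
 *
 *	uint64_t buf[1024];
 *	ssize_t n = recv(fd, buf, sizeof(buf), 0);
 *	struct cfil_msg_hdr *hdr = (struct cfil_msg_hdr *)buf;
 *	if (n >= (ssize_t)sizeof(*hdr) && hdr->cfm_type == CFM_TYPE_EVENT) {
 *		switch (hdr->cfm_op) {
 *		case CFM_OP_SOCKET_ATTACHED:
 *			// remember hdr->cfm_sock_id for this new flow
 *			break;
 *		case CFM_OP_DATA_OUT:
 *		case CFM_OP_DATA_IN:
 *			// inspect the data span, then send a CFM_OP_DATA_UPDATE
 *			break;
 *		case CFM_OP_SOCKET_CLOSED:
 *			// forget the state kept for hdr->cfm_sock_id
 *			break;
 *		}
 *	}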
112 *
113 * EVENT MESSAGES
114 *
115 * The CFM_OP_DATA_OUT and CFM_OP_DATA_IN event messages contain a span of
116 * data that is being sent or received. The position of this span of data
117 * in the data flow is described by a set of start and end offsets. These
118 * are absolute 64-bit offsets. The first byte sent (or received) starts
119 * at offset 0 and ends at offset 1. The length of the content data
120 * is given by the difference between the end offset and the start offset.
121 *
122 * After a CFM_OP_SOCKET_ATTACHED is delivered, CFM_OP_DATA_OUT and
123 * CFM_OP_DATA_IN events are not delivered until a CFM_OP_DATA_UPDATE
124 * action message is sent by the user space filter agent.
125 *
126 * Note: absolute 64-bit offsets should be large enough for the foreseeable
127 * future. A 64-bit counter will wrap after 468 years at 10 Gbit/sec:
128 * 2^64 / ((10E9 / 8) * 60 * 60 * 24 * 365.25) = 467.63
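 *
 * Spelled out as a quick sanity check (illustrative only):
 *
 *	double bytes_per_year = (10e9 / 8) * 60 * 60 * 24 * 365.25;
 *	double years_to_wrap = 18446744073709551616.0 / bytes_per_year;	// 2^64, ~467.6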
129 *
130 * There are two kinds of primary content filter actions:
131 * - CFM_OP_DATA_UPDATE: to update pass or peek offsets for each direction.
132 * - CFM_OP_DROP: to shut down the socket and disallow further data flow
133 *
134 * There is also an action to mark a given client flow as already filtered
135 * at a higher level, CFM_OP_BLESS_CLIENT.
136 *
137 *
138 * ACTION MESSAGES
139 *
140 * The CFM_OP_DATA_UPDATE action messages let the user space filter
141 * agent allow data to flow up to the specified pass offset -- there
142 * is a pass offset for outgoing data and a pass offset for incoming data.
143 * When a new INET/INET6 socket is attached to the content filter and a flow is
144 * created, each pass offset is initially set to 0 so no data is allowed to pass by
145 * default. When the pass offset is set to CFM_MAX_OFFSET via a CFM_OP_DATA_UPDATE
146 * then the data flow becomes unrestricted.
147 *
148 * Note that pass offsets can only be incremented. A CFM_OP_DATA_UPDATE message
149 * with a pass offset smaller than the pass offset of a previous
150 * CFM_OP_DATA_UPDATE message is silently ignored.
151 *
152 * A user space filter agent also uses CFM_OP_DATA_UPDATE action messages
153 * to tell the kernel how much data it wants to see by using the peek offsets.
154 * Just like pass offsets, there is a peek offset for each direction.
155 * When a new INET/INET6 flow is created, each peek offset is initially set to 0
156 * so no CFM_OP_DATA_OUT and CFM_OP_DATA_IN event messages are dispatched by default
157 * until a CFM_OP_DATA_UPDATE action message with a greater than 0 peek offset is sent
158 * by the user space filter agent. When the peek offset is set to CFM_MAX_OFFSET via
159 * a CFM_OP_DATA_UPDATE then the flow of update data events becomes unrestricted.
160 *
161 * Note that peek offsets cannot be smaller than the corresponding pass offset.
162 * Also, a peek offset cannot be smaller than the corresponding end offset
163 * of the last CFM_OP_DATA_OUT/CFM_OP_DATA_IN message dispatched. An attempt
164 * to set a peek offset that is too small is silently ignored.
165 *
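 * A hedged sketch of how an agent might fully open both directions for a
 * flow it has decided to allow (assuming the action message layout from
 * <net/content_filter.h>; error handling omitted):
 *
 *	struct cfil_msg_action action = { 0 };
 *	action.cfa_msghdr.cfm_len = sizeof(action);
 *	action.cfa_msghdr.cfm_version = CFM_VERSION_CURRENT;
 *	action.cfa_msghdr.cfm_type = CFM_TYPE_ACTION;
 *	action.cfa_msghdr.cfm_op = CFM_OP_DATA_UPDATE;
 *	action.cfa_msghdr.cfm_sock_id = flow_sock_id;	// from a prior event message
 *	action.cfa_out_pass_offset = CFM_MAX_OFFSET;	// unrestricted outgoing data
 *	action.cfa_out_peek_offset = CFM_MAX_OFFSET;
 *	action.cfa_in_pass_offset = CFM_MAX_OFFSET;	// unrestricted incoming data
 *	action.cfa_in_peek_offset = CFM_MAX_OFFSET;
 *	send(fd, &action, sizeof(action), 0);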
166 *
167 * PER FLOW "struct cfil_info"
168 *
169 * As soon as an INET/INET6 socket gets attached to a content filter, a
170 * "struct cfil_info" is created to hold the content filtering state for this
171 * socket. For UDP and other datagram protocols, as soon as traffic is seen for
172 * each new flow identified by its 4-tuple of source address/port and destination
173 * address/port, a "struct cfil_info" is created. Each datagram socket may
174 * have multiple flows maintained in a hash table of "struct cfil_info" entries.
175 *
176 * The content filtering state is made of the following information
177 * for each direction:
178 * - The current pass offset;
179 * - The first and last offsets of the data pending, waiting for a filtering
180 * decision;
181 * - The inject queue for data that passed the filters and that needs
182 * to be re-injected;
183 * - A content filter specific state in a set of "struct cfil_entry"
184 *
185 *
186 * CONTENT FILTER STATE "struct cfil_entry"
187 *
188 * The "struct cfil_entry" maintains the information most relevant to the
189 * message handling over a kernel control socket with a user space filter agent.
190 *
191 * The "struct cfil_entry" holds the NECP filter control unit that corresponds
192 * to its kernel control socket unit and also has a pointer
193 * to the corresponding "struct content_filter".
194 *
195 * For each direction, "struct cfil_entry" maintains the following information:
196 * - The pass offset
197 * - The peek offset
198 * - The offset of the last data peeked at by the filter
199 * - A queue of data that's waiting to be delivered to the user space filter
200 * agent on the kernel control socket
201 * - A queue of data for which event messages have been sent on the kernel
202 * control socket and are pending for a filtering decision.
203 *
204 *
205 * CONTENT FILTER QUEUES
206 *
207 * Data that is being filtered is steered away from the INET/INET6 socket buffer
208 * and instead will sit in one of three content filter queues until the data
209 * can be re-injected into the INET/INET6 socket buffer.
210 *
211 * A content filter queue is represented by "struct cfil_queue" that contains
212 * a list of mbufs and the start and end offset of the data span of
213 * the list of mbufs.
214 *
215 * The data moves into the three content filter queues according to this
216 * sequence:
217 * a) The "cfe_ctl_q" of "struct cfil_entry"
218 * b) The "cfe_pending_q" of "struct cfil_entry"
219 * c) The "cfi_inject_q" of "struct cfil_info"
220 *
221 * Note: The sequence (a),(b) may be repeated several times if there is more
222 * than one content filter attached to the INET/INET6 socket.
223 *
224 * The "cfe_ctl_q" queue holds data that cannot be delivered to the
225 * kernel control socket for two reasons:
226 * - The peek offset is less than the end offset of the mbuf data
227 * - The kernel control socket is flow controlled
228 *
229 * The "cfe_pending_q" queue holds data for which CFM_OP_DATA_OUT or
230 * CFM_OP_DATA_IN messages have been successfully dispatched to the kernel control
231 * socket and that is waiting for a pass action message from the user space
232 * filter agent. An mbuf's full length must be allowed to pass before it is
233 * removed from the cfe_pending_q.
234 *
235 * The "cfi_inject_q" queue holds data that has been fully allowed to pass
236 * by the user space filter agent and that needs to be re-injected into the
237 * INET/INET6 socket.
238 *
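 * Putting it together, a span of filtered data moves roughly like this,
 * with one (a)/(b) hop per attached content filter:
 *
 *	socket buffer -> cfe_ctl_q -> cfe_pending_q -> cfi_inject_q -> socket buffer
 *	                 (not yet      (sent to the     (fully passed,
 *	                  deliverable)  agent, awaiting   awaiting
 *	                                a verdict)        re-injection)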
239 *
240 * IMPACT ON FLOW CONTROL
241 *
242 * An essential aspect of the content filter subsystem is to minimize the
243 * impact on flow control of the INET/INET6 sockets being filtered.
244 *
245 * The processing overhead of content filtering may have an effect on
246 * flow control by adding noticeable delays and cannot be eliminated --
247 * care must be taken by the user space filter agent to minimize the
248 * processing delays.
249 *
250 * The amount of data being filtered is kept in buffers while waiting for
251 * a decision by the user space filter agent. This amount of data pending
252 * needs to be subtracted from the amount of data available in the
253 * corresponding INET/INET6 socket buffer. This is done by modifying
254 * sbspace() and tcp_sbspace() to account for the amount of data pending
255 * in the content filter.
256 *
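 * Conceptually, the adjustment looks like this (a simplified sketch, not
 * the exact sbspace() code):
 *
 *	int64_t space = imin(sb->sb_hiwat - sb->sb_cc, sb->sb_mbmax - sb->sb_mbcnt);
 *	if (sb->sb_so->so_flags & SOF_CONTENT_FILTER) {
 *		space -= cfil_sock_data_pending(sb);	// bytes still held by cfil
 *	}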
257 *
258 * LOCKING STRATEGY
259 *
260 * The global state of content filter subsystem is protected by a single
261 * read-write lock "cfil_lck_rw". The data flow can be done with the
262 * cfil read-write lock held as shared so it can be re-entered from multiple
263 * threads.
264 *
265 * The per INET/INET6 socket content filter state -- "struct cfil_info" -- is
266 * protected by the socket lock.
267 *
268 * An INET/INET6 socket lock cannot be taken while the cfil read-write lock
269 * is held. That's why we have some sequences where we drop the cfil read-write
270 * lock before taking the INET/INET6 socket lock.
271 *
272 * It is also important to lock the INET/INET6 socket buffer while the content
273 * filter is modifying the amount of pending data. Otherwise the calculations
274 * in sbspace() and tcp_sbspace() could be wrong.
275 *
276 * The "cfil_lck_rw" protects "struct content_filter" and also the fields
277 * "cfe_link" and "cfe_filter" of "struct cfil_entry".
278 *
279 * Actually "cfe_link" and "cfe_filter" are protected both by
280 * "cfil_lck_rw" and the socket lock: they may be modified only when
281 * "cfil_lck_rw" is exclusive and the socket is locked.
282 *
283 * To read the other fields of "struct content_filter" we have to take
284 * "cfil_lck_rw" in shared mode.
285 *
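 * A sketch of the resulting pattern (simplified; see the call sites below
 * for the real sequences):
 *
 *	cfil_rw_lock_shared(&cfil_lck_rw);
 *	// ... read what is needed from "struct content_filter" ...
 *	cfil_rw_unlock_shared(&cfil_lck_rw);
 *	socket_lock(so, 1);		// only after the cfil lock is dropped
 *	// ... update the per-socket "struct cfil_info" ...
 *	socket_unlock(so, 1);
 *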
286 * DATAGRAM SPECIFICS:
287 *
288 * The socket content filter supports all INET/INET6 protocols. However
289 * the treatments for TCP sockets and for datagram (UDP, ICMP, etc) sockets
290 * are slightly different.
291 *
292 * Each datagram socket may have multiple flows. Each flow is identified
293 * by the flow's source address/port and destination address/port tuple
294 * and is represented as a "struct cfil_info" entry. For each socket,
295 * a hash table is used to maintain the collection of flows under that socket.
296 *
297 * Each datagram flow is uniquely identified by its "struct cfil_info" cfi_sock_id.
298 * The highest 32 bits of the cfi_sock_id contain the socket's so_gencnt. This portion
299 * of the cfi_sock_id is used to locate the socket during socket lookup. The lowest 32 bits
300 * of the cfi_sock_id contain a hash of the flow's 4-tuple. This portion of the cfi_sock_id
301 * is used as the hash value for the flow hash table lookup within the parent socket.
302 *
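 * For illustration, the cfi_sock_id is composed and taken apart like this
 * (see the CFI_MASK_*/CFI_SHIFT_* definitions further down in this file):
 *
 *	cfil_sock_id_t id = ((cfil_sock_id_t)so->so_gencnt << CFI_SHIFT_GENCNT) |
 *	    (fhash & CFI_MASK_FLOWHASH);	// fhash: hash of the flow 4-tuple
 *	u_int32_t gencnt = (u_int32_t)(id >> CFI_SHIFT_GENCNT);
 *	u_int32_t flowhash = (u_int32_t)(id & CFI_MASK_FLOWHASH);
 *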
303 * Since datagram sockets may not be connected, flow states may not be maintained in the
304 * socket structures and thus have to be saved for each packet. These saved states will be
305 * used for both outgoing and incoming reinjections. For outgoing packets, destination
306 * address/port as well as the current socket states will be saved. During reinjection,
307 * these saved states will be used instead. For incoming packets, control and address
308 * mbufs will be chained to the data. During reinjection, the whole chain will be queued
309 * onto the incoming socket buffer.
310 *
311 * LIMITATIONS
312 *
313 * - Supports all INET/INET6 sockets, such as TCP, UDP, ICMP, etc
314 *
315 * - Does not support TCP unordered messages
316 */
317
318 /*
319 * TO DO LIST
320 *
321 * Deal with OOB
322 *
323 */
324
325 #include <sys/types.h>
326 #include <sys/kern_control.h>
327 #include <sys/queue.h>
328 #include <sys/domain.h>
329 #include <sys/protosw.h>
330 #include <sys/syslog.h>
331 #include <sys/systm.h>
332 #include <sys/param.h>
333 #include <sys/mbuf.h>
334
335 #include <kern/locks.h>
336 #include <kern/zalloc.h>
337 #include <kern/debug.h>
338
339 #include <net/content_filter.h>
340 #include <net/content_filter_crypto.h>
341
342 #define _IP_VHL
343 #include <netinet/ip.h>
344 #include <netinet/in_pcb.h>
345 #include <netinet/tcp.h>
346 #include <netinet/tcp_var.h>
347 #include <netinet/udp.h>
348 #include <netinet/udp_var.h>
349
350 #include <string.h>
351 #include <libkern/libkern.h>
352 #include <kern/sched_prim.h>
353 #include <kern/task.h>
354 #include <mach/task_info.h>
355
356 #if !TARGET_OS_OSX && !defined(XNU_TARGET_OS_OSX)
357 #define MAX_CONTENT_FILTER 2
358 #else
359 #define MAX_CONTENT_FILTER 8
360 #endif
361
362 extern struct inpcbinfo ripcbinfo;
363 struct cfil_entry;
364
365 /*
366 * The structure content_filter represents a user space content filter
367 * It's created and associated with a kernel control socket instance
368 */
369 struct content_filter {
370 kern_ctl_ref cf_kcref;
371 u_int32_t cf_kcunit;
372 u_int32_t cf_flags;
373
374 uint32_t cf_necp_control_unit;
375
376 uint32_t cf_sock_count;
377 TAILQ_HEAD(, cfil_entry) cf_sock_entries;
378
379 cfil_crypto_state_t cf_crypto_state;
380 };
381
382 #define CFF_ACTIVE 0x01
383 #define CFF_DETACHING 0x02
384 #define CFF_FLOW_CONTROLLED 0x04
385
386 struct content_filter **content_filters = NULL;
387 uint32_t cfil_active_count = 0; /* Number of active content filters */
388 uint32_t cfil_sock_attached_count = 0; /* Number of socket attachments */
389 uint32_t cfil_sock_udp_attached_count = 0; /* Number of UDP socket attachments */
390 uint32_t cfil_sock_attached_stats_count = 0; /* Number of sockets requested periodic stats report */
391 uint32_t cfil_close_wait_timeout = 1000; /* in milliseconds */
392
393 static kern_ctl_ref cfil_kctlref = NULL;
394
395 static lck_grp_attr_t *cfil_lck_grp_attr = NULL;
396 static lck_attr_t *cfil_lck_attr = NULL;
397 static lck_grp_t *cfil_lck_grp = NULL;
398 decl_lck_rw_data(static, cfil_lck_rw);
399
400 #define CFIL_RW_LCK_MAX 8
401
402 int cfil_rw_nxt_lck = 0;
403 void* cfil_rw_lock_history[CFIL_RW_LCK_MAX];
404
405 int cfil_rw_nxt_unlck = 0;
406 void* cfil_rw_unlock_history[CFIL_RW_LCK_MAX];
407
408 static ZONE_DECLARE(content_filter_zone, "content_filter",
409 sizeof(struct content_filter), ZC_NONE);
410
411 MBUFQ_HEAD(cfil_mqhead);
412
413 struct cfil_queue {
414 uint64_t q_start; /* offset of first byte in queue */
415 uint64_t q_end; /* offset of last byte in queue */
416 struct cfil_mqhead q_mq;
417 };
418
419 /*
420 * struct cfil_entry
421 *
422 * There is one entry per content filter
423 */
424 struct cfil_entry {
425 TAILQ_ENTRY(cfil_entry) cfe_link;
426 SLIST_ENTRY(cfil_entry) cfe_order_link;
427 struct content_filter *cfe_filter;
428
429 struct cfil_info *cfe_cfil_info;
430 uint32_t cfe_flags;
431 uint32_t cfe_necp_control_unit;
432 struct timeval cfe_last_event; /* To user space */
433 struct timeval cfe_last_action; /* From user space */
434 uint64_t cfe_byte_inbound_count_reported; /* stats already been reported */
435 uint64_t cfe_byte_outbound_count_reported; /* stats already been reported */
436 struct timeval cfe_stats_report_ts; /* Timestamp for last stats report */
437 uint32_t cfe_stats_report_frequency; /* Interval for stats report in msecs */
438 boolean_t cfe_laddr_sent;
439
440 struct cfe_buf {
441 /*
442 * cfe_pending_q holds data that has been delivered to
443 * the filter and for which we are waiting for an action
444 */
445 struct cfil_queue cfe_pending_q;
446 /*
447 * This queue is for data that has not been delivered to
448 * the content filter (new data, pass/peek offsets, or flow control)
449 */
450 struct cfil_queue cfe_ctl_q;
451
452 uint64_t cfe_pass_offset;
453 uint64_t cfe_peek_offset;
454 uint64_t cfe_peeked;
455 } cfe_snd, cfe_rcv;
456 };
457
458 #define CFEF_CFIL_ATTACHED 0x0001 /* was attached to filter */
459 #define CFEF_SENT_SOCK_ATTACHED 0x0002 /* sock attach event was sent */
460 #define CFEF_DATA_START 0x0004 /* can send data event */
461 #define CFEF_FLOW_CONTROLLED 0x0008 /* wait for flow control lift */
462 #define CFEF_SENT_DISCONNECT_IN 0x0010 /* event was sent */
463 #define CFEF_SENT_DISCONNECT_OUT 0x0020 /* event was sent */
464 #define CFEF_SENT_SOCK_CLOSED 0x0040 /* closed event was sent */
465 #define CFEF_CFIL_DETACHED 0x0080 /* filter was detached */
466
467
468 #define CFI_ADD_TIME_LOG(cfil, t1, t0, op) \
469 struct timeval64 _tdiff; \
470 if ((cfil)->cfi_op_list_ctr < CFI_MAX_TIME_LOG_ENTRY) { \
471 timersub(t1, t0, &_tdiff); \
472 (cfil)->cfi_op_time[(cfil)->cfi_op_list_ctr] = (uint32_t)(_tdiff.tv_sec * 1000 + _tdiff.tv_usec / 1000);\
473 (cfil)->cfi_op_list[(cfil)->cfi_op_list_ctr] = (unsigned char)op; \
474 (cfil)->cfi_op_list_ctr ++; \
475 }
476
477 struct cfil_hash_entry;
478
479 /*
480 * struct cfil_info
481 *
482 * There is a struct cfil_info per socket
483 */
484 struct cfil_info {
485 TAILQ_ENTRY(cfil_info) cfi_link;
486 TAILQ_ENTRY(cfil_info) cfi_link_stats;
487 struct socket *cfi_so;
488 uint64_t cfi_flags;
489 uint64_t cfi_sock_id;
490 struct timeval64 cfi_first_event;
491 uint32_t cfi_op_list_ctr;
492 uint32_t cfi_op_time[CFI_MAX_TIME_LOG_ENTRY]; /* time interval in microseconds since first event */
493 unsigned char cfi_op_list[CFI_MAX_TIME_LOG_ENTRY];
494 union sockaddr_in_4_6 cfi_so_attach_faddr; /* faddr at the time of attach */
495 union sockaddr_in_4_6 cfi_so_attach_laddr; /* laddr at the time of attach */
496
497 int cfi_dir;
498 uint64_t cfi_byte_inbound_count;
499 uint64_t cfi_byte_outbound_count;
500
501 boolean_t cfi_isSignatureLatest; /* Indicates if signature covers latest flow attributes */
502 u_int32_t cfi_filter_control_unit;
503 u_int32_t cfi_debug;
504 struct cfi_buf {
505 /*
506 * cfi_pending_first and cfi_pending_last describe the total
507 * amount of data outstanding for all the filters on
508 * this socket and data in the flow queue
509 * cfi_pending_mbcnt counts in sballoc() "chars of mbufs used"
510 */
511 uint64_t cfi_pending_first;
512 uint64_t cfi_pending_last;
513 uint32_t cfi_pending_mbcnt;
514 uint32_t cfi_pending_mbnum;
515 uint32_t cfi_tail_drop_cnt;
516 /*
517 * cfi_pass_offset is the minimum of all the filters
518 */
519 uint64_t cfi_pass_offset;
520 /*
521 * cfi_inject_q holds data that needs to be re-injected
522 * into the socket after filtering and that can
523 * be queued because of flow control
524 */
525 struct cfil_queue cfi_inject_q;
526 } cfi_snd, cfi_rcv;
527
528 struct cfil_entry cfi_entries[MAX_CONTENT_FILTER];
529 struct cfil_hash_entry *cfi_hash_entry;
530 SLIST_HEAD(, cfil_entry) cfi_ordered_entries;
531 os_refcnt_t cfi_ref_count;
532 } __attribute__((aligned(8)));
533
534 #define CFIF_DROP 0x0001 /* drop action applied */
535 #define CFIF_CLOSE_WAIT 0x0002 /* waiting for filter to close */
536 #define CFIF_SOCK_CLOSED 0x0004 /* socket is closed */
537 #define CFIF_RETRY_INJECT_IN 0x0010 /* inject in failed */
538 #define CFIF_RETRY_INJECT_OUT 0x0020 /* inject out failed */
539 #define CFIF_SHUT_WR 0x0040 /* shutdown write */
540 #define CFIF_SHUT_RD 0x0080 /* shutdown read */
541 #define CFIF_SOCKET_CONNECTED 0x0100 /* socket is connected */
542 #define CFIF_INITIAL_VERDICT 0x0200 /* received initial verdict */
543
544 #define CFI_MASK_GENCNT 0xFFFFFFFF00000000 /* upper 32 bits */
545 #define CFI_SHIFT_GENCNT 32
546 #define CFI_MASK_FLOWHASH 0x00000000FFFFFFFF /* lower 32 bits */
547 #define CFI_SHIFT_FLOWHASH 0
548
549 #define CFI_ENTRY_KCUNIT(i, e) ((uint32_t)(((e) - &((i)->cfi_entries[0])) + 1))
550
551 static ZONE_DECLARE(cfil_info_zone, "cfil_info",
552 sizeof(struct cfil_info), ZC_NONE);
553
554 TAILQ_HEAD(cfil_sock_head, cfil_info) cfil_sock_head;
555 TAILQ_HEAD(cfil_sock_head_stats, cfil_info) cfil_sock_head_stats;
556
557 #define CFIL_QUEUE_VERIFY(x) if (cfil_debug) cfil_queue_verify(x)
558 #define CFIL_INFO_VERIFY(x) if (cfil_debug) cfil_info_verify(x)
559
560 /*
561 * UDP Socket Support
562 */
563 LIST_HEAD(cfilhashhead, cfil_hash_entry);
564 #define CFILHASHSIZE 16
565 #define CFIL_HASH(laddr, faddr, lport, fport) ((faddr) ^ ((laddr) >> 16) ^ (fport) ^ (lport))
566
567 #define IS_INET(so) (so && so->so_proto && so->so_proto->pr_domain && (so->so_proto->pr_domain->dom_family == AF_INET || so->so_proto->pr_domain->dom_family == AF_INET6))
568 #define IS_TCP(so) (so && so->so_proto && so->so_proto->pr_type == SOCK_STREAM && so->so_proto->pr_protocol == IPPROTO_TCP)
569 #define IS_UDP(so) (so && so->so_proto && so->so_proto->pr_type == SOCK_DGRAM && so->so_proto->pr_protocol == IPPROTO_UDP)
570 #define IS_ICMP(so) (so && so->so_proto && (so->so_proto->pr_type == SOCK_RAW || so->so_proto->pr_type == SOCK_DGRAM) && \
571 (so->so_proto->pr_protocol == IPPROTO_ICMP || so->so_proto->pr_protocol == IPPROTO_ICMPV6))
572 #define IS_RAW(so) (so && so->so_proto && so->so_proto->pr_type == SOCK_RAW && so->so_proto->pr_protocol == IPPROTO_RAW)
573
574 #if !TARGET_OS_OSX && !defined(XNU_TARGET_OS_OSX)
575 #define IS_IP_DGRAM(so) (IS_INET(so) && IS_UDP(so))
576 #else
577 #define IS_IP_DGRAM(so) (IS_INET(so) && !IS_TCP(so))
578 #endif
579
580 #define OPTIONAL_IP_HEADER(so) (!IS_TCP(so) && !IS_UDP(so))
581 #define GET_SO_PROTO(so) ((so && so->so_proto) ? so->so_proto->pr_protocol : IPPROTO_MAX)
582 #define IS_INP_V6(inp) (inp && (inp->inp_vflag & INP_IPV6))
583
584 #define UNCONNECTED(inp) (inp && (((inp->inp_vflag & INP_IPV4) && (inp->inp_faddr.s_addr == INADDR_ANY)) || \
585 ((inp->inp_vflag & INP_IPV6) && IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr))))
586 #define IS_INP_V6(inp) (inp && (inp->inp_vflag & INP_IPV6))
587 #define IS_ENTRY_ATTACHED(cfil_info, kcunit) (cfil_info != NULL && (kcunit <= MAX_CONTENT_FILTER) && \
588 cfil_info->cfi_entries[kcunit - 1].cfe_filter != NULL)
589 #define IS_DNS(local, remote) (check_port(local, 53) || check_port(remote, 53) || check_port(local, 5353) || check_port(remote, 5353))
590 #define IS_INITIAL_TFO_DATA(so) (so && (so->so_flags1 & SOF1_PRECONNECT_DATA) && (so->so_state & SS_ISCONNECTING))
591 #define NULLADDRESS(addr) ((addr.sa.sa_len == 0) || \
592 (addr.sa.sa_family == AF_INET && addr.sin.sin_addr.s_addr == 0) || \
593 (addr.sa.sa_family == AF_INET6 && IN6_IS_ADDR_UNSPECIFIED(&addr.sin6.sin6_addr)))
594 #define LOCAL_ADDRESS_NEEDS_UPDATE(entry) \
595 ((entry->cfentry_family == AF_INET && entry->cfentry_laddr.addr46.ia46_addr4.s_addr == 0) || \
596 entry->cfentry_family == AF_INET6 && IN6_IS_ADDR_UNSPECIFIED(&entry->cfentry_laddr.addr6))
597 #define LOCAL_PORT_NEEDS_UPDATE(entry, so) (entry->cfentry_lport == 0 && IS_UDP(so))
598
599 #define SKIP_FILTER_FOR_TCP_SOCKET(so) \
600 (so == NULL || so->so_proto == NULL || so->so_proto->pr_domain == NULL || \
601 (so->so_proto->pr_domain->dom_family != PF_INET && so->so_proto->pr_domain->dom_family != PF_INET6) || \
602 so->so_proto->pr_type != SOCK_STREAM || \
603 so->so_proto->pr_protocol != IPPROTO_TCP || \
604 (so->so_flags & SOF_MP_SUBFLOW) != 0 || \
605 (so->so_flags1 & SOF1_CONTENT_FILTER_SKIP) != 0)
606
607 os_refgrp_decl(static, cfil_refgrp, "CFILRefGroup", NULL);
608
609 #define CFIL_INFO_FREE(cfil_info) \
610 if (cfil_info && (os_ref_release(&cfil_info->cfi_ref_count) == 0)) { \
611 cfil_info_free(cfil_info); \
612 }
613
614 /*
615 * Periodic Statistics Report:
616 */
617 static struct thread *cfil_stats_report_thread;
618 #define CFIL_STATS_REPORT_INTERVAL_MIN_MSEC 500 // Highest report frequency
619 #define CFIL_STATS_REPORT_RUN_INTERVAL_NSEC (CFIL_STATS_REPORT_INTERVAL_MIN_MSEC * NSEC_PER_MSEC)
620 #define CFIL_STATS_REPORT_MAX_COUNT 50 // Max stats to be reported per run
621
622 /* This buffer must have same layout as struct cfil_msg_stats_report */
623 struct cfil_stats_report_buffer {
624 struct cfil_msg_hdr msghdr;
625 uint32_t count;
626 struct cfil_msg_sock_stats stats[CFIL_STATS_REPORT_MAX_COUNT];
627 };
628 static struct cfil_stats_report_buffer *global_cfil_stats_report_buffers[MAX_CONTENT_FILTER];
629 static uint32_t global_cfil_stats_counts[MAX_CONTENT_FILTER];
630
631 /*
632 * UDP Garbage Collection:
633 */
634 static struct thread *cfil_udp_gc_thread;
635 #define UDP_FLOW_GC_IDLE_TO 30 // Flow Idle Timeout in seconds
636 #define UDP_FLOW_GC_ACTION_TO 10 // Flow Action Timeout (no action from user space) in seconds
637 #define UDP_FLOW_GC_MAX_COUNT 100 // Max UDP flows to be handled per run
638 #define UDP_FLOW_GC_RUN_INTERVAL_NSEC (10 * NSEC_PER_SEC) // GC wakes up every 10 seconds
639
640 /*
641 * UDP flow queue thresholds
642 */
643 #define UDP_FLOW_GC_MBUF_CNT_MAX (2 << MBSHIFT) // Max mbuf byte count in flow queue (2MB)
644 #define UDP_FLOW_GC_MBUF_NUM_MAX (UDP_FLOW_GC_MBUF_CNT_MAX >> MCLSHIFT) // Max mbuf count in flow queue (1K)
645 #define UDP_FLOW_GC_MBUF_SHIFT 5 // Shift to get 1/32 of platform limits
646 /*
647 * UDP flow queue threshold globals:
648 */
649 static unsigned int cfil_udp_gc_mbuf_num_max = UDP_FLOW_GC_MBUF_NUM_MAX;
650 static unsigned int cfil_udp_gc_mbuf_cnt_max = UDP_FLOW_GC_MBUF_CNT_MAX;
651
652 /*
653 * struct cfil_hash_entry
654 *
655 * Hash entry for cfil_info
656 */
657 struct cfil_hash_entry {
658 LIST_ENTRY(cfil_hash_entry) cfentry_link;
659 struct cfil_info *cfentry_cfil;
660 u_short cfentry_fport;
661 u_short cfentry_lport;
662 sa_family_t cfentry_family;
663 u_int32_t cfentry_flowhash;
664 u_int64_t cfentry_lastused;
665 union {
666 /* foreign host table entry */
667 struct in_addr_4in6 addr46;
668 struct in6_addr addr6;
669 } cfentry_faddr;
670 union {
671 /* local host table entry */
672 struct in_addr_4in6 addr46;
673 struct in6_addr addr6;
674 } cfentry_laddr;
675 uint8_t cfentry_laddr_updated: 1;
676 uint8_t cfentry_lport_updated: 1;
677 uint8_t cfentry_reserved: 6;
678 };
679
680 /*
681 * struct cfil_db
682 *
683 * For each UDP socket, this is a hash table maintaining all cfil_info structs
684 * keyed by the flow 4-tuples <lport,fport,laddr,faddr>.
685 */
686 struct cfil_db {
687 struct socket *cfdb_so;
688 uint32_t cfdb_count; /* Number of total content filters */
689 struct cfilhashhead *cfdb_hashbase;
690 u_long cfdb_hashmask;
691 struct cfil_hash_entry *cfdb_only_entry; /* Optimization for connected UDP */
692 };
693
694 /*
695 * CFIL specific mbuf tag:
696 * Save state of socket at the point of data entry into cfil.
697 * Use saved state for reinjection at protocol layer.
698 */
699 struct cfil_tag {
700 union sockaddr_in_4_6 cfil_faddr;
701 uint32_t cfil_so_state_change_cnt;
702 uint32_t cfil_so_options;
703 int cfil_inp_flags;
704 };
705
706 static ZONE_DECLARE(cfil_hash_entry_zone, "cfil_entry_hash",
707 sizeof(struct cfil_hash_entry), ZC_NONE);
708
709 static ZONE_DECLARE(cfil_db_zone, "cfil_db",
710 sizeof(struct cfil_db), ZC_NONE);
711
712 /*
713 * Statistics
714 */
715
716 struct cfil_stats cfil_stats;
717
718 /*
719 * For troubleshooting
720 */
721 int cfil_log_level = LOG_ERR;
722 int cfil_debug = 1;
723
724 // Debug controls added for selective debugging.
725 // Disabled for production. If enabled,
726 // these will have a performance impact
727 #define LIFECYCLE_DEBUG 0
728 #define VERDICT_DEBUG 0
729 #define DATA_DEBUG 0
730 #define SHOW_DEBUG 0
731 #define GC_DEBUG 0
732 #define STATS_DEBUG 0
733
734 /*
735 * Sysctls for logs and statistics
736 */
737 static int sysctl_cfil_filter_list(struct sysctl_oid *, void *, int,
738 struct sysctl_req *);
739 static int sysctl_cfil_sock_list(struct sysctl_oid *, void *, int,
740 struct sysctl_req *);
741
742 SYSCTL_NODE(_net, OID_AUTO, cfil, CTLFLAG_RW | CTLFLAG_LOCKED, 0, "cfil");
743
744 SYSCTL_INT(_net_cfil, OID_AUTO, log, CTLFLAG_RW | CTLFLAG_LOCKED,
745 &cfil_log_level, 0, "");
746
747 SYSCTL_INT(_net_cfil, OID_AUTO, debug, CTLFLAG_RW | CTLFLAG_LOCKED,
748 &cfil_debug, 0, "");
749
750 SYSCTL_UINT(_net_cfil, OID_AUTO, sock_attached_count, CTLFLAG_RD | CTLFLAG_LOCKED,
751 &cfil_sock_attached_count, 0, "");
752
753 SYSCTL_UINT(_net_cfil, OID_AUTO, active_count, CTLFLAG_RD | CTLFLAG_LOCKED,
754 &cfil_active_count, 0, "");
755
756 SYSCTL_UINT(_net_cfil, OID_AUTO, close_wait_timeout, CTLFLAG_RW | CTLFLAG_LOCKED,
757 &cfil_close_wait_timeout, 0, "");
758
759 static int cfil_sbtrim = 1;
760 SYSCTL_UINT(_net_cfil, OID_AUTO, sbtrim, CTLFLAG_RW | CTLFLAG_LOCKED,
761 &cfil_sbtrim, 0, "");
762
763 SYSCTL_PROC(_net_cfil, OID_AUTO, filter_list, CTLFLAG_RD | CTLFLAG_LOCKED,
764 0, 0, sysctl_cfil_filter_list, "S,cfil_filter_stat", "");
765
766 SYSCTL_PROC(_net_cfil, OID_AUTO, sock_list, CTLFLAG_RD | CTLFLAG_LOCKED,
767 0, 0, sysctl_cfil_sock_list, "S,cfil_sock_stat", "");
768
769 SYSCTL_STRUCT(_net_cfil, OID_AUTO, stats, CTLFLAG_RD | CTLFLAG_LOCKED,
770 &cfil_stats, cfil_stats, "");
771
772 /*
773 * Forward declaration to appease the compiler
774 */
775 static int cfil_action_data_pass(struct socket *, struct cfil_info *, uint32_t, int,
776 uint64_t, uint64_t);
777 static int cfil_action_drop(struct socket *, struct cfil_info *, uint32_t);
778 static int cfil_action_bless_client(uint32_t, struct cfil_msg_hdr *);
779 static int cfil_action_set_crypto_key(uint32_t, struct cfil_msg_hdr *);
780 static int cfil_dispatch_closed_event(struct socket *, struct cfil_info *, int);
781 static int cfil_data_common(struct socket *, struct cfil_info *, int, struct sockaddr *,
782 struct mbuf *, struct mbuf *, uint32_t);
783 static int cfil_data_filter(struct socket *, struct cfil_info *, uint32_t, int,
784 struct mbuf *, uint32_t);
785 static void fill_ip_sockaddr_4_6(union sockaddr_in_4_6 *,
786 struct in_addr, u_int16_t);
787 static void fill_ip6_sockaddr_4_6(union sockaddr_in_4_6 *,
788 struct in6_addr *, u_int16_t);
789
790 static int cfil_dispatch_attach_event(struct socket *, struct cfil_info *, uint32_t, int);
791 static void cfil_info_free(struct cfil_info *);
792 static struct cfil_info * cfil_info_alloc(struct socket *, struct cfil_hash_entry *);
793 static int cfil_info_attach_unit(struct socket *, uint32_t, struct cfil_info *);
794 static struct socket * cfil_socket_from_sock_id(cfil_sock_id_t, bool);
795 static struct socket * cfil_socket_from_client_uuid(uuid_t, bool *);
796 static int cfil_service_pending_queue(struct socket *, struct cfil_info *, uint32_t, int);
797 static int cfil_data_service_ctl_q(struct socket *, struct cfil_info *, uint32_t, int);
798 static void cfil_info_verify(struct cfil_info *);
799 static int cfil_update_data_offsets(struct socket *, struct cfil_info *, uint32_t, int,
800 uint64_t, uint64_t);
801 static int cfil_acquire_sockbuf(struct socket *, struct cfil_info *, int);
802 static void cfil_release_sockbuf(struct socket *, int);
803 static int cfil_filters_attached(struct socket *);
804
805 static void cfil_rw_lock_exclusive(lck_rw_t *);
806 static void cfil_rw_unlock_exclusive(lck_rw_t *);
807 static void cfil_rw_lock_shared(lck_rw_t *);
808 static void cfil_rw_unlock_shared(lck_rw_t *);
809 static boolean_t cfil_rw_lock_shared_to_exclusive(lck_rw_t *);
810 static void cfil_rw_lock_exclusive_to_shared(lck_rw_t *);
811
812 static unsigned int cfil_data_length(struct mbuf *, int *, int *);
813 static errno_t cfil_db_init(struct socket *);
814 static void cfil_db_free(struct socket *so);
815 struct cfil_hash_entry *cfil_db_lookup_entry(struct cfil_db *, struct sockaddr *, struct sockaddr *, boolean_t);
816 struct cfil_hash_entry *cfil_db_lookup_entry_internal(struct cfil_db *, struct sockaddr *, struct sockaddr *, boolean_t, boolean_t);
817 struct cfil_hash_entry *cfil_db_lookup_entry_with_sockid(struct cfil_db *, u_int64_t);
818 struct cfil_hash_entry *cfil_db_add_entry(struct cfil_db *, struct sockaddr *, struct sockaddr *);
819 void cfil_db_update_entry_local(struct cfil_db *, struct cfil_hash_entry *, struct sockaddr *, struct mbuf *);
820 void cfil_db_delete_entry(struct cfil_db *, struct cfil_hash_entry *);
821 struct cfil_hash_entry *cfil_sock_udp_get_flow(struct socket *, uint32_t, bool, struct sockaddr *, struct sockaddr *, struct mbuf *, int);
822 struct cfil_info *cfil_db_get_cfil_info(struct cfil_db *, cfil_sock_id_t);
823 static errno_t cfil_sock_udp_handle_data(bool, struct socket *, struct sockaddr *, struct sockaddr *,
824 struct mbuf *, struct mbuf *, uint32_t);
825 static int cfil_sock_udp_get_address_from_control(sa_family_t, struct mbuf *, uint8_t **);
826 static int32_t cfil_sock_udp_data_pending(struct sockbuf *, bool);
827 static void cfil_sock_udp_is_closed(struct socket *);
828 static int cfil_sock_udp_notify_shutdown(struct socket *, int, int, int);
829 static int cfil_sock_udp_shutdown(struct socket *, int *);
830 static void cfil_sock_udp_close_wait(struct socket *);
831 static void cfil_sock_udp_buf_update(struct sockbuf *);
832 static int cfil_filters_udp_attached(struct socket *, bool);
833 static void cfil_get_flow_address_v6(struct cfil_hash_entry *, struct inpcb *,
834 struct in6_addr **, struct in6_addr **,
835 u_int16_t *, u_int16_t *);
836 static void cfil_get_flow_address(struct cfil_hash_entry *, struct inpcb *,
837 struct in_addr *, struct in_addr *,
838 u_int16_t *, u_int16_t *);
839 static void cfil_info_log(int, struct cfil_info *, const char *);
840 void cfil_filter_show(u_int32_t);
841 void cfil_info_show(void);
842 bool cfil_info_idle_timed_out(struct cfil_info *, int, u_int64_t);
843 bool cfil_info_action_timed_out(struct cfil_info *, int);
844 bool cfil_info_buffer_threshold_exceeded(struct cfil_info *);
845 struct m_tag *cfil_dgram_save_socket_state(struct cfil_info *, struct mbuf *);
846 boolean_t cfil_dgram_peek_socket_state(struct mbuf *m, int *inp_flags);
847 static void cfil_udp_gc_thread_func(void *, wait_result_t);
848 static void cfil_info_udp_expire(void *, wait_result_t);
849 static bool fill_cfil_hash_entry_from_address(struct cfil_hash_entry *, bool, struct sockaddr *, bool);
850 static void cfil_sock_received_verdict(struct socket *so);
851 static void cfil_fill_event_msg_addresses(struct cfil_hash_entry *, struct inpcb *,
852 union sockaddr_in_4_6 *, union sockaddr_in_4_6 *,
853 boolean_t, boolean_t);
854 static void cfil_stats_report_thread_func(void *, wait_result_t);
855 static void cfil_stats_report(void *v, wait_result_t w);
856
857 bool check_port(struct sockaddr *, u_short);
858
859 /*
860 * Content filter global read write lock
861 */
862
863 static void
864 cfil_rw_lock_exclusive(lck_rw_t *lck)
865 {
866 void *lr_saved;
867
868 lr_saved = __builtin_return_address(0);
869
870 lck_rw_lock_exclusive(lck);
871
872 cfil_rw_lock_history[cfil_rw_nxt_lck] = lr_saved;
873 cfil_rw_nxt_lck = (cfil_rw_nxt_lck + 1) % CFIL_RW_LCK_MAX;
874 }
875
876 static void
877 cfil_rw_unlock_exclusive(lck_rw_t *lck)
878 {
879 void *lr_saved;
880
881 lr_saved = __builtin_return_address(0);
882
883 lck_rw_unlock_exclusive(lck);
884
885 cfil_rw_unlock_history[cfil_rw_nxt_unlck] = lr_saved;
886 cfil_rw_nxt_unlck = (cfil_rw_nxt_unlck + 1) % CFIL_RW_LCK_MAX;
887 }
888
889 static void
890 cfil_rw_lock_shared(lck_rw_t *lck)
891 {
892 void *lr_saved;
893
894 lr_saved = __builtin_return_address(0);
895
896 lck_rw_lock_shared(lck);
897
898 cfil_rw_lock_history[cfil_rw_nxt_lck] = lr_saved;
899 cfil_rw_nxt_lck = (cfil_rw_nxt_lck + 1) % CFIL_RW_LCK_MAX;
900 }
901
902 static void
903 cfil_rw_unlock_shared(lck_rw_t *lck)
904 {
905 void *lr_saved;
906
907 lr_saved = __builtin_return_address(0);
908
909 lck_rw_unlock_shared(lck);
910
911 cfil_rw_unlock_history[cfil_rw_nxt_unlck] = lr_saved;
912 cfil_rw_nxt_unlck = (cfil_rw_nxt_unlck + 1) % CFIL_RW_LCK_MAX;
913 }
914
915 static boolean_t
916 cfil_rw_lock_shared_to_exclusive(lck_rw_t *lck)
917 {
918 void *lr_saved;
919 boolean_t upgraded;
920
921 lr_saved = __builtin_return_address(0);
922
923 upgraded = lck_rw_lock_shared_to_exclusive(lck);
924 if (upgraded) {
925 cfil_rw_unlock_history[cfil_rw_nxt_unlck] = lr_saved;
926 cfil_rw_nxt_unlck = (cfil_rw_nxt_unlck + 1) % CFIL_RW_LCK_MAX;
927 }
928 return upgraded;
929 }
930
931 static void
932 cfil_rw_lock_exclusive_to_shared(lck_rw_t *lck)
933 {
934 void *lr_saved;
935
936 lr_saved = __builtin_return_address(0);
937
938 lck_rw_lock_exclusive_to_shared(lck);
939
940 cfil_rw_lock_history[cfil_rw_nxt_lck] = lr_saved;
941 cfil_rw_nxt_lck = (cfil_rw_nxt_lck + 1) % CFIL_RW_LCK_MAX;
942 }
943
944 static void
945 cfil_rw_lock_assert_held(lck_rw_t *lck, int exclusive)
946 {
947 #if !MACH_ASSERT
948 #pragma unused(lck, exclusive)
949 #endif
950 LCK_RW_ASSERT(lck,
951 exclusive ? LCK_RW_ASSERT_EXCLUSIVE : LCK_RW_ASSERT_HELD);
952 }
953
954 /*
955 * Return the number of bytes in the mbuf chain using the same
956 * method as m_length() or sballoc()
957 *
958 * Returns data len - starting from PKT start
959 * - retmbcnt - optional param to get total mbuf bytes in chain
960 * - retmbnum - optional param to get number of mbufs in chain
961 */
962 static unsigned int
963 cfil_data_length(struct mbuf *m, int *retmbcnt, int *retmbnum)
964 {
965 struct mbuf *m0;
966 unsigned int pktlen = 0;
967 int mbcnt;
968 int mbnum;
969
970 // Locate the start of data
971 for (m0 = m; m0 != NULL; m0 = m0->m_next) {
972 if (m0->m_flags & M_PKTHDR) {
973 break;
974 }
975 }
976 if (m0 == NULL) {
977 CFIL_LOG(LOG_ERR, "cfil_data_length: no M_PKTHDR");
978 return 0;
979 }
980 m = m0;
981
982 if (retmbcnt == NULL && retmbnum == NULL) {
983 return m_length(m);
984 }
985
986 pktlen = 0;
987 mbcnt = 0;
988 mbnum = 0;
989 for (m0 = m; m0 != NULL; m0 = m0->m_next) {
990 pktlen += m0->m_len;
991 mbnum++;
992 mbcnt += MSIZE;
993 if (m0->m_flags & M_EXT) {
994 mbcnt += m0->m_ext.ext_size;
995 }
996 }
997 if (retmbcnt) {
998 *retmbcnt = mbcnt;
999 }
1000 if (retmbnum) {
1001 *retmbnum = mbnum;
1002 }
1003 return pktlen;
1004 }
1005
1006 static struct mbuf *
1007 cfil_data_start(struct mbuf *m)
1008 {
1009 struct mbuf *m0;
1010
1011 // Locate the start of data
1012 for (m0 = m; m0 != NULL; m0 = m0->m_next) {
1013 if (m0->m_flags & M_PKTHDR) {
1014 break;
1015 }
1016 }
1017 return m0;
1018 }
1019
1020 /*
1021 * Common mbuf queue utilities
1022 */
1023
1024 static inline void
1025 cfil_queue_init(struct cfil_queue *cfq)
1026 {
1027 cfq->q_start = 0;
1028 cfq->q_end = 0;
1029 MBUFQ_INIT(&cfq->q_mq);
1030 }
1031
1032 static inline uint64_t
1033 cfil_queue_drain(struct cfil_queue *cfq)
1034 {
1035 uint64_t drained = cfq->q_start - cfq->q_end;
1036 cfq->q_start = 0;
1037 cfq->q_end = 0;
1038 MBUFQ_DRAIN(&cfq->q_mq);
1039
1040 return drained;
1041 }
1042
1043 /* Return 1 when empty, 0 otherwise */
1044 static inline int
1045 cfil_queue_empty(struct cfil_queue *cfq)
1046 {
1047 return MBUFQ_EMPTY(&cfq->q_mq);
1048 }
1049
1050 static inline uint64_t
1051 cfil_queue_offset_first(struct cfil_queue *cfq)
1052 {
1053 return cfq->q_start;
1054 }
1055
1056 static inline uint64_t
1057 cfil_queue_offset_last(struct cfil_queue *cfq)
1058 {
1059 return cfq->q_end;
1060 }
1061
1062 static inline uint64_t
1063 cfil_queue_len(struct cfil_queue *cfq)
1064 {
1065 return cfq->q_end - cfq->q_start;
1066 }
1067
1068 /*
1069 * Routines to verify some fundamental assumptions
1070 */
1071
1072 static void
1073 cfil_queue_verify(struct cfil_queue *cfq)
1074 {
1075 mbuf_t chain;
1076 mbuf_t m;
1077 mbuf_t n;
1078 uint64_t queuesize = 0;
1079
1080 /* Verify offsets are ordered */
1081 VERIFY(cfq->q_start <= cfq->q_end);
1082
1083 /*
1084 * When the queue is empty the offsets are equal, otherwise the offsets
1085 * are different
1086 */
1087 VERIFY((MBUFQ_EMPTY(&cfq->q_mq) && cfq->q_start == cfq->q_end) ||
1088 (!MBUFQ_EMPTY(&cfq->q_mq) &&
1089 cfq->q_start != cfq->q_end));
1090
1091 MBUFQ_FOREACH(chain, &cfq->q_mq) {
1092 size_t chainsize = 0;
1093 m = chain;
1094 unsigned int mlen = cfil_data_length(m, NULL, NULL);
1095 // skip the addr and control stuff if present
1096 m = cfil_data_start(m);
1097
1098 if (m == NULL ||
1099 m == (void *)M_TAG_FREE_PATTERN ||
1100 m->m_next == (void *)M_TAG_FREE_PATTERN ||
1101 m->m_nextpkt == (void *)M_TAG_FREE_PATTERN) {
1102 panic("%s - mq %p is free at %p", __func__,
1103 &cfq->q_mq, m);
1104 }
1105 for (n = m; n != NULL; n = n->m_next) {
1106 if (n->m_type != MT_DATA &&
1107 n->m_type != MT_HEADER &&
1108 n->m_type != MT_OOBDATA) {
1109 panic("%s - %p unsupported type %u", __func__,
1110 n, n->m_type);
1111 }
1112 chainsize += n->m_len;
1113 }
1114 if (mlen != chainsize) {
1115 panic("%s - %p m_length() %u != chainsize %lu",
1116 __func__, m, mlen, chainsize);
1117 }
1118 queuesize += chainsize;
1119 }
1120 if (queuesize != cfq->q_end - cfq->q_start) {
1121 panic("%s - %p queuesize %llu != offsetdiffs %llu", __func__,
1122 m, queuesize, cfq->q_end - cfq->q_start);
1123 }
1124 }
1125
1126 static void
1127 cfil_queue_enqueue(struct cfil_queue *cfq, mbuf_t m, size_t len)
1128 {
1129 CFIL_QUEUE_VERIFY(cfq);
1130
1131 MBUFQ_ENQUEUE(&cfq->q_mq, m);
1132 cfq->q_end += len;
1133
1134 CFIL_QUEUE_VERIFY(cfq);
1135 }
1136
1137 static void
1138 cfil_queue_remove(struct cfil_queue *cfq, mbuf_t m, size_t len)
1139 {
1140 CFIL_QUEUE_VERIFY(cfq);
1141
1142 VERIFY(cfil_data_length(m, NULL, NULL) == len);
1143
1144 MBUFQ_REMOVE(&cfq->q_mq, m);
1145 MBUFQ_NEXT(m) = NULL;
1146 cfq->q_start += len;
1147
1148 CFIL_QUEUE_VERIFY(cfq);
1149 }
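
/*
 * Offset bookkeeping of the cfil_queue_enqueue()/cfil_queue_remove() helpers
 * above, illustrated on an initially empty queue (enqueue advances q_end,
 * remove advances q_start):
 *
 *	cfil_queue_enqueue(q, m1, 100);	// q_start = 0,   q_end = 100, len = 100
 *	cfil_queue_enqueue(q, m2, 50);	// q_start = 0,   q_end = 150, len = 150
 *	cfil_queue_remove(q, m1, 100);	// q_start = 100, q_end = 150, len = 50
 */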
1150
1151 static mbuf_t
1152 cfil_queue_first(struct cfil_queue *cfq)
1153 {
1154 return MBUFQ_FIRST(&cfq->q_mq);
1155 }
1156
1157 static mbuf_t
1158 cfil_queue_next(struct cfil_queue *cfq, mbuf_t m)
1159 {
1160 #pragma unused(cfq)
1161 return MBUFQ_NEXT(m);
1162 }
1163
1164 static void
1165 cfil_entry_buf_verify(struct cfe_buf *cfe_buf)
1166 {
1167 CFIL_QUEUE_VERIFY(&cfe_buf->cfe_ctl_q);
1168 CFIL_QUEUE_VERIFY(&cfe_buf->cfe_pending_q);
1169
1170 /* Verify the queues are ordered so that pending is before ctl */
1171 VERIFY(cfe_buf->cfe_ctl_q.q_start >= cfe_buf->cfe_pending_q.q_end);
1172
1173 /* The peek offset cannot be less than the pass offset */
1174 VERIFY(cfe_buf->cfe_peek_offset >= cfe_buf->cfe_pass_offset);
1175
1176 /* Make sure we've updated the offset we peeked at */
1177 VERIFY(cfe_buf->cfe_ctl_q.q_start <= cfe_buf->cfe_peeked);
1178 }
1179
1180 static void
1181 cfil_entry_verify(struct cfil_entry *entry)
1182 {
1183 cfil_entry_buf_verify(&entry->cfe_snd);
1184 cfil_entry_buf_verify(&entry->cfe_rcv);
1185 }
1186
1187 static void
1188 cfil_info_buf_verify(struct cfi_buf *cfi_buf)
1189 {
1190 CFIL_QUEUE_VERIFY(&cfi_buf->cfi_inject_q);
1191
1192 VERIFY(cfi_buf->cfi_pending_first <= cfi_buf->cfi_pending_last);
1193 }
1194
1195 static void
1196 cfil_info_verify(struct cfil_info *cfil_info)
1197 {
1198 int i;
1199
1200 if (cfil_info == NULL) {
1201 return;
1202 }
1203
1204 cfil_info_buf_verify(&cfil_info->cfi_snd);
1205 cfil_info_buf_verify(&cfil_info->cfi_rcv);
1206
1207 for (i = 0; i < MAX_CONTENT_FILTER; i++) {
1208 cfil_entry_verify(&cfil_info->cfi_entries[i]);
1209 }
1210 }
1211
1212 static void
1213 verify_content_filter(struct content_filter *cfc)
1214 {
1215 struct cfil_entry *entry;
1216 uint32_t count = 0;
1217
1218 VERIFY(cfc->cf_sock_count >= 0);
1219
1220 TAILQ_FOREACH(entry, &cfc->cf_sock_entries, cfe_link) {
1221 count++;
1222 VERIFY(cfc == entry->cfe_filter);
1223 }
1224 VERIFY(count == cfc->cf_sock_count);
1225 }
1226
1227 /*
1228 * Kernel control socket callbacks
1229 */
1230 static errno_t
1231 cfil_ctl_connect(kern_ctl_ref kctlref, struct sockaddr_ctl *sac,
1232 void **unitinfo)
1233 {
1234 errno_t error = 0;
1235 struct content_filter *cfc = NULL;
1236
1237 CFIL_LOG(LOG_NOTICE, "");
1238
1239 cfc = zalloc(content_filter_zone);
1240 if (cfc == NULL) {
1241 CFIL_LOG(LOG_ERR, "zalloc failed");
1242 error = ENOMEM;
1243 goto done;
1244 }
1245 bzero(cfc, sizeof(struct content_filter));
1246
1247 cfil_rw_lock_exclusive(&cfil_lck_rw);
1248 if (content_filters == NULL) {
1249 struct content_filter **tmp;
1250
1251 cfil_rw_unlock_exclusive(&cfil_lck_rw);
1252
1253 MALLOC(tmp,
1254 struct content_filter **,
1255 MAX_CONTENT_FILTER * sizeof(struct content_filter *),
1256 M_TEMP,
1257 M_WAITOK | M_ZERO);
1258
1259 cfil_rw_lock_exclusive(&cfil_lck_rw);
1260
1261 if (tmp == NULL && content_filters == NULL) {
1262 error = ENOMEM;
1263 cfil_rw_unlock_exclusive(&cfil_lck_rw);
1264 goto done;
1265 }
1266 /* Another thread may have won the race */
1267 if (content_filters != NULL) {
1268 FREE(tmp, M_TEMP);
1269 } else {
1270 content_filters = tmp;
1271 }
1272 }
1273
1274 if (sac->sc_unit == 0 || sac->sc_unit > MAX_CONTENT_FILTER) {
1275 CFIL_LOG(LOG_ERR, "bad sc_unit %u", sac->sc_unit);
1276 error = EINVAL;
1277 } else if (content_filters[sac->sc_unit - 1] != NULL) {
1278 CFIL_LOG(LOG_ERR, "sc_unit %u in use", sac->sc_unit);
1279 error = EADDRINUSE;
1280 } else {
1281 /*
1282 * kernel control socket kcunit numbers start at 1
1283 */
1284 content_filters[sac->sc_unit - 1] = cfc;
1285
1286 cfc->cf_kcref = kctlref;
1287 cfc->cf_kcunit = sac->sc_unit;
1288 TAILQ_INIT(&cfc->cf_sock_entries);
1289
1290 *unitinfo = cfc;
1291 cfil_active_count++;
1292
1293 // Allocate periodic stats buffer for this filter
1294 if (global_cfil_stats_report_buffers[cfc->cf_kcunit - 1] == NULL) {
1295 cfil_rw_unlock_exclusive(&cfil_lck_rw);
1296
1297 struct cfil_stats_report_buffer *buf;
1298
1299 MALLOC(buf,
1300 struct cfil_stats_report_buffer *,
1301 sizeof(struct cfil_stats_report_buffer),
1302 M_TEMP,
1303 M_WAITOK | M_ZERO);
1304
1305 cfil_rw_lock_exclusive(&cfil_lck_rw);
1306
1307 if (buf == NULL) {
1308 error = ENOMEM;
1309 cfil_rw_unlock_exclusive(&cfil_lck_rw);
1310 goto done;
1311 }
1312
1313 /* Another thread may have won the race */
1314 if (global_cfil_stats_report_buffers[cfc->cf_kcunit - 1] != NULL) {
1315 FREE(buf, M_TEMP);
1316 } else {
1317 global_cfil_stats_report_buffers[cfc->cf_kcunit - 1] = buf;
1318 }
1319 }
1320 }
1321 cfil_rw_unlock_exclusive(&cfil_lck_rw);
1322 done:
1323 if (error != 0 && cfc != NULL) {
1324 zfree(content_filter_zone, cfc);
1325 }
1326
1327 if (error == 0) {
1328 OSIncrementAtomic(&cfil_stats.cfs_ctl_connect_ok);
1329 } else {
1330 OSIncrementAtomic(&cfil_stats.cfs_ctl_connect_fail);
1331 }
1332
1333 CFIL_LOG(LOG_INFO, "return %d cfil_active_count %u kcunit %u",
1334 error, cfil_active_count, sac->sc_unit);
1335
1336 return error;
1337 }
1338
1339 static errno_t
1340 cfil_ctl_disconnect(kern_ctl_ref kctlref, u_int32_t kcunit, void *unitinfo)
1341 {
1342 #pragma unused(kctlref)
1343 errno_t error = 0;
1344 struct content_filter *cfc;
1345 struct cfil_entry *entry;
1346 uint64_t sock_flow_id = 0;
1347
1348 CFIL_LOG(LOG_NOTICE, "");
1349
1350 if (content_filters == NULL) {
1351 CFIL_LOG(LOG_ERR, "no content filter");
1352 error = EINVAL;
1353 goto done;
1354 }
1355 if (kcunit > MAX_CONTENT_FILTER) {
1356 CFIL_LOG(LOG_ERR, "kcunit %u > MAX_CONTENT_FILTER (%d)",
1357 kcunit, MAX_CONTENT_FILTER);
1358 error = EINVAL;
1359 goto done;
1360 }
1361
1362 cfc = (struct content_filter *)unitinfo;
1363 if (cfc == NULL) {
1364 goto done;
1365 }
1366
1367 cfil_rw_lock_exclusive(&cfil_lck_rw);
1368 if (content_filters[kcunit - 1] != cfc || cfc->cf_kcunit != kcunit) {
1369 CFIL_LOG(LOG_ERR, "bad unit info %u)",
1370 kcunit);
1371 cfil_rw_unlock_exclusive(&cfil_lck_rw);
1372 goto done;
1373 }
1374 cfc->cf_flags |= CFF_DETACHING;
1375 /*
1376 * Remove all sockets from the filter
1377 */
1378 while ((entry = TAILQ_FIRST(&cfc->cf_sock_entries)) != NULL) {
1379 cfil_rw_lock_assert_held(&cfil_lck_rw, 1);
1380
1381 verify_content_filter(cfc);
1382 /*
1383 * Accept all outstanding data by pushing to next filter
1384 * or back to socket
1385 *
1386 * TBD: Actually we should make sure all data has been pushed
1387 * back to socket
1388 */
1389 if (entry->cfe_cfil_info && entry->cfe_cfil_info->cfi_so) {
1390 struct cfil_info *cfil_info = entry->cfe_cfil_info;
1391 struct socket *so = cfil_info->cfi_so;
1392 sock_flow_id = cfil_info->cfi_sock_id;
1393
1394 /* Need to let data flow immediately */
1395 entry->cfe_flags |= CFEF_SENT_SOCK_ATTACHED |
1396 CFEF_DATA_START;
1397
1398 /*
1399 * Respect locking hierarchy
1400 */
1401 cfil_rw_unlock_exclusive(&cfil_lck_rw);
1402
1403 socket_lock(so, 1);
1404
1405 /*
1406 * When cfe_filter is NULL the filter is detached
1407 * and the entry has been removed from cf_sock_entries
1408 */
1409 if ((so->so_cfil == NULL && so->so_cfil_db == NULL) || entry->cfe_filter == NULL) {
1410 cfil_rw_lock_exclusive(&cfil_lck_rw);
1411 goto release;
1412 }
1413
1414 (void) cfil_action_data_pass(so, cfil_info, kcunit, 1,
1415 CFM_MAX_OFFSET,
1416 CFM_MAX_OFFSET);
1417
1418 (void) cfil_action_data_pass(so, cfil_info, kcunit, 0,
1419 CFM_MAX_OFFSET,
1420 CFM_MAX_OFFSET);
1421
1422 cfil_rw_lock_exclusive(&cfil_lck_rw);
1423
1424 /*
1425 * Check again to make sure the cfil_info is still valid
1426 * as the socket may have been unlocked when calling
1427 * cfil_acquire_sockbuf()
1428 */
1429 if (entry->cfe_filter == NULL ||
1430 (so->so_cfil == NULL && cfil_db_get_cfil_info(so->so_cfil_db, sock_flow_id) == NULL)) {
1431 goto release;
1432 }
1433
1434 /* The filter is now detached */
1435 entry->cfe_flags |= CFEF_CFIL_DETACHED;
1436 #if LIFECYCLE_DEBUG
1437 cfil_info_log(LOG_DEBUG, cfil_info, "CFIL: LIFECYCLE: - FILTER DISCONNECTED");
1438 #endif
1439 CFIL_LOG(LOG_NOTICE, "so %llx detached %u",
1440 (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit);
1441 if ((cfil_info->cfi_flags & CFIF_CLOSE_WAIT) &&
1442 cfil_filters_attached(so) == 0) {
1443 CFIL_LOG(LOG_NOTICE, "so %llx waking",
1444 (uint64_t)VM_KERNEL_ADDRPERM(so));
1445 wakeup((caddr_t)cfil_info);
1446 }
1447
1448 /*
1449 * Remove the filter entry from the content filter
1450 * but leave the rest of the state intact as the queues
1451 * may not be empty yet
1452 */
1453 entry->cfe_filter = NULL;
1454 entry->cfe_necp_control_unit = 0;
1455
1456 TAILQ_REMOVE(&cfc->cf_sock_entries, entry, cfe_link);
1457 cfc->cf_sock_count--;
1458 release:
1459 socket_unlock(so, 1);
1460 }
1461 }
1462 verify_content_filter(cfc);
1463
1464 /* Free the stats buffer for this filter */
1465 if (global_cfil_stats_report_buffers[cfc->cf_kcunit - 1] != NULL) {
1466 FREE(global_cfil_stats_report_buffers[cfc->cf_kcunit - 1], M_TEMP);
1467 global_cfil_stats_report_buffers[cfc->cf_kcunit - 1] = NULL;
1468 }
1469 VERIFY(cfc->cf_sock_count == 0);
1470
1471 /*
1472 * Make filter inactive
1473 */
1474 content_filters[kcunit - 1] = NULL;
1475 cfil_active_count--;
1476 cfil_rw_unlock_exclusive(&cfil_lck_rw);
1477
1478 if (cfc->cf_crypto_state != NULL) {
1479 cfil_crypto_cleanup_state(cfc->cf_crypto_state);
1480 cfc->cf_crypto_state = NULL;
1481 }
1482
1483 zfree(content_filter_zone, cfc);
1484 done:
1485 if (error == 0) {
1486 OSIncrementAtomic(&cfil_stats.cfs_ctl_disconnect_ok);
1487 } else {
1488 OSIncrementAtomic(&cfil_stats.cfs_ctl_disconnect_fail);
1489 }
1490
1491 CFIL_LOG(LOG_INFO, "return %d cfil_active_count %u kcunit %u",
1492 error, cfil_active_count, kcunit);
1493
1494 return error;
1495 }
1496
1497 /*
1498 * cfil_acquire_sockbuf()
1499 *
1500 * Prevent any other thread from acquiring the sockbuf
1501 * We use sb_cfil_thread as a semaphore to prevent other threads from
1502 * messing with the sockbuf -- see sblock()
1503 * Note: We do not set SB_LOCK here because the thread may check or modify
1504 * SB_LOCK several times until it calls cfil_release_sockbuf() -- currently
1505 * sblock(), sbunlock() or sodefunct()
1506 */
1507 static int
1508 cfil_acquire_sockbuf(struct socket *so, struct cfil_info *cfil_info, int outgoing)
1509 {
1510 thread_t tp = current_thread();
1511 struct sockbuf *sb = outgoing ? &so->so_snd : &so->so_rcv;
1512 lck_mtx_t *mutex_held;
1513 int error = 0;
1514
1515 /*
1516 * Wait until no thread is holding the sockbuf and other content
1517 * filter threads have released the sockbuf
1518 */
1519 while ((sb->sb_flags & SB_LOCK) ||
1520 (sb->sb_cfil_thread != NULL && sb->sb_cfil_thread != tp)) {
1521 if (so->so_proto->pr_getlock != NULL) {
1522 mutex_held = (*so->so_proto->pr_getlock)(so, PR_F_WILLUNLOCK);
1523 } else {
1524 mutex_held = so->so_proto->pr_domain->dom_mtx;
1525 }
1526
1527 LCK_MTX_ASSERT(mutex_held, LCK_MTX_ASSERT_OWNED);
1528
1529 sb->sb_wantlock++;
1530 VERIFY(sb->sb_wantlock != 0);
1531
1532 msleep(&sb->sb_flags, mutex_held, PSOCK, "cfil_acquire_sockbuf",
1533 NULL);
1534
1535 VERIFY(sb->sb_wantlock != 0);
1536 sb->sb_wantlock--;
1537 }
1538 /*
1539 * Use reference count for repetitive calls on same thread
1540 */
1541 if (sb->sb_cfil_refs == 0) {
1542 VERIFY(sb->sb_cfil_thread == NULL);
1543 VERIFY((sb->sb_flags & SB_LOCK) == 0);
1544
1545 sb->sb_cfil_thread = tp;
1546 sb->sb_flags |= SB_LOCK;
1547 }
1548 sb->sb_cfil_refs++;
1549
1550 /* We acquire the socket buffer when we need to cleanup */
1551 if (cfil_info == NULL) {
1552 CFIL_LOG(LOG_ERR, "so %llx cfil detached",
1553 (uint64_t)VM_KERNEL_ADDRPERM(so));
1554 error = 0;
1555 } else if (cfil_info->cfi_flags & CFIF_DROP) {
1556 CFIL_LOG(LOG_ERR, "so %llx drop set",
1557 (uint64_t)VM_KERNEL_ADDRPERM(so));
1558 error = EPIPE;
1559 }
1560
1561 return error;
1562 }
1563
1564 static void
1565 cfil_release_sockbuf(struct socket *so, int outgoing)
1566 {
1567 struct sockbuf *sb = outgoing ? &so->so_snd : &so->so_rcv;
1568 thread_t tp = current_thread();
1569
1570 socket_lock_assert_owned(so);
1571
1572 if (sb->sb_cfil_thread != NULL && sb->sb_cfil_thread != tp) {
1573 panic("%s sb_cfil_thread %p not current %p", __func__,
1574 sb->sb_cfil_thread, tp);
1575 }
1576 /*
1577 * Don't panic if we are defunct because SB_LOCK has
1578 * been cleared by sodefunct()
1579 */
1580 if (!(so->so_flags & SOF_DEFUNCT) && !(sb->sb_flags & SB_LOCK)) {
1581 panic("%s SB_LOCK not set on %p", __func__,
1582 sb);
1583 }
1584 /*
1585 * We can unlock when the thread unwinds to the last reference
1586 */
1587 sb->sb_cfil_refs--;
1588 if (sb->sb_cfil_refs == 0) {
1589 sb->sb_cfil_thread = NULL;
1590 sb->sb_flags &= ~SB_LOCK;
1591
1592 if (sb->sb_wantlock > 0) {
1593 wakeup(&sb->sb_flags);
1594 }
1595 }
1596 }
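
/*
 * Illustrative sketch: the expected pairing of cfil_acquire_sockbuf() and
 * cfil_release_sockbuf() around servicing one direction of a socket,
 * assuming the caller already holds the socket lock. The helper name is
 * hypothetical; the call sequence mirrors the one used by cfil_ctl_rcvd()
 * further below.
 */
#if 0
static void
cfil_sockbuf_usage_sketch(struct socket *so, struct cfil_info *cfil_info,
    uint32_t kcunit, int outgoing)
{
	int error;

	socket_lock_assert_owned(so);

	/* May msleep() until no other thread owns this sockbuf */
	error = cfil_acquire_sockbuf(so, cfil_info, outgoing);
	if (error == 0) {
		/* Safe to service the control queue for this direction */
		error = cfil_data_service_ctl_q(so, cfil_info, kcunit, outgoing);
	}
	/* Always release: drops sb_cfil_refs and wakes up any waiters */
	cfil_release_sockbuf(so, outgoing);
}
#endif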
1597
1598 cfil_sock_id_t
1599 cfil_sock_id_from_socket(struct socket *so)
1600 {
1601 if ((so->so_flags & SOF_CONTENT_FILTER) && so->so_cfil) {
1602 return so->so_cfil->cfi_sock_id;
1603 } else {
1604 return CFIL_SOCK_ID_NONE;
1605 }
1606 }
1607
1608 static bool
1609 cfil_socket_safe_lock(struct inpcb *inp)
1610 {
1611 if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) != WNT_STOPUSING) {
1612 socket_lock(inp->inp_socket, 1);
1613 if (in_pcb_checkstate(inp, WNT_RELEASE, 1) != WNT_STOPUSING) {
1614 return true;
1615 }
1616 socket_unlock(inp->inp_socket, 1);
1617 }
1618 return false;
1619 }
1620
1621 static struct socket *
1622 cfil_socket_from_sock_id(cfil_sock_id_t cfil_sock_id, bool udp_only)
1623 {
1624 struct socket *so = NULL;
1625 u_int64_t gencnt = cfil_sock_id >> 32;
1626 u_int32_t flowhash = (u_int32_t)(cfil_sock_id & 0x0ffffffff);
1627 struct inpcb *inp = NULL;
1628 struct inpcbinfo *pcbinfo = NULL;
1629
1630 #if VERDICT_DEBUG
1631 CFIL_LOG(LOG_ERR, "CFIL: VERDICT: search for socket: id %llu gencnt %llx flowhash %x", cfil_sock_id, gencnt, flowhash);
1632 #endif
1633
1634 if (udp_only) {
1635 goto find_udp;
1636 }
1637
1638 pcbinfo = &tcbinfo;
1639 lck_rw_lock_shared(pcbinfo->ipi_lock);
1640 LIST_FOREACH(inp, pcbinfo->ipi_listhead, inp_list) {
1641 if (inp->inp_state != INPCB_STATE_DEAD &&
1642 inp->inp_socket != NULL &&
1643 inp->inp_flowhash == flowhash &&
1644 (inp->inp_socket->so_gencnt & 0x0ffffffff) == gencnt &&
1645 inp->inp_socket->so_cfil != NULL) {
1646 if (cfil_socket_safe_lock(inp)) {
1647 so = inp->inp_socket;
1648 }
1649 break;
1650 }
1651 }
1652 lck_rw_done(pcbinfo->ipi_lock);
1653 if (so != NULL) {
1654 goto done;
1655 }
1656
1657 find_udp:
1658
1659 pcbinfo = &udbinfo;
1660 lck_rw_lock_shared(pcbinfo->ipi_lock);
1661 LIST_FOREACH(inp, pcbinfo->ipi_listhead, inp_list) {
1662 if (inp->inp_state != INPCB_STATE_DEAD &&
1663 inp->inp_socket != NULL &&
1664 inp->inp_socket->so_cfil_db != NULL &&
1665 (inp->inp_socket->so_gencnt & 0x0ffffffff) == gencnt) {
1666 if (cfil_socket_safe_lock(inp)) {
1667 so = inp->inp_socket;
1668 }
1669 break;
1670 }
1671 }
1672 lck_rw_done(pcbinfo->ipi_lock);
1673
1674 pcbinfo = &ripcbinfo;
1675 lck_rw_lock_shared(pcbinfo->ipi_lock);
1676 LIST_FOREACH(inp, pcbinfo->ipi_listhead, inp_list) {
1677 if (inp->inp_state != INPCB_STATE_DEAD &&
1678 inp->inp_socket != NULL &&
1679 inp->inp_socket->so_cfil_db != NULL &&
1680 (inp->inp_socket->so_gencnt & 0x0ffffffff) == gencnt) {
1681 if (cfil_socket_safe_lock(inp)) {
1682 so = inp->inp_socket;
1683 }
1684 break;
1685 }
1686 }
1687 lck_rw_done(pcbinfo->ipi_lock);
1688
1689 done:
1690 if (so == NULL) {
1691 OSIncrementAtomic(&cfil_stats.cfs_sock_id_not_found);
1692 CFIL_LOG(LOG_DEBUG,
1693 "no socket for sock_id %llx gencnt %llx flowhash %x",
1694 cfil_sock_id, gencnt, flowhash);
1695 }
1696
1697 return so;
1698 }
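
/*
 * Illustrative sketch: how a cfil_sock_id_t is packed and unpacked. The
 * upper 32 bits carry the low half of so_gencnt and the lower 32 bits
 * carry the flow hash, which is exactly what the lookups above compare
 * against (and what cfil_info_alloc() composes below). The helper names
 * are hypothetical.
 */
#if 0
static inline cfil_sock_id_t
cfil_sock_id_pack_sketch(u_int64_t so_gencnt, u_int32_t flowhash)
{
	return (so_gencnt << 32) | flowhash;
}

static inline void
cfil_sock_id_unpack_sketch(cfil_sock_id_t sock_id,
    u_int64_t *gencnt, u_int32_t *flowhash)
{
	*gencnt = sock_id >> 32;                        /* matched against so_gencnt & 0x0ffffffff */
	*flowhash = (u_int32_t)(sock_id & 0x0ffffffff); /* matched against inp_flowhash */
}
#endif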
1699
1700 static struct socket *
1701 cfil_socket_from_client_uuid(uuid_t necp_client_uuid, bool *cfil_attached)
1702 {
1703 struct socket *so = NULL;
1704 struct inpcb *inp = NULL;
1705 struct inpcbinfo *pcbinfo = &tcbinfo;
1706
1707 lck_rw_lock_shared(pcbinfo->ipi_lock);
1708 LIST_FOREACH(inp, pcbinfo->ipi_listhead, inp_list) {
1709 if (inp->inp_state != INPCB_STATE_DEAD &&
1710 inp->inp_socket != NULL &&
1711 uuid_compare(inp->necp_client_uuid, necp_client_uuid) == 0) {
1712 *cfil_attached = (inp->inp_socket->so_cfil != NULL);
1713 if (cfil_socket_safe_lock(inp)) {
1714 so = inp->inp_socket;
1715 }
1716 break;
1717 }
1718 }
1719 lck_rw_done(pcbinfo->ipi_lock);
1720 if (so != NULL) {
1721 goto done;
1722 }
1723
1724 pcbinfo = &udbinfo;
1725 lck_rw_lock_shared(pcbinfo->ipi_lock);
1726 LIST_FOREACH(inp, pcbinfo->ipi_listhead, inp_list) {
1727 if (inp->inp_state != INPCB_STATE_DEAD &&
1728 inp->inp_socket != NULL &&
1729 uuid_compare(inp->necp_client_uuid, necp_client_uuid) == 0) {
1730 *cfil_attached = (inp->inp_socket->so_cfil_db != NULL);
1731 if (cfil_socket_safe_lock(inp)) {
1732 so = inp->inp_socket;
1733 }
1734 break;
1735 }
1736 }
1737 lck_rw_done(pcbinfo->ipi_lock);
1738
1739 done:
1740 return so;
1741 }
1742
1743 static void
1744 cfil_info_stats_toggle(struct cfil_info *cfil_info, struct cfil_entry *entry, uint32_t report_frequency)
1745 {
1746 struct cfil_info *cfil = NULL;
1747 Boolean found = FALSE;
1748 int kcunit;
1749
1750 if (cfil_info == NULL) {
1751 return;
1752 }
1753
1754 if (report_frequency) {
1755 if (entry == NULL) {
1756 return;
1757 }
1758
1759 // Update stats reporting frequency.
1760 if (entry->cfe_stats_report_frequency != report_frequency) {
1761 entry->cfe_stats_report_frequency = report_frequency;
1762 if (entry->cfe_stats_report_frequency < CFIL_STATS_REPORT_INTERVAL_MIN_MSEC) {
1763 entry->cfe_stats_report_frequency = CFIL_STATS_REPORT_INTERVAL_MIN_MSEC;
1764 }
1765 microuptime(&entry->cfe_stats_report_ts);
1766
1767 			// Insert cfil_info into the list only if it is not already there.
1768 TAILQ_FOREACH(cfil, &cfil_sock_head_stats, cfi_link_stats) {
1769 if (cfil == cfil_info) {
1770 return;
1771 }
1772 }
1773
1774 TAILQ_INSERT_TAIL(&cfil_sock_head_stats, cfil_info, cfi_link_stats);
1775
1776 			// Wake up the stats thread if this is the first flow added
1777 if (cfil_sock_attached_stats_count == 0) {
1778 thread_wakeup((caddr_t)&cfil_sock_attached_stats_count);
1779 }
1780 cfil_sock_attached_stats_count++;
1781 #if STATS_DEBUG
1782 CFIL_LOG(LOG_ERR, "CFIL: VERDICT RECEIVED - STATS FLOW INSERTED: <so %llx sockID %llu> stats frequency %d msecs",
1783 cfil_info->cfi_so ? (uint64_t)VM_KERNEL_ADDRPERM(cfil_info->cfi_so) : 0,
1784 cfil_info->cfi_sock_id,
1785 entry->cfe_stats_report_frequency);
1786 #endif
1787 }
1788 } else {
1789 // Turn off stats reporting for this filter.
1790 if (entry != NULL) {
1791 // Already off, no change.
1792 if (entry->cfe_stats_report_frequency == 0) {
1793 return;
1794 }
1795
1796 entry->cfe_stats_report_frequency = 0;
1797 // If cfil_info still has filter(s) asking for stats, no need to remove from list.
1798 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
1799 if (cfil_info->cfi_entries[kcunit - 1].cfe_stats_report_frequency > 0) {
1800 return;
1801 }
1802 }
1803 }
1804
1805 		// No more filters asking for stats for this cfil_info; remove it from the list.
1806 if (!TAILQ_EMPTY(&cfil_sock_head_stats)) {
1807 found = FALSE;
1808 TAILQ_FOREACH(cfil, &cfil_sock_head_stats, cfi_link_stats) {
1809 if (cfil == cfil_info) {
1810 found = TRUE;
1811 break;
1812 }
1813 }
1814 if (found) {
1815 cfil_sock_attached_stats_count--;
1816 TAILQ_REMOVE(&cfil_sock_head_stats, cfil_info, cfi_link_stats);
1817 #if STATS_DEBUG
1818 CFIL_LOG(LOG_ERR, "CFIL: VERDICT RECEIVED - STATS FLOW DELETED: <so %llx sockID %llu> stats frequency reset",
1819 cfil_info->cfi_so ? (uint64_t)VM_KERNEL_ADDRPERM(cfil_info->cfi_so) : 0,
1820 cfil_info->cfi_sock_id);
1821 #endif
1822 }
1823 }
1824 }
1825 }
1826
1827 static errno_t
1828 cfil_ctl_send(kern_ctl_ref kctlref, u_int32_t kcunit, void *unitinfo, mbuf_t m,
1829 int flags)
1830 {
1831 #pragma unused(kctlref, flags)
1832 errno_t error = 0;
1833 struct cfil_msg_hdr *msghdr;
1834 struct content_filter *cfc = (struct content_filter *)unitinfo;
1835 struct socket *so;
1836 struct cfil_msg_action *action_msg;
1837 struct cfil_entry *entry;
1838 struct cfil_info *cfil_info = NULL;
1839 unsigned int data_len = 0;
1840
1841 CFIL_LOG(LOG_INFO, "");
1842
1843 if (content_filters == NULL) {
1844 CFIL_LOG(LOG_ERR, "no content filter");
1845 error = EINVAL;
1846 goto done;
1847 }
1848 if (kcunit > MAX_CONTENT_FILTER) {
1849 CFIL_LOG(LOG_ERR, "kcunit %u > MAX_CONTENT_FILTER (%d)",
1850 kcunit, MAX_CONTENT_FILTER);
1851 error = EINVAL;
1852 goto done;
1853 }
1854 if (m == NULL) {
1855 CFIL_LOG(LOG_ERR, "null mbuf");
1856 error = EINVAL;
1857 goto done;
1858 }
1859 data_len = m_length(m);
1860
1861 if (data_len < sizeof(struct cfil_msg_hdr)) {
1862 CFIL_LOG(LOG_ERR, "too short %u", data_len);
1863 error = EINVAL;
1864 goto done;
1865 }
1866 msghdr = (struct cfil_msg_hdr *)mbuf_data(m);
1867 if (msghdr->cfm_version != CFM_VERSION_CURRENT) {
1868 CFIL_LOG(LOG_ERR, "bad version %u", msghdr->cfm_version);
1869 error = EINVAL;
1870 goto done;
1871 }
1872 if (msghdr->cfm_type != CFM_TYPE_ACTION) {
1873 CFIL_LOG(LOG_ERR, "bad type %u", msghdr->cfm_type);
1874 error = EINVAL;
1875 goto done;
1876 }
1877 if (msghdr->cfm_len > data_len) {
1878 CFIL_LOG(LOG_ERR, "bad length %u", msghdr->cfm_len);
1879 error = EINVAL;
1880 goto done;
1881 }
1882
1883 /* Validate action operation */
1884 switch (msghdr->cfm_op) {
1885 case CFM_OP_DATA_UPDATE:
1886 OSIncrementAtomic(
1887 &cfil_stats.cfs_ctl_action_data_update);
1888 break;
1889 case CFM_OP_DROP:
1890 OSIncrementAtomic(&cfil_stats.cfs_ctl_action_drop);
1891 break;
1892 case CFM_OP_BLESS_CLIENT:
1893 if (msghdr->cfm_len != sizeof(struct cfil_msg_bless_client)) {
1894 OSIncrementAtomic(&cfil_stats.cfs_ctl_action_bad_len);
1895 error = EINVAL;
1896 CFIL_LOG(LOG_ERR, "bad len: %u for op %u",
1897 msghdr->cfm_len,
1898 msghdr->cfm_op);
1899 goto done;
1900 }
1901 error = cfil_action_bless_client(kcunit, msghdr);
1902 goto done;
1903 case CFM_OP_SET_CRYPTO_KEY:
1904 if (msghdr->cfm_len != sizeof(struct cfil_msg_set_crypto_key)) {
1905 OSIncrementAtomic(&cfil_stats.cfs_ctl_action_bad_len);
1906 error = EINVAL;
1907 CFIL_LOG(LOG_ERR, "bad len: %u for op %u",
1908 msghdr->cfm_len,
1909 msghdr->cfm_op);
1910 goto done;
1911 }
1912 error = cfil_action_set_crypto_key(kcunit, msghdr);
1913 goto done;
1914 default:
1915 OSIncrementAtomic(&cfil_stats.cfs_ctl_action_bad_op);
1916 CFIL_LOG(LOG_ERR, "bad op %u", msghdr->cfm_op);
1917 error = EINVAL;
1918 goto done;
1919 }
1920 if (msghdr->cfm_len != sizeof(struct cfil_msg_action)) {
1921 OSIncrementAtomic(&cfil_stats.cfs_ctl_action_bad_len);
1922 error = EINVAL;
1923 CFIL_LOG(LOG_ERR, "bad len: %u for op %u",
1924 msghdr->cfm_len,
1925 msghdr->cfm_op);
1926 goto done;
1927 }
1928 cfil_rw_lock_shared(&cfil_lck_rw);
1929 if (cfc != (void *)content_filters[kcunit - 1]) {
1930 CFIL_LOG(LOG_ERR, "unitinfo does not match for kcunit %u",
1931 kcunit);
1932 error = EINVAL;
1933 cfil_rw_unlock_shared(&cfil_lck_rw);
1934 goto done;
1935 }
1936 cfil_rw_unlock_shared(&cfil_lck_rw);
1937
1938 	// Search for the socket (TCP+UDP) and lock it
1939 so = cfil_socket_from_sock_id(msghdr->cfm_sock_id, false);
1940 if (so == NULL) {
1941 CFIL_LOG(LOG_NOTICE, "bad sock_id %llx",
1942 msghdr->cfm_sock_id);
1943 error = EINVAL;
1944 goto done;
1945 }
1946
1947 cfil_info = so->so_cfil_db != NULL ?
1948 cfil_db_get_cfil_info(so->so_cfil_db, msghdr->cfm_sock_id) : so->so_cfil;
1949
1950 	// Do not take the global lock here, to avoid a deadlock further down this path.
1951 	// Instead, attempt to retain the cfil_info to prevent its deallocation until
1952 	// we are done. Abort the retain if cfil_info has already entered the free code path.
1953 if (cfil_info && os_ref_retain_try(&cfil_info->cfi_ref_count) == false) {
1954 socket_unlock(so, 1);
1955 goto done;
1956 }
1957
1958 if (cfil_info == NULL) {
1959 CFIL_LOG(LOG_NOTICE, "so %llx <id %llu> not attached",
1960 (uint64_t)VM_KERNEL_ADDRPERM(so), msghdr->cfm_sock_id);
1961 error = EINVAL;
1962 goto unlock;
1963 } else if (cfil_info->cfi_flags & CFIF_DROP) {
1964 CFIL_LOG(LOG_NOTICE, "so %llx drop set",
1965 (uint64_t)VM_KERNEL_ADDRPERM(so));
1966 error = EINVAL;
1967 goto unlock;
1968 }
1969
1970 if (cfil_info->cfi_debug) {
1971 cfil_info_log(LOG_ERR, cfil_info, "CFIL: RECEIVED MSG FROM FILTER");
1972 }
1973
1974 entry = &cfil_info->cfi_entries[kcunit - 1];
1975 if (entry->cfe_filter == NULL) {
1976 CFIL_LOG(LOG_NOTICE, "so %llx no filter",
1977 (uint64_t)VM_KERNEL_ADDRPERM(so));
1978 error = EINVAL;
1979 goto unlock;
1980 }
1981
1982 if (entry->cfe_flags & CFEF_SENT_SOCK_ATTACHED) {
1983 entry->cfe_flags |= CFEF_DATA_START;
1984 } else {
1985 CFIL_LOG(LOG_ERR,
1986 "so %llx attached not sent for %u",
1987 (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit);
1988 error = EINVAL;
1989 goto unlock;
1990 }
1991
1992 microuptime(&entry->cfe_last_action);
1993 CFI_ADD_TIME_LOG(cfil_info, &entry->cfe_last_action, &cfil_info->cfi_first_event, msghdr->cfm_op);
1994
1995 action_msg = (struct cfil_msg_action *)msghdr;
1996
1997 switch (msghdr->cfm_op) {
1998 case CFM_OP_DATA_UPDATE:
1999
2000 if (cfil_info->cfi_debug) {
2001 cfil_info_log(LOG_ERR, cfil_info, "CFIL: RECEIVED CFM_OP_DATA_UPDATE");
2002 CFIL_LOG(LOG_ERR, "CFIL: VERDICT RECEIVED: <so %llx sockID %llu> <IN peek:%llu pass:%llu, OUT peek:%llu pass:%llu>",
2003 (uint64_t)VM_KERNEL_ADDRPERM(so),
2004 cfil_info->cfi_sock_id,
2005 action_msg->cfa_in_peek_offset, action_msg->cfa_in_pass_offset,
2006 action_msg->cfa_out_peek_offset, action_msg->cfa_out_pass_offset);
2007 }
2008
2009 #if VERDICT_DEBUG
2010 CFIL_LOG(LOG_ERR, "CFIL: VERDICT RECEIVED: <so %llx sockID %llu> <IN peek:%llu pass:%llu, OUT peek:%llu pass:%llu>",
2011 (uint64_t)VM_KERNEL_ADDRPERM(so),
2012 cfil_info->cfi_sock_id,
2013 action_msg->cfa_in_peek_offset, action_msg->cfa_in_pass_offset,
2014 action_msg->cfa_out_peek_offset, action_msg->cfa_out_pass_offset);
2015 #endif
2016 /*
2017 * Received verdict, at this point we know this
2018 * socket connection is allowed. Unblock thread
2019 * immediately before proceeding to process the verdict.
2020 */
2021 cfil_sock_received_verdict(so);
2022
2023 if (action_msg->cfa_out_peek_offset != 0 ||
2024 action_msg->cfa_out_pass_offset != 0) {
2025 error = cfil_action_data_pass(so, cfil_info, kcunit, 1,
2026 action_msg->cfa_out_pass_offset,
2027 action_msg->cfa_out_peek_offset);
2028 }
2029 if (error == EJUSTRETURN) {
2030 error = 0;
2031 }
2032 if (error != 0) {
2033 break;
2034 }
2035 if (action_msg->cfa_in_peek_offset != 0 ||
2036 action_msg->cfa_in_pass_offset != 0) {
2037 error = cfil_action_data_pass(so, cfil_info, kcunit, 0,
2038 action_msg->cfa_in_pass_offset,
2039 action_msg->cfa_in_peek_offset);
2040 }
2041 if (error == EJUSTRETURN) {
2042 error = 0;
2043 }
2044
2045 // Toggle stats reporting according to received verdict.
2046 cfil_rw_lock_exclusive(&cfil_lck_rw);
2047 cfil_info_stats_toggle(cfil_info, entry, action_msg->cfa_stats_frequency);
2048 cfil_rw_unlock_exclusive(&cfil_lck_rw);
2049
2050 break;
2051
2052 case CFM_OP_DROP:
2053 if (cfil_info->cfi_debug) {
2054 cfil_info_log(LOG_ERR, cfil_info, "CFIL: RECEIVED CFM_OP_DROP");
2055 CFIL_LOG(LOG_ERR, "CFIL: VERDICT DROP RECEIVED: <so %llx sockID %llu> <IN peek:%llu pass:%llu, OUT peek:%llu pass:%llu>",
2056 (uint64_t)VM_KERNEL_ADDRPERM(so),
2057 cfil_info->cfi_sock_id,
2058 action_msg->cfa_in_peek_offset, action_msg->cfa_in_pass_offset,
2059 action_msg->cfa_out_peek_offset, action_msg->cfa_out_pass_offset);
2060 }
2061
2062 #if VERDICT_DEBUG
2063 CFIL_LOG(LOG_ERR, "CFIL: VERDICT DROP RECEIVED: <so %llx sockID %llu> <IN peek:%llu pass:%llu, OUT peek:%llu pass:%llu>",
2064 (uint64_t)VM_KERNEL_ADDRPERM(so),
2065 cfil_info->cfi_sock_id,
2066 action_msg->cfa_in_peek_offset, action_msg->cfa_in_pass_offset,
2067 action_msg->cfa_out_peek_offset, action_msg->cfa_out_pass_offset);
2068 #endif
2069 error = cfil_action_drop(so, cfil_info, kcunit);
2070 cfil_sock_received_verdict(so);
2071 break;
2072
2073 default:
2074 error = EINVAL;
2075 break;
2076 }
2077 unlock:
2078 CFIL_INFO_FREE(cfil_info)
2079 socket_unlock(so, 1);
2080 done:
2081 mbuf_freem(m);
2082
2083 if (error == 0) {
2084 OSIncrementAtomic(&cfil_stats.cfs_ctl_send_ok);
2085 } else {
2086 OSIncrementAtomic(&cfil_stats.cfs_ctl_send_bad);
2087 }
2088
2089 return error;
2090 }
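
/*
 * Illustrative sketch (user space): the kind of CFM_OP_DATA_UPDATE action
 * message that cfil_ctl_send() above expects, here granting an
 * "allow everything" verdict. Assumes fd is a connected content filter
 * kernel control socket and sock_id came from a CFM_OP_SOCKET_ATTACHED
 * event. The offset fields are those read by cfil_ctl_send() above; the
 * header field name cfa_msghdr, the CFM_MAX_OFFSET sentinel and the helper
 * name are assumptions of this sketch.
 */
#if 0
static int
send_pass_verdict_sketch(int fd, cfil_sock_id_t sock_id)
{
	struct cfil_msg_action action = { 0 };

	action.cfa_msghdr.cfm_len = sizeof(action);
	action.cfa_msghdr.cfm_version = CFM_VERSION_CURRENT;
	action.cfa_msghdr.cfm_type = CFM_TYPE_ACTION;
	action.cfa_msghdr.cfm_op = CFM_OP_DATA_UPDATE;
	action.cfa_msghdr.cfm_sock_id = sock_id;

	/* Let all pending and future data pass in both directions */
	action.cfa_in_pass_offset = CFM_MAX_OFFSET;
	action.cfa_in_peek_offset = CFM_MAX_OFFSET;
	action.cfa_out_pass_offset = CFM_MAX_OFFSET;
	action.cfa_out_peek_offset = CFM_MAX_OFFSET;
	action.cfa_stats_frequency = 0; /* no periodic stats reports */

	return send(fd, &action, sizeof(action), 0) == sizeof(action) ? 0 : -1;
}
#endif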
2091
2092 static errno_t
2093 cfil_ctl_getopt(kern_ctl_ref kctlref, u_int32_t kcunit, void *unitinfo,
2094 int opt, void *data, size_t *len)
2095 {
2096 #pragma unused(kctlref, opt)
2097 struct cfil_info *cfil_info = NULL;
2098 errno_t error = 0;
2099 struct content_filter *cfc = (struct content_filter *)unitinfo;
2100
2101 CFIL_LOG(LOG_NOTICE, "");
2102
2103 cfil_rw_lock_shared(&cfil_lck_rw);
2104
2105 if (content_filters == NULL) {
2106 CFIL_LOG(LOG_ERR, "no content filter");
2107 error = EINVAL;
2108 goto done;
2109 }
2110 if (kcunit > MAX_CONTENT_FILTER) {
2111 CFIL_LOG(LOG_ERR, "kcunit %u > MAX_CONTENT_FILTER (%d)",
2112 kcunit, MAX_CONTENT_FILTER);
2113 error = EINVAL;
2114 goto done;
2115 }
2116 if (cfc != (void *)content_filters[kcunit - 1]) {
2117 CFIL_LOG(LOG_ERR, "unitinfo does not match for kcunit %u",
2118 kcunit);
2119 error = EINVAL;
2120 goto done;
2121 }
2122 switch (opt) {
2123 case CFIL_OPT_NECP_CONTROL_UNIT:
2124 if (*len < sizeof(uint32_t)) {
2125 CFIL_LOG(LOG_ERR, "len too small %lu", *len);
2126 error = EINVAL;
2127 goto done;
2128 }
2129 if (data != NULL) {
2130 *(uint32_t *)data = cfc->cf_necp_control_unit;
2131 }
2132 break;
2133 case CFIL_OPT_GET_SOCKET_INFO:
2134 if (*len != sizeof(struct cfil_opt_sock_info)) {
2135 CFIL_LOG(LOG_ERR, "len does not match %lu", *len);
2136 error = EINVAL;
2137 goto done;
2138 }
2139 if (data == NULL) {
2140 CFIL_LOG(LOG_ERR, "data not passed");
2141 error = EINVAL;
2142 goto done;
2143 }
2144
2145 struct cfil_opt_sock_info *sock_info =
2146 (struct cfil_opt_sock_info *) data;
2147
2148 // Unlock here so that we never hold both cfil_lck_rw and the
2149 // socket_lock at the same time. Otherwise, this can deadlock
2150 // because soclose() takes the socket_lock and then exclusive
2151 // cfil_lck_rw and we require the opposite order.
2152
2153 // WARNING: Be sure to never use anything protected
2154 // by cfil_lck_rw beyond this point.
2155 // WARNING: Be sure to avoid fallthrough and
2156 // goto return_already_unlocked from this branch.
2157 cfil_rw_unlock_shared(&cfil_lck_rw);
2158
2159 // Search (TCP+UDP) and lock socket
2160 struct socket *sock =
2161 cfil_socket_from_sock_id(sock_info->cfs_sock_id, false);
2162 if (sock == NULL) {
2163 #if LIFECYCLE_DEBUG
2164 CFIL_LOG(LOG_ERR, "CFIL: GET_SOCKET_INFO failed: bad sock_id %llu",
2165 sock_info->cfs_sock_id);
2166 #endif
2167 error = ENOENT;
2168 goto return_already_unlocked;
2169 }
2170
2171 cfil_info = (sock->so_cfil_db != NULL) ?
2172 cfil_db_get_cfil_info(sock->so_cfil_db, sock_info->cfs_sock_id) : sock->so_cfil;
2173
2174 if (cfil_info == NULL) {
2175 #if LIFECYCLE_DEBUG
2176 CFIL_LOG(LOG_ERR, "CFIL: GET_SOCKET_INFO failed: so %llx not attached, cannot fetch info",
2177 (uint64_t)VM_KERNEL_ADDRPERM(sock));
2178 #endif
2179 error = EINVAL;
2180 socket_unlock(sock, 1);
2181 goto return_already_unlocked;
2182 }
2183
2184 // Fill out family, type, and protocol
2185 sock_info->cfs_sock_family = sock->so_proto->pr_domain->dom_family;
2186 sock_info->cfs_sock_type = sock->so_proto->pr_type;
2187 sock_info->cfs_sock_protocol = sock->so_proto->pr_protocol;
2188
2189 // Source and destination addresses
2190 struct inpcb *inp = sotoinpcb(sock);
2191 if (inp->inp_vflag & INP_IPV6) {
2192 struct in6_addr *laddr = NULL, *faddr = NULL;
2193 u_int16_t lport = 0, fport = 0;
2194
2195 cfil_get_flow_address_v6(cfil_info->cfi_hash_entry, inp,
2196 &laddr, &faddr, &lport, &fport);
2197 fill_ip6_sockaddr_4_6(&sock_info->cfs_local, laddr, lport);
2198 fill_ip6_sockaddr_4_6(&sock_info->cfs_remote, faddr, fport);
2199 } else if (inp->inp_vflag & INP_IPV4) {
2200 struct in_addr laddr = {.s_addr = 0}, faddr = {.s_addr = 0};
2201 u_int16_t lport = 0, fport = 0;
2202
2203 cfil_get_flow_address(cfil_info->cfi_hash_entry, inp,
2204 &laddr, &faddr, &lport, &fport);
2205 fill_ip_sockaddr_4_6(&sock_info->cfs_local, laddr, lport);
2206 fill_ip_sockaddr_4_6(&sock_info->cfs_remote, faddr, fport);
2207 }
2208
2209 // Set the pid info
2210 sock_info->cfs_pid = sock->last_pid;
2211 memcpy(sock_info->cfs_uuid, sock->last_uuid, sizeof(uuid_t));
2212
2213 if (sock->so_flags & SOF_DELEGATED) {
2214 sock_info->cfs_e_pid = sock->e_pid;
2215 memcpy(sock_info->cfs_e_uuid, sock->e_uuid, sizeof(uuid_t));
2216 } else {
2217 sock_info->cfs_e_pid = sock->last_pid;
2218 memcpy(sock_info->cfs_e_uuid, sock->last_uuid, sizeof(uuid_t));
2219 }
2220
2221 socket_unlock(sock, 1);
2222
2223 goto return_already_unlocked;
2224 default:
2225 error = ENOPROTOOPT;
2226 break;
2227 }
2228 done:
2229 cfil_rw_unlock_shared(&cfil_lck_rw);
2230
2231 return error;
2232
2233 return_already_unlocked:
2234
2235 return error;
2236 }
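
/*
 * Illustrative sketch (user space): querying flow metadata through
 * CFIL_OPT_GET_SOCKET_INFO. Kernel control sockets take their socket
 * options at the SYSPROTO_CONTROL level, and the handler above requires
 * the option length to be exactly sizeof(struct cfil_opt_sock_info).
 * The helper name is hypothetical.
 */
#if 0
static int
get_socket_info_sketch(int fd, cfil_sock_id_t sock_id,
    struct cfil_opt_sock_info *info)
{
	socklen_t len = sizeof(*info);

	memset(info, 0, sizeof(*info));
	info->cfs_sock_id = sock_id;    /* input: which flow to look up */

	if (getsockopt(fd, SYSPROTO_CONTROL, CFIL_OPT_GET_SOCKET_INFO,
	    info, &len) != 0) {
		return -1;              /* e.g. ENOENT once the flow is gone */
	}
	/* info->cfs_local, cfs_remote, cfs_pid, ... are now filled in */
	return 0;
}
#endif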
2237
2238 static errno_t
2239 cfil_ctl_setopt(kern_ctl_ref kctlref, u_int32_t kcunit, void *unitinfo,
2240 int opt, void *data, size_t len)
2241 {
2242 #pragma unused(kctlref, opt)
2243 errno_t error = 0;
2244 struct content_filter *cfc = (struct content_filter *)unitinfo;
2245
2246 CFIL_LOG(LOG_NOTICE, "");
2247
2248 cfil_rw_lock_exclusive(&cfil_lck_rw);
2249
2250 if (content_filters == NULL) {
2251 CFIL_LOG(LOG_ERR, "no content filter");
2252 error = EINVAL;
2253 goto done;
2254 }
2255 if (kcunit > MAX_CONTENT_FILTER) {
2256 CFIL_LOG(LOG_ERR, "kcunit %u > MAX_CONTENT_FILTER (%d)",
2257 kcunit, MAX_CONTENT_FILTER);
2258 error = EINVAL;
2259 goto done;
2260 }
2261 if (cfc != (void *)content_filters[kcunit - 1]) {
2262 CFIL_LOG(LOG_ERR, "unitinfo does not match for kcunit %u",
2263 kcunit);
2264 error = EINVAL;
2265 goto done;
2266 }
2267 switch (opt) {
2268 case CFIL_OPT_NECP_CONTROL_UNIT:
2269 if (len < sizeof(uint32_t)) {
2270 CFIL_LOG(LOG_ERR, "CFIL_OPT_NECP_CONTROL_UNIT "
2271 "len too small %lu", len);
2272 error = EINVAL;
2273 goto done;
2274 }
2275 if (cfc->cf_necp_control_unit != 0) {
2276 CFIL_LOG(LOG_ERR, "CFIL_OPT_NECP_CONTROL_UNIT "
2277 "already set %u",
2278 cfc->cf_necp_control_unit);
2279 error = EINVAL;
2280 goto done;
2281 }
2282 cfc->cf_necp_control_unit = *(uint32_t *)data;
2283 break;
2284 default:
2285 error = ENOPROTOOPT;
2286 break;
2287 }
2288 done:
2289 cfil_rw_unlock_exclusive(&cfil_lck_rw);
2290
2291 return error;
2292 }
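
/*
 * Illustrative sketch (user space): binding the filter agent to its NECP
 * filter control unit right after connecting the kernel control socket.
 * As enforced above, this can only be done once per control socket.
 * The helper name is hypothetical.
 */
#if 0
static int
set_necp_control_unit_sketch(int fd, uint32_t control_unit)
{
	return setsockopt(fd, SYSPROTO_CONTROL, CFIL_OPT_NECP_CONTROL_UNIT,
	    &control_unit, sizeof(control_unit));
}
#endif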
2293
2294
2295 static void
2296 cfil_ctl_rcvd(kern_ctl_ref kctlref, u_int32_t kcunit, void *unitinfo, int flags)
2297 {
2298 #pragma unused(kctlref, flags)
2299 struct content_filter *cfc = (struct content_filter *)unitinfo;
2300 struct socket *so = NULL;
2301 int error;
2302 struct cfil_entry *entry;
2303 struct cfil_info *cfil_info = NULL;
2304
2305 CFIL_LOG(LOG_INFO, "");
2306
2307 if (content_filters == NULL) {
2308 CFIL_LOG(LOG_ERR, "no content filter");
2309 OSIncrementAtomic(&cfil_stats.cfs_ctl_rcvd_bad);
2310 return;
2311 }
2312 if (kcunit > MAX_CONTENT_FILTER) {
2313 CFIL_LOG(LOG_ERR, "kcunit %u > MAX_CONTENT_FILTER (%d)",
2314 kcunit, MAX_CONTENT_FILTER);
2315 OSIncrementAtomic(&cfil_stats.cfs_ctl_rcvd_bad);
2316 return;
2317 }
2318 cfil_rw_lock_shared(&cfil_lck_rw);
2319 if (cfc != (void *)content_filters[kcunit - 1]) {
2320 CFIL_LOG(LOG_ERR, "unitinfo does not match for kcunit %u",
2321 kcunit);
2322 OSIncrementAtomic(&cfil_stats.cfs_ctl_rcvd_bad);
2323 goto done;
2324 }
2325 /* Let's assume the flow control is lifted */
2326 if (cfc->cf_flags & CFF_FLOW_CONTROLLED) {
2327 if (!cfil_rw_lock_shared_to_exclusive(&cfil_lck_rw)) {
2328 cfil_rw_lock_exclusive(&cfil_lck_rw);
2329 }
2330
2331 cfc->cf_flags &= ~CFF_FLOW_CONTROLLED;
2332
2333 cfil_rw_lock_exclusive_to_shared(&cfil_lck_rw);
2334 LCK_RW_ASSERT(&cfil_lck_rw, LCK_RW_ASSERT_SHARED);
2335 }
2336 /*
2337 * Flow control will be raised again as soon as an entry cannot enqueue
2338 * to the kernel control socket
2339 */
2340 while ((cfc->cf_flags & CFF_FLOW_CONTROLLED) == 0) {
2341 verify_content_filter(cfc);
2342
2343 cfil_rw_lock_assert_held(&cfil_lck_rw, 0);
2344
2345 /* Find an entry that is flow controlled */
2346 TAILQ_FOREACH(entry, &cfc->cf_sock_entries, cfe_link) {
2347 if (entry->cfe_cfil_info == NULL ||
2348 entry->cfe_cfil_info->cfi_so == NULL) {
2349 continue;
2350 }
2351 if ((entry->cfe_flags & CFEF_FLOW_CONTROLLED) == 0) {
2352 continue;
2353 			}
2354 			break;
2355 		}
2355 if (entry == NULL) {
2356 break;
2357 }
2358
2359 OSIncrementAtomic(&cfil_stats.cfs_ctl_rcvd_flow_lift);
2360
2361 cfil_info = entry->cfe_cfil_info;
2362 so = cfil_info->cfi_so;
2363
2364 cfil_rw_unlock_shared(&cfil_lck_rw);
2365 socket_lock(so, 1);
2366
2367 do {
2368 error = cfil_acquire_sockbuf(so, cfil_info, 1);
2369 if (error == 0) {
2370 error = cfil_data_service_ctl_q(so, cfil_info, kcunit, 1);
2371 }
2372 cfil_release_sockbuf(so, 1);
2373 if (error != 0) {
2374 break;
2375 }
2376
2377 error = cfil_acquire_sockbuf(so, cfil_info, 0);
2378 if (error == 0) {
2379 error = cfil_data_service_ctl_q(so, cfil_info, kcunit, 0);
2380 }
2381 cfil_release_sockbuf(so, 0);
2382 } while (0);
2383
2384 socket_lock_assert_owned(so);
2385 socket_unlock(so, 1);
2386
2387 cfil_rw_lock_shared(&cfil_lck_rw);
2388 }
2389 done:
2390 cfil_rw_unlock_shared(&cfil_lck_rw);
2391 }
2392
2393 void
2394 cfil_init(void)
2395 {
2396 struct kern_ctl_reg kern_ctl;
2397 errno_t error = 0;
2398 unsigned int mbuf_limit = 0;
2399
2400 CFIL_LOG(LOG_NOTICE, "");
2401
2402 /*
2403 * Compile time verifications
2404 */
2405 _CASSERT(CFIL_MAX_FILTER_COUNT == MAX_CONTENT_FILTER);
2406 _CASSERT(sizeof(struct cfil_filter_stat) % sizeof(uint32_t) == 0);
2407 _CASSERT(sizeof(struct cfil_entry_stat) % sizeof(uint32_t) == 0);
2408 _CASSERT(sizeof(struct cfil_sock_stat) % sizeof(uint32_t) == 0);
2409
2410 /*
2411 	 * Run-time verifications
2412 */
2413 VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_ctl_q_in_enqueued,
2414 sizeof(uint32_t)));
2415 VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_ctl_q_out_enqueued,
2416 sizeof(uint32_t)));
2417 VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_ctl_q_in_peeked,
2418 sizeof(uint32_t)));
2419 VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_ctl_q_out_peeked,
2420 sizeof(uint32_t)));
2421
2422 VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_pending_q_in_enqueued,
2423 sizeof(uint32_t)));
2424 VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_pending_q_out_enqueued,
2425 sizeof(uint32_t)));
2426
2427 VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_inject_q_in_enqueued,
2428 sizeof(uint32_t)));
2429 VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_inject_q_out_enqueued,
2430 sizeof(uint32_t)));
2431 VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_inject_q_in_passed,
2432 sizeof(uint32_t)));
2433 VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_inject_q_out_passed,
2434 sizeof(uint32_t)));
2435
2436 /*
2437 * Allocate locks
2438 */
2439 cfil_lck_grp_attr = lck_grp_attr_alloc_init();
2440 if (cfil_lck_grp_attr == NULL) {
2441 panic("%s: lck_grp_attr_alloc_init failed", __func__);
2442 /* NOTREACHED */
2443 }
2444 cfil_lck_grp = lck_grp_alloc_init("content filter",
2445 cfil_lck_grp_attr);
2446 if (cfil_lck_grp == NULL) {
2447 panic("%s: lck_grp_alloc_init failed", __func__);
2448 /* NOTREACHED */
2449 }
2450 cfil_lck_attr = lck_attr_alloc_init();
2451 if (cfil_lck_attr == NULL) {
2452 panic("%s: lck_attr_alloc_init failed", __func__);
2453 /* NOTREACHED */
2454 }
2455 lck_rw_init(&cfil_lck_rw, cfil_lck_grp, cfil_lck_attr);
2456
2457 TAILQ_INIT(&cfil_sock_head);
2458 TAILQ_INIT(&cfil_sock_head_stats);
2459
2460 /*
2461 * Register kernel control
2462 */
2463 bzero(&kern_ctl, sizeof(kern_ctl));
2464 strlcpy(kern_ctl.ctl_name, CONTENT_FILTER_CONTROL_NAME,
2465 sizeof(kern_ctl.ctl_name));
2466 kern_ctl.ctl_flags = CTL_FLAG_PRIVILEGED | CTL_FLAG_REG_EXTENDED;
2467 kern_ctl.ctl_sendsize = 512 * 1024; /* enough? */
2468 kern_ctl.ctl_recvsize = 512 * 1024; /* enough? */
2469 kern_ctl.ctl_connect = cfil_ctl_connect;
2470 kern_ctl.ctl_disconnect = cfil_ctl_disconnect;
2471 kern_ctl.ctl_send = cfil_ctl_send;
2472 kern_ctl.ctl_getopt = cfil_ctl_getopt;
2473 kern_ctl.ctl_setopt = cfil_ctl_setopt;
2474 kern_ctl.ctl_rcvd = cfil_ctl_rcvd;
2475 error = ctl_register(&kern_ctl, &cfil_kctlref);
2476 if (error != 0) {
2477 CFIL_LOG(LOG_ERR, "ctl_register failed: %d", error);
2478 return;
2479 }
2480
2481 	// Spawn thread for garbage collection
2482 if (kernel_thread_start(cfil_udp_gc_thread_func, NULL,
2483 &cfil_udp_gc_thread) != KERN_SUCCESS) {
2484 panic_plain("%s: Can't create UDP GC thread", __func__);
2485 /* NOTREACHED */
2486 }
2487 /* this must not fail */
2488 VERIFY(cfil_udp_gc_thread != NULL);
2489
2490 // Spawn thread for statistics reporting
2491 if (kernel_thread_start(cfil_stats_report_thread_func, NULL,
2492 &cfil_stats_report_thread) != KERN_SUCCESS) {
2493 panic_plain("%s: Can't create statistics report thread", __func__);
2494 /* NOTREACHED */
2495 }
2496 /* this must not fail */
2497 VERIFY(cfil_stats_report_thread != NULL);
2498
2499 // Set UDP per-flow mbuf thresholds to 1/32 of platform max
2500 mbuf_limit = MAX(UDP_FLOW_GC_MBUF_CNT_MAX, (nmbclusters << MCLSHIFT) >> UDP_FLOW_GC_MBUF_SHIFT);
2501 cfil_udp_gc_mbuf_num_max = (mbuf_limit >> MCLSHIFT);
2502 cfil_udp_gc_mbuf_cnt_max = mbuf_limit;
2503
2504 memset(&global_cfil_stats_report_buffers, 0, sizeof(global_cfil_stats_report_buffers));
2505 }
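
/*
 * Illustrative sketch (user space): attaching a filter agent to the kernel
 * control registered by cfil_init() above, using the standard kernel
 * control lookup/connect sequence. Assumes CONTENT_FILTER_CONTROL_NAME is
 * available from <net/content_filter.h>; CTL_FLAG_PRIVILEGED means the
 * caller must be suitably privileged. Error handling is trimmed and the
 * helper name is hypothetical.
 */
#if 0
#include <sys/socket.h>
#include <sys/ioctl.h>
#include <sys/sys_domain.h>
#include <sys/kern_control.h>
#include <string.h>
#include <unistd.h>

static int
connect_content_filter_sketch(void)
{
	struct ctl_info info;
	struct sockaddr_ctl addr;
	int fd;

	fd = socket(PF_SYSTEM, SOCK_DGRAM, SYSPROTO_CONTROL);
	if (fd < 0) {
		return -1;
	}

	memset(&info, 0, sizeof(info));
	strlcpy(info.ctl_name, CONTENT_FILTER_CONTROL_NAME, sizeof(info.ctl_name));
	if (ioctl(fd, CTLIOCGINFO, &info) != 0) {   /* resolve name -> ctl_id */
		close(fd);
		return -1;
	}

	memset(&addr, 0, sizeof(addr));
	addr.sc_len = sizeof(addr);
	addr.sc_family = AF_SYSTEM;
	addr.ss_sysaddr = AF_SYS_CONTROL;
	addr.sc_id = info.ctl_id;
	addr.sc_unit = 0;   /* let the kernel pick the unit (kcunit) */

	if (connect(fd, (struct sockaddr *)&addr, sizeof(addr)) != 0) {
		close(fd);
		return -1;
	}
	return fd;          /* cfil_ctl_connect() has now run for this unit */
}
#endif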
2506
2507 struct cfil_info *
2508 cfil_info_alloc(struct socket *so, struct cfil_hash_entry *hash_entry)
2509 {
2510 int kcunit;
2511 struct cfil_info *cfil_info = NULL;
2512 struct inpcb *inp = sotoinpcb(so);
2513
2514 CFIL_LOG(LOG_INFO, "");
2515
2516 socket_lock_assert_owned(so);
2517
2518 cfil_info = zalloc(cfil_info_zone);
2519 if (cfil_info == NULL) {
2520 goto done;
2521 }
2522 bzero(cfil_info, sizeof(struct cfil_info));
2523 os_ref_init(&cfil_info->cfi_ref_count, &cfil_refgrp);
2524
2525 cfil_queue_init(&cfil_info->cfi_snd.cfi_inject_q);
2526 cfil_queue_init(&cfil_info->cfi_rcv.cfi_inject_q);
2527
2528 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
2529 struct cfil_entry *entry;
2530
2531 entry = &cfil_info->cfi_entries[kcunit - 1];
2532 entry->cfe_cfil_info = cfil_info;
2533
2534 /* Initialize the filter entry */
2535 entry->cfe_filter = NULL;
2536 entry->cfe_flags = 0;
2537 entry->cfe_necp_control_unit = 0;
2538 entry->cfe_snd.cfe_pass_offset = 0;
2539 entry->cfe_snd.cfe_peek_offset = 0;
2540 entry->cfe_snd.cfe_peeked = 0;
2541 entry->cfe_rcv.cfe_pass_offset = 0;
2542 entry->cfe_rcv.cfe_peek_offset = 0;
2543 entry->cfe_rcv.cfe_peeked = 0;
2544 /*
2545 		 * Timestamp the last action to avoid prematurely
2546 * triggering garbage collection
2547 */
2548 microuptime(&entry->cfe_last_action);
2549
2550 cfil_queue_init(&entry->cfe_snd.cfe_pending_q);
2551 cfil_queue_init(&entry->cfe_rcv.cfe_pending_q);
2552 cfil_queue_init(&entry->cfe_snd.cfe_ctl_q);
2553 cfil_queue_init(&entry->cfe_rcv.cfe_ctl_q);
2554 }
2555
2556 cfil_rw_lock_exclusive(&cfil_lck_rw);
2557
2558 /*
2559 * Create a cfi_sock_id that's not the socket pointer!
2560 */
2561
2562 if (hash_entry == NULL) {
2563 // This is the TCP case, cfil_info is tracked per socket
2564 if (inp->inp_flowhash == 0) {
2565 inp->inp_flowhash = inp_calc_flowhash(inp);
2566 }
2567
2568 so->so_cfil = cfil_info;
2569 cfil_info->cfi_so = so;
2570 cfil_info->cfi_sock_id =
2571 ((so->so_gencnt << 32) | inp->inp_flowhash);
2572 } else {
2573 // This is the UDP case, cfil_info is tracked in per-socket hash
2574 cfil_info->cfi_so = so;
2575 hash_entry->cfentry_cfil = cfil_info;
2576 cfil_info->cfi_hash_entry = hash_entry;
2577 cfil_info->cfi_sock_id = ((so->so_gencnt << 32) | (hash_entry->cfentry_flowhash & 0xffffffff));
2578 CFIL_LOG(LOG_DEBUG, "CFIL: UDP inp_flowhash %x so_gencnt %llx entry flowhash %x sockID %llx",
2579 inp->inp_flowhash, so->so_gencnt, hash_entry->cfentry_flowhash, cfil_info->cfi_sock_id);
2580
2581 		// Wake up the gc thread if this is the first flow added
2582 if (cfil_sock_udp_attached_count == 0) {
2583 thread_wakeup((caddr_t)&cfil_sock_udp_attached_count);
2584 }
2585
2586 cfil_sock_udp_attached_count++;
2587 }
2588
2589 TAILQ_INSERT_TAIL(&cfil_sock_head, cfil_info, cfi_link);
2590 SLIST_INIT(&cfil_info->cfi_ordered_entries);
2591
2592 cfil_sock_attached_count++;
2593
2594 cfil_rw_unlock_exclusive(&cfil_lck_rw);
2595
2596 done:
2597 if (cfil_info != NULL) {
2598 OSIncrementAtomic(&cfil_stats.cfs_cfi_alloc_ok);
2599 } else {
2600 OSIncrementAtomic(&cfil_stats.cfs_cfi_alloc_fail);
2601 }
2602
2603 return cfil_info;
2604 }
2605
2606 int
2607 cfil_info_attach_unit(struct socket *so, uint32_t filter_control_unit, struct cfil_info *cfil_info)
2608 {
2609 int kcunit;
2610 int attached = 0;
2611
2612 CFIL_LOG(LOG_INFO, "");
2613
2614 socket_lock_assert_owned(so);
2615
2616 cfil_rw_lock_exclusive(&cfil_lck_rw);
2617
2618 for (kcunit = 1;
2619 content_filters != NULL && kcunit <= MAX_CONTENT_FILTER;
2620 kcunit++) {
2621 struct content_filter *cfc = content_filters[kcunit - 1];
2622 struct cfil_entry *entry;
2623 struct cfil_entry *iter_entry;
2624 struct cfil_entry *iter_prev;
2625
2626 if (cfc == NULL) {
2627 continue;
2628 }
2629 if (!(cfc->cf_necp_control_unit & filter_control_unit)) {
2630 continue;
2631 }
2632
2633 entry = &cfil_info->cfi_entries[kcunit - 1];
2634
2635 entry->cfe_filter = cfc;
2636 entry->cfe_necp_control_unit = cfc->cf_necp_control_unit;
2637 TAILQ_INSERT_TAIL(&cfc->cf_sock_entries, entry, cfe_link);
2638 cfc->cf_sock_count++;
2639
2640 /* Insert the entry into the list ordered by control unit */
2641 iter_prev = NULL;
2642 SLIST_FOREACH(iter_entry, &cfil_info->cfi_ordered_entries, cfe_order_link) {
2643 if (entry->cfe_necp_control_unit < iter_entry->cfe_necp_control_unit) {
2644 break;
2645 }
2646 iter_prev = iter_entry;
2647 }
2648
2649 if (iter_prev == NULL) {
2650 SLIST_INSERT_HEAD(&cfil_info->cfi_ordered_entries, entry, cfe_order_link);
2651 } else {
2652 SLIST_INSERT_AFTER(iter_prev, entry, cfe_order_link);
2653 }
2654
2655 verify_content_filter(cfc);
2656 attached = 1;
2657 entry->cfe_flags |= CFEF_CFIL_ATTACHED;
2658 }
2659
2660 cfil_rw_unlock_exclusive(&cfil_lck_rw);
2661
2662 return attached;
2663 }
2664
2665 static void
2666 cfil_info_free(struct cfil_info *cfil_info)
2667 {
2668 int kcunit;
2669 uint64_t in_drain = 0;
2670 uint64_t out_drained = 0;
2671
2672 if (cfil_info == NULL) {
2673 return;
2674 }
2675
2676 CFIL_LOG(LOG_INFO, "");
2677
2678 cfil_rw_lock_exclusive(&cfil_lck_rw);
2679
2680 for (kcunit = 1;
2681 content_filters != NULL && kcunit <= MAX_CONTENT_FILTER;
2682 kcunit++) {
2683 struct cfil_entry *entry;
2684 struct content_filter *cfc;
2685
2686 entry = &cfil_info->cfi_entries[kcunit - 1];
2687
2688 /* Don't be silly and try to detach twice */
2689 if (entry->cfe_filter == NULL) {
2690 continue;
2691 }
2692
2693 cfc = content_filters[kcunit - 1];
2694
2695 VERIFY(cfc == entry->cfe_filter);
2696
2697 entry->cfe_filter = NULL;
2698 entry->cfe_necp_control_unit = 0;
2699 TAILQ_REMOVE(&cfc->cf_sock_entries, entry, cfe_link);
2700 cfc->cf_sock_count--;
2701
2702 verify_content_filter(cfc);
2703 }
2704 if (cfil_info->cfi_hash_entry != NULL) {
2705 cfil_sock_udp_attached_count--;
2706 }
2707 cfil_sock_attached_count--;
2708 TAILQ_REMOVE(&cfil_sock_head, cfil_info, cfi_link);
2709
2710 // Turn off stats reporting for cfil_info.
2711 cfil_info_stats_toggle(cfil_info, NULL, 0);
2712
2713 out_drained += cfil_queue_drain(&cfil_info->cfi_snd.cfi_inject_q);
2714 in_drain += cfil_queue_drain(&cfil_info->cfi_rcv.cfi_inject_q);
2715
2716 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
2717 struct cfil_entry *entry;
2718
2719 entry = &cfil_info->cfi_entries[kcunit - 1];
2720 out_drained += cfil_queue_drain(&entry->cfe_snd.cfe_pending_q);
2721 in_drain += cfil_queue_drain(&entry->cfe_rcv.cfe_pending_q);
2722 out_drained += cfil_queue_drain(&entry->cfe_snd.cfe_ctl_q);
2723 in_drain += cfil_queue_drain(&entry->cfe_rcv.cfe_ctl_q);
2724 }
2725 cfil_rw_unlock_exclusive(&cfil_lck_rw);
2726
2727 if (out_drained) {
2728 OSIncrementAtomic(&cfil_stats.cfs_flush_out_free);
2729 }
2730 if (in_drain) {
2731 OSIncrementAtomic(&cfil_stats.cfs_flush_in_free);
2732 }
2733
2734 zfree(cfil_info_zone, cfil_info);
2735 }
2736
2737 /*
2738 * Received a verdict from userspace for a socket.
2739 * Perform any delayed operation if needed.
2740 */
2741 static void
2742 cfil_sock_received_verdict(struct socket *so)
2743 {
2744 if (so == NULL || so->so_cfil == NULL) {
2745 return;
2746 }
2747
2748 so->so_cfil->cfi_flags |= CFIF_INITIAL_VERDICT;
2749
2750 /*
2751 * If socket has already been connected, trigger
2752 * soisconnected now.
2753 */
2754 if (so->so_cfil->cfi_flags & CFIF_SOCKET_CONNECTED) {
2755 so->so_cfil->cfi_flags &= ~CFIF_SOCKET_CONNECTED;
2756 soisconnected(so);
2757 return;
2758 }
2759 }
2760
2761 /*
2762 * Entry point from Sockets layer
2763 * The socket is locked.
2764 *
2765 	 * Checks whether a connected socket is subject to filtering and
2766 	 * still pending the initial verdict.
2767 */
2768 boolean_t
2769 cfil_sock_connected_pending_verdict(struct socket *so)
2770 {
2771 if (so == NULL || so->so_cfil == NULL) {
2772 return false;
2773 }
2774
2775 if (so->so_cfil->cfi_flags & CFIF_INITIAL_VERDICT) {
2776 return false;
2777 } else {
2778 /*
2779 * Remember that this protocol is already connected, so
2780 * we will trigger soisconnected() upon receipt of
2781 * initial verdict later.
2782 */
2783 so->so_cfil->cfi_flags |= CFIF_SOCKET_CONNECTED;
2784 return true;
2785 }
2786 }
2787
2788 boolean_t
2789 cfil_filter_present(void)
2790 {
2791 return cfil_active_count > 0;
2792 }
2793
2794 /*
2795 * Entry point from Sockets layer
2796 * The socket is locked.
2797 */
2798 errno_t
2799 cfil_sock_attach(struct socket *so, struct sockaddr *local, struct sockaddr *remote, int dir)
2800 {
2801 errno_t error = 0;
2802 uint32_t filter_control_unit;
2803
2804 socket_lock_assert_owned(so);
2805
2806 if (so->so_flags1 & SOF1_FLOW_DIVERT_SKIP) {
2807 /*
2808 * This socket has already been evaluated (and ultimately skipped) by
2809 * flow divert, so it has also already been through content filter if there
2810 * is one.
2811 */
2812 goto done;
2813 }
2814
2815 /* Limit ourselves to TCP that are not MPTCP subflows */
2816 if (SKIP_FILTER_FOR_TCP_SOCKET(so)) {
2817 goto done;
2818 }
2819
2820 filter_control_unit = necp_socket_get_content_filter_control_unit(so);
2821 if (filter_control_unit == 0) {
2822 goto done;
2823 }
2824
2825 if (filter_control_unit == NECP_FILTER_UNIT_NO_FILTER) {
2826 goto done;
2827 }
2828 if ((filter_control_unit & NECP_MASK_USERSPACE_ONLY) != 0) {
2829 OSIncrementAtomic(&cfil_stats.cfs_sock_userspace_only);
2830 goto done;
2831 }
2832 if (cfil_active_count == 0) {
2833 OSIncrementAtomic(&cfil_stats.cfs_sock_attach_in_vain);
2834 goto done;
2835 }
2836 if (so->so_cfil != NULL) {
2837 OSIncrementAtomic(&cfil_stats.cfs_sock_attach_already);
2838 CFIL_LOG(LOG_ERR, "already attached");
2839 } else {
2840 cfil_info_alloc(so, NULL);
2841 if (so->so_cfil == NULL) {
2842 error = ENOMEM;
2843 OSIncrementAtomic(&cfil_stats.cfs_sock_attach_no_mem);
2844 goto done;
2845 }
2846 so->so_cfil->cfi_dir = dir;
2847 so->so_cfil->cfi_filter_control_unit = filter_control_unit;
2848 }
2849 if (cfil_info_attach_unit(so, filter_control_unit, so->so_cfil) == 0) {
2850 CFIL_LOG(LOG_ERR, "cfil_info_attach_unit(%u) failed",
2851 filter_control_unit);
2852 OSIncrementAtomic(&cfil_stats.cfs_sock_attach_failed);
2853 goto done;
2854 }
2855 CFIL_LOG(LOG_INFO, "so %llx filter_control_unit %u sockID %llx",
2856 (uint64_t)VM_KERNEL_ADDRPERM(so),
2857 filter_control_unit, so->so_cfil->cfi_sock_id);
2858
2859 so->so_flags |= SOF_CONTENT_FILTER;
2860 OSIncrementAtomic(&cfil_stats.cfs_sock_attached);
2861
2862 /* Hold a reference on the socket */
2863 so->so_usecount++;
2864
2865 /*
2866 	 * Save passed addresses for attach event msg (in case resend
2867 	 * is needed).
2868 */
2869 if (remote != NULL && (remote->sa_len <= sizeof(union sockaddr_in_4_6))) {
2870 memcpy(&so->so_cfil->cfi_so_attach_faddr, remote, remote->sa_len);
2871 }
2872 if (local != NULL && (local->sa_len <= sizeof(union sockaddr_in_4_6))) {
2873 memcpy(&so->so_cfil->cfi_so_attach_laddr, local, local->sa_len);
2874 }
2875
2876 error = cfil_dispatch_attach_event(so, so->so_cfil, 0, dir);
2877 /* We can recover from flow control or out of memory errors */
2878 if (error == ENOBUFS || error == ENOMEM) {
2879 error = 0;
2880 } else if (error != 0) {
2881 goto done;
2882 }
2883
2884 CFIL_INFO_VERIFY(so->so_cfil);
2885 done:
2886 return error;
2887 }
2888
2889 /*
2890 * Entry point from Sockets layer
2891 * The socket is locked.
2892 */
2893 errno_t
2894 cfil_sock_detach(struct socket *so)
2895 {
2896 if (IS_IP_DGRAM(so)) {
2897 cfil_db_free(so);
2898 return 0;
2899 }
2900
2901 if (so->so_cfil) {
2902 if (so->so_flags & SOF_CONTENT_FILTER) {
2903 so->so_flags &= ~SOF_CONTENT_FILTER;
2904 VERIFY(so->so_usecount > 0);
2905 so->so_usecount--;
2906 }
2907 CFIL_INFO_FREE(so->so_cfil);
2908 so->so_cfil = NULL;
2909 OSIncrementAtomic(&cfil_stats.cfs_sock_detached);
2910 }
2911 return 0;
2912 }
2913
2914 /*
2915 * Fill in the address info of an event message from either
2916 * the socket or passed in address info.
2917 */
2918 static void
2919 cfil_fill_event_msg_addresses(struct cfil_hash_entry *entry, struct inpcb *inp,
2920 union sockaddr_in_4_6 *sin_src, union sockaddr_in_4_6 *sin_dst,
2921 boolean_t isIPv4, boolean_t outgoing)
2922 {
2923 if (isIPv4) {
2924 struct in_addr laddr = {0}, faddr = {0};
2925 u_int16_t lport = 0, fport = 0;
2926
2927 cfil_get_flow_address(entry, inp, &laddr, &faddr, &lport, &fport);
2928
2929 if (outgoing) {
2930 fill_ip_sockaddr_4_6(sin_src, laddr, lport);
2931 fill_ip_sockaddr_4_6(sin_dst, faddr, fport);
2932 } else {
2933 fill_ip_sockaddr_4_6(sin_src, faddr, fport);
2934 fill_ip_sockaddr_4_6(sin_dst, laddr, lport);
2935 }
2936 } else {
2937 struct in6_addr *laddr = NULL, *faddr = NULL;
2938 u_int16_t lport = 0, fport = 0;
2939
2940 cfil_get_flow_address_v6(entry, inp, &laddr, &faddr, &lport, &fport);
2941 if (outgoing) {
2942 fill_ip6_sockaddr_4_6(sin_src, laddr, lport);
2943 fill_ip6_sockaddr_4_6(sin_dst, faddr, fport);
2944 } else {
2945 fill_ip6_sockaddr_4_6(sin_src, faddr, fport);
2946 fill_ip6_sockaddr_4_6(sin_dst, laddr, lport);
2947 }
2948 }
2949 }
2950
2951 static boolean_t
2952 cfil_dispatch_attach_event_sign(cfil_crypto_state_t crypto_state,
2953 struct cfil_info *cfil_info,
2954 struct cfil_msg_sock_attached *msg)
2955 {
2956 struct cfil_crypto_data data = {};
2957
2958 if (crypto_state == NULL || msg == NULL || cfil_info == NULL) {
2959 return false;
2960 }
2961
2962 data.sock_id = msg->cfs_msghdr.cfm_sock_id;
2963 data.direction = msg->cfs_conn_dir;
2964
2965 data.pid = msg->cfs_pid;
2966 data.effective_pid = msg->cfs_e_pid;
2967 uuid_copy(data.uuid, msg->cfs_uuid);
2968 uuid_copy(data.effective_uuid, msg->cfs_e_uuid);
2969 data.socketProtocol = msg->cfs_sock_protocol;
2970 if (data.direction == CFS_CONNECTION_DIR_OUT) {
2971 data.remote.sin6 = msg->cfs_dst.sin6;
2972 data.local.sin6 = msg->cfs_src.sin6;
2973 } else {
2974 data.remote.sin6 = msg->cfs_src.sin6;
2975 data.local.sin6 = msg->cfs_dst.sin6;
2976 }
2977
2978 	// At attach, if the local address is already present, there is no need to re-sign subsequent data messages.
2979 if (!NULLADDRESS(data.local)) {
2980 cfil_info->cfi_isSignatureLatest = true;
2981 }
2982
2983 msg->cfs_signature_length = sizeof(cfil_crypto_signature);
2984 if (cfil_crypto_sign_data(crypto_state, &data, msg->cfs_signature, &msg->cfs_signature_length) != 0) {
2985 msg->cfs_signature_length = 0;
2986 CFIL_LOG(LOG_ERR, "CFIL: Failed to sign attached msg <sockID %llu>",
2987 msg->cfs_msghdr.cfm_sock_id);
2988 return false;
2989 }
2990
2991 return true;
2992 }
2993
2994 static boolean_t
2995 cfil_dispatch_data_event_sign(cfil_crypto_state_t crypto_state,
2996 struct socket *so, struct cfil_info *cfil_info,
2997 struct cfil_msg_data_event *msg)
2998 {
2999 struct cfil_crypto_data data = {};
3000
3001 if (crypto_state == NULL || msg == NULL ||
3002 so == NULL || cfil_info == NULL) {
3003 return false;
3004 }
3005
3006 data.sock_id = cfil_info->cfi_sock_id;
3007 data.direction = cfil_info->cfi_dir;
3008 data.pid = so->last_pid;
3009 memcpy(data.uuid, so->last_uuid, sizeof(uuid_t));
3010 if (so->so_flags & SOF_DELEGATED) {
3011 data.effective_pid = so->e_pid;
3012 memcpy(data.effective_uuid, so->e_uuid, sizeof(uuid_t));
3013 } else {
3014 data.effective_pid = so->last_pid;
3015 memcpy(data.effective_uuid, so->last_uuid, sizeof(uuid_t));
3016 }
3017 data.socketProtocol = so->so_proto->pr_protocol;
3018
3019 if (data.direction == CFS_CONNECTION_DIR_OUT) {
3020 data.remote.sin6 = msg->cfc_dst.sin6;
3021 data.local.sin6 = msg->cfc_src.sin6;
3022 } else {
3023 data.remote.sin6 = msg->cfc_src.sin6;
3024 data.local.sin6 = msg->cfc_dst.sin6;
3025 }
3026
3027 	// At the first data event, the local address may appear for the first time: update the
3028 	// address cache; subsequent data messages then no longer need to be re-signed.
3029 if (!NULLADDRESS(data.local)) {
3030 memcpy(&cfil_info->cfi_so_attach_laddr, &data.local, data.local.sa.sa_len);
3031 cfil_info->cfi_isSignatureLatest = true;
3032 }
3033
3034 msg->cfd_signature_length = sizeof(cfil_crypto_signature);
3035 if (cfil_crypto_sign_data(crypto_state, &data, msg->cfd_signature, &msg->cfd_signature_length) != 0) {
3036 msg->cfd_signature_length = 0;
3037 CFIL_LOG(LOG_ERR, "CFIL: Failed to sign data msg <sockID %llu>",
3038 msg->cfd_msghdr.cfm_sock_id);
3039 return false;
3040 }
3041
3042 return true;
3043 }
3044
3045 static boolean_t
3046 cfil_dispatch_closed_event_sign(cfil_crypto_state_t crypto_state,
3047 struct socket *so, struct cfil_info *cfil_info,
3048 struct cfil_msg_sock_closed *msg)
3049 {
3050 struct cfil_crypto_data data = {};
3051 struct cfil_hash_entry hash_entry = {};
3052 struct cfil_hash_entry *hash_entry_ptr = NULL;
3053 struct inpcb *inp = (struct inpcb *)so->so_pcb;
3054
3055 if (crypto_state == NULL || msg == NULL ||
3056 so == NULL || inp == NULL || cfil_info == NULL) {
3057 return false;
3058 }
3059
3060 data.sock_id = cfil_info->cfi_sock_id;
3061 data.direction = cfil_info->cfi_dir;
3062
3063 data.pid = so->last_pid;
3064 memcpy(data.uuid, so->last_uuid, sizeof(uuid_t));
3065 if (so->so_flags & SOF_DELEGATED) {
3066 data.effective_pid = so->e_pid;
3067 memcpy(data.effective_uuid, so->e_uuid, sizeof(uuid_t));
3068 } else {
3069 data.effective_pid = so->last_pid;
3070 memcpy(data.effective_uuid, so->last_uuid, sizeof(uuid_t));
3071 }
3072 data.socketProtocol = so->so_proto->pr_protocol;
3073
3074 /*
3075 * Fill in address info:
3076 * For UDP, use the cfil_info hash entry directly.
3077 	 * For TCP, compose a hash entry with the saved addresses.
3078 */
3079 if (cfil_info->cfi_hash_entry != NULL) {
3080 hash_entry_ptr = cfil_info->cfi_hash_entry;
3081 } else if (cfil_info->cfi_so_attach_faddr.sa.sa_len > 0 ||
3082 cfil_info->cfi_so_attach_laddr.sa.sa_len > 0) {
3083 fill_cfil_hash_entry_from_address(&hash_entry, TRUE, &cfil_info->cfi_so_attach_laddr.sa, FALSE);
3084 fill_cfil_hash_entry_from_address(&hash_entry, FALSE, &cfil_info->cfi_so_attach_faddr.sa, FALSE);
3085 hash_entry_ptr = &hash_entry;
3086 }
3087 if (hash_entry_ptr != NULL) {
3088 boolean_t outgoing = (cfil_info->cfi_dir == CFS_CONNECTION_DIR_OUT);
3089 union sockaddr_in_4_6 *src = outgoing ? &data.local : &data.remote;
3090 union sockaddr_in_4_6 *dst = outgoing ? &data.remote : &data.local;
3091 cfil_fill_event_msg_addresses(hash_entry_ptr, inp, src, dst, !IS_INP_V6(inp), outgoing);
3092 }
3093
3094 data.byte_count_in = cfil_info->cfi_byte_inbound_count;
3095 data.byte_count_out = cfil_info->cfi_byte_outbound_count;
3096
3097 msg->cfc_signature_length = sizeof(cfil_crypto_signature);
3098 if (cfil_crypto_sign_data(crypto_state, &data, msg->cfc_signature, &msg->cfc_signature_length) != 0) {
3099 msg->cfc_signature_length = 0;
3100 CFIL_LOG(LOG_ERR, "CFIL: Failed to sign closed msg <sockID %llu>",
3101 msg->cfc_msghdr.cfm_sock_id);
3102 return false;
3103 }
3104
3105 return true;
3106 }
3107
3108 static int
3109 cfil_dispatch_attach_event(struct socket *so, struct cfil_info *cfil_info,
3110 uint32_t kcunit, int conn_dir)
3111 {
3112 errno_t error = 0;
3113 struct cfil_entry *entry = NULL;
3114 struct cfil_msg_sock_attached msg_attached;
3115 struct content_filter *cfc = NULL;
3116 struct inpcb *inp = (struct inpcb *)so->so_pcb;
3117 struct cfil_hash_entry *hash_entry_ptr = NULL;
3118 struct cfil_hash_entry hash_entry;
3119
3120 memset(&hash_entry, 0, sizeof(struct cfil_hash_entry));
3121 proc_t p = PROC_NULL;
3122 task_t t = TASK_NULL;
3123
3124 socket_lock_assert_owned(so);
3125
3126 cfil_rw_lock_shared(&cfil_lck_rw);
3127
3128 if (so->so_proto == NULL || so->so_proto->pr_domain == NULL) {
3129 error = EINVAL;
3130 goto done;
3131 }
3132
3133 if (kcunit == 0) {
3134 entry = SLIST_FIRST(&cfil_info->cfi_ordered_entries);
3135 } else {
3136 entry = &cfil_info->cfi_entries[kcunit - 1];
3137 }
3138
3139 if (entry == NULL) {
3140 goto done;
3141 }
3142
3143 cfc = entry->cfe_filter;
3144 if (cfc == NULL) {
3145 goto done;
3146 }
3147
3148 if ((entry->cfe_flags & CFEF_SENT_SOCK_ATTACHED)) {
3149 goto done;
3150 }
3151
3152 if (kcunit == 0) {
3153 kcunit = CFI_ENTRY_KCUNIT(cfil_info, entry);
3154 }
3155
3156 CFIL_LOG(LOG_INFO, "so %llx filter_control_unit %u kcunit %u",
3157 (uint64_t)VM_KERNEL_ADDRPERM(so), entry->cfe_necp_control_unit, kcunit);
3158
3159 /* Would be wasteful to try when flow controlled */
3160 if (cfc->cf_flags & CFF_FLOW_CONTROLLED) {
3161 error = ENOBUFS;
3162 goto done;
3163 }
3164
3165 bzero(&msg_attached, sizeof(struct cfil_msg_sock_attached));
3166 msg_attached.cfs_msghdr.cfm_len = sizeof(struct cfil_msg_sock_attached);
3167 msg_attached.cfs_msghdr.cfm_version = CFM_VERSION_CURRENT;
3168 msg_attached.cfs_msghdr.cfm_type = CFM_TYPE_EVENT;
3169 msg_attached.cfs_msghdr.cfm_op = CFM_OP_SOCKET_ATTACHED;
3170 msg_attached.cfs_msghdr.cfm_sock_id = entry->cfe_cfil_info->cfi_sock_id;
3171
3172 msg_attached.cfs_sock_family = so->so_proto->pr_domain->dom_family;
3173 msg_attached.cfs_sock_type = so->so_proto->pr_type;
3174 msg_attached.cfs_sock_protocol = so->so_proto->pr_protocol;
3175 msg_attached.cfs_pid = so->last_pid;
3176 memcpy(msg_attached.cfs_uuid, so->last_uuid, sizeof(uuid_t));
3177 if (so->so_flags & SOF_DELEGATED) {
3178 msg_attached.cfs_e_pid = so->e_pid;
3179 memcpy(msg_attached.cfs_e_uuid, so->e_uuid, sizeof(uuid_t));
3180 } else {
3181 msg_attached.cfs_e_pid = so->last_pid;
3182 memcpy(msg_attached.cfs_e_uuid, so->last_uuid, sizeof(uuid_t));
3183 }
3184
3185 /*
3186 * Fill in address info:
3187 * For UDP, use the cfil_info hash entry directly.
3188 	 * For TCP, compose a hash entry with the saved addresses.
3189 */
3190 if (cfil_info->cfi_hash_entry != NULL) {
3191 hash_entry_ptr = cfil_info->cfi_hash_entry;
3192 } else if (cfil_info->cfi_so_attach_faddr.sa.sa_len > 0 ||
3193 cfil_info->cfi_so_attach_laddr.sa.sa_len > 0) {
3194 fill_cfil_hash_entry_from_address(&hash_entry, TRUE, &cfil_info->cfi_so_attach_laddr.sa, FALSE);
3195 fill_cfil_hash_entry_from_address(&hash_entry, FALSE, &cfil_info->cfi_so_attach_faddr.sa, FALSE);
3196 hash_entry_ptr = &hash_entry;
3197 }
3198 if (hash_entry_ptr != NULL) {
3199 cfil_fill_event_msg_addresses(hash_entry_ptr, inp,
3200 &msg_attached.cfs_src, &msg_attached.cfs_dst,
3201 !IS_INP_V6(inp), conn_dir == CFS_CONNECTION_DIR_OUT);
3202 }
3203 msg_attached.cfs_conn_dir = conn_dir;
3204
3205 if (msg_attached.cfs_e_pid != 0) {
3206 p = proc_find(msg_attached.cfs_e_pid);
3207 if (p != PROC_NULL) {
3208 t = proc_task(p);
3209 if (t != TASK_NULL) {
3210 audit_token_t audit_token;
3211 mach_msg_type_number_t count = TASK_AUDIT_TOKEN_COUNT;
3212 if (task_info(t, TASK_AUDIT_TOKEN, (task_info_t)&audit_token, &count) == KERN_SUCCESS) {
3213 memcpy(&msg_attached.cfs_audit_token, &audit_token, sizeof(msg_attached.cfs_audit_token));
3214 } else {
3215 CFIL_LOG(LOG_ERR, "CFIL: Failed to get process audit token <sockID %llu> ",
3216 entry->cfe_cfil_info->cfi_sock_id);
3217 }
3218 }
3219 proc_rele(p);
3220 }
3221 }
3222
3223 if (cfil_info->cfi_debug) {
3224 cfil_info_log(LOG_ERR, cfil_info, "CFIL: SENDING ATTACH UP");
3225 }
3226
3227 cfil_dispatch_attach_event_sign(entry->cfe_filter->cf_crypto_state, cfil_info, &msg_attached);
3228
3229 #if LIFECYCLE_DEBUG
3230 CFIL_LOG(LOG_DEBUG, "CFIL: LIFECYCLE: SENDING ATTACH UP <sockID %llu> ",
3231 entry->cfe_cfil_info->cfi_sock_id);
3232 #endif
3233
3234 error = ctl_enqueuedata(entry->cfe_filter->cf_kcref,
3235 entry->cfe_filter->cf_kcunit,
3236 &msg_attached,
3237 sizeof(struct cfil_msg_sock_attached),
3238 CTL_DATA_EOR);
3239 if (error != 0) {
3240 CFIL_LOG(LOG_ERR, "ctl_enqueuedata() failed: %d", error);
3241 goto done;
3242 }
3243 microuptime(&entry->cfe_last_event);
3244 cfil_info->cfi_first_event.tv_sec = entry->cfe_last_event.tv_sec;
3245 cfil_info->cfi_first_event.tv_usec = entry->cfe_last_event.tv_usec;
3246
3247 entry->cfe_flags |= CFEF_SENT_SOCK_ATTACHED;
3248 OSIncrementAtomic(&cfil_stats.cfs_attach_event_ok);
3249 done:
3250
3251 /* We can recover from flow control */
3252 if (error == ENOBUFS) {
3253 entry->cfe_flags |= CFEF_FLOW_CONTROLLED;
3254 OSIncrementAtomic(&cfil_stats.cfs_attach_event_flow_control);
3255
3256 if (!cfil_rw_lock_shared_to_exclusive(&cfil_lck_rw)) {
3257 cfil_rw_lock_exclusive(&cfil_lck_rw);
3258 }
3259
3260 cfc->cf_flags |= CFF_FLOW_CONTROLLED;
3261
3262 cfil_rw_unlock_exclusive(&cfil_lck_rw);
3263 } else {
3264 if (error != 0) {
3265 OSIncrementAtomic(&cfil_stats.cfs_attach_event_fail);
3266 }
3267
3268 cfil_rw_unlock_shared(&cfil_lck_rw);
3269 }
3270 return error;
3271 }
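
/*
 * Illustrative sketch (user space): receiving one event enqueued by the
 * dispatch routines in this file and switching on its opcode. Assumes each
 * message arrives as a single datagram (they are enqueued with
 * CTL_DATA_EOR) and that the CFM_OP_DATA_OUT / CFM_OP_DATA_IN opcodes from
 * <net/content_filter.h> are available; the buffer size and helper name
 * are arbitrary.
 */
#if 0
static void
read_one_event_sketch(int fd)
{
	uint8_t buf[8 * 1024];
	struct cfil_msg_hdr *hdr = (struct cfil_msg_hdr *)buf;
	ssize_t n = recv(fd, buf, sizeof(buf), 0);

	if (n < (ssize_t)sizeof(*hdr) || hdr->cfm_type != CFM_TYPE_EVENT) {
		return;
	}
	switch (hdr->cfm_op) {
	case CFM_OP_SOCKET_ATTACHED:
		/* struct cfil_msg_sock_attached: new flow, reply with a verdict */
		break;
	case CFM_OP_DATA_OUT:
	case CFM_OP_DATA_IN:
		/* struct cfil_msg_data_event: inspect payload, advance pass/peek offsets */
		break;
	case CFM_OP_DISCONNECT_OUT:
	case CFM_OP_DISCONNECT_IN:
	case CFM_OP_SOCKET_CLOSED:
		/* the flow is going away */
		break;
	default:
		break;
	}
}
#endif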
3272
3273 static int
3274 cfil_dispatch_disconnect_event(struct socket *so, struct cfil_info *cfil_info, uint32_t kcunit, int outgoing)
3275 {
3276 errno_t error = 0;
3277 struct mbuf *msg = NULL;
3278 struct cfil_entry *entry;
3279 struct cfe_buf *entrybuf;
3280 struct cfil_msg_hdr msg_disconnected;
3281 struct content_filter *cfc;
3282
3283 socket_lock_assert_owned(so);
3284
3285 cfil_rw_lock_shared(&cfil_lck_rw);
3286
3287 entry = &cfil_info->cfi_entries[kcunit - 1];
3288 if (outgoing) {
3289 entrybuf = &entry->cfe_snd;
3290 } else {
3291 entrybuf = &entry->cfe_rcv;
3292 }
3293
3294 cfc = entry->cfe_filter;
3295 if (cfc == NULL) {
3296 goto done;
3297 }
3298
3299 CFIL_LOG(LOG_INFO, "so %llx kcunit %u outgoing %d",
3300 (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit, outgoing);
3301
3302 /*
3303 * Send the disconnection event once
3304 */
3305 if ((outgoing && (entry->cfe_flags & CFEF_SENT_DISCONNECT_OUT)) ||
3306 (!outgoing && (entry->cfe_flags & CFEF_SENT_DISCONNECT_IN))) {
3307 CFIL_LOG(LOG_INFO, "so %llx disconnect already sent",
3308 (uint64_t)VM_KERNEL_ADDRPERM(so));
3309 goto done;
3310 }
3311
3312 /*
3313 * We're not disconnected as long as some data is waiting
3314 * to be delivered to the filter
3315 */
3316 if (outgoing && cfil_queue_empty(&entrybuf->cfe_ctl_q) == 0) {
3317 CFIL_LOG(LOG_INFO, "so %llx control queue not empty",
3318 (uint64_t)VM_KERNEL_ADDRPERM(so));
3319 error = EBUSY;
3320 goto done;
3321 }
3322 /* Would be wasteful to try when flow controlled */
3323 if (cfc->cf_flags & CFF_FLOW_CONTROLLED) {
3324 error = ENOBUFS;
3325 goto done;
3326 }
3327
3328 if (cfil_info->cfi_debug) {
3329 cfil_info_log(LOG_ERR, cfil_info, "CFIL: SENDING DISCONNECT UP");
3330 }
3331
3332 #if LIFECYCLE_DEBUG
3333 cfil_info_log(LOG_ERR, cfil_info, outgoing ?
3334 "CFIL: LIFECYCLE: OUT - SENDING DISCONNECT UP":
3335 "CFIL: LIFECYCLE: IN - SENDING DISCONNECT UP");
3336 #endif
3337
3338 bzero(&msg_disconnected, sizeof(struct cfil_msg_hdr));
3339 msg_disconnected.cfm_len = sizeof(struct cfil_msg_hdr);
3340 msg_disconnected.cfm_version = CFM_VERSION_CURRENT;
3341 msg_disconnected.cfm_type = CFM_TYPE_EVENT;
3342 msg_disconnected.cfm_op = outgoing ? CFM_OP_DISCONNECT_OUT :
3343 CFM_OP_DISCONNECT_IN;
3344 msg_disconnected.cfm_sock_id = entry->cfe_cfil_info->cfi_sock_id;
3345 error = ctl_enqueuedata(entry->cfe_filter->cf_kcref,
3346 entry->cfe_filter->cf_kcunit,
3347 &msg_disconnected,
3348 sizeof(struct cfil_msg_hdr),
3349 CTL_DATA_EOR);
3350 if (error != 0) {
3351 		CFIL_LOG(LOG_ERR, "ctl_enqueuedata() failed: %d", error);
3352 mbuf_freem(msg);
3353 goto done;
3354 }
3355 microuptime(&entry->cfe_last_event);
3356 CFI_ADD_TIME_LOG(cfil_info, &entry->cfe_last_event, &cfil_info->cfi_first_event, msg_disconnected.cfm_op);
3357
3358 /* Remember we have sent the disconnection message */
3359 if (outgoing) {
3360 entry->cfe_flags |= CFEF_SENT_DISCONNECT_OUT;
3361 OSIncrementAtomic(&cfil_stats.cfs_disconnect_out_event_ok);
3362 } else {
3363 entry->cfe_flags |= CFEF_SENT_DISCONNECT_IN;
3364 OSIncrementAtomic(&cfil_stats.cfs_disconnect_in_event_ok);
3365 }
3366 done:
3367 if (error == ENOBUFS) {
3368 entry->cfe_flags |= CFEF_FLOW_CONTROLLED;
3369 OSIncrementAtomic(
3370 &cfil_stats.cfs_disconnect_event_flow_control);
3371
3372 if (!cfil_rw_lock_shared_to_exclusive(&cfil_lck_rw)) {
3373 cfil_rw_lock_exclusive(&cfil_lck_rw);
3374 }
3375
3376 cfc->cf_flags |= CFF_FLOW_CONTROLLED;
3377
3378 cfil_rw_unlock_exclusive(&cfil_lck_rw);
3379 } else {
3380 if (error != 0) {
3381 OSIncrementAtomic(
3382 &cfil_stats.cfs_disconnect_event_fail);
3383 }
3384
3385 cfil_rw_unlock_shared(&cfil_lck_rw);
3386 }
3387 return error;
3388 }
3389
3390 int
3391 cfil_dispatch_closed_event(struct socket *so, struct cfil_info *cfil_info, int kcunit)
3392 {
3393 struct cfil_entry *entry;
3394 struct cfil_msg_sock_closed msg_closed;
3395 errno_t error = 0;
3396 struct content_filter *cfc;
3397
3398 socket_lock_assert_owned(so);
3399
3400 cfil_rw_lock_shared(&cfil_lck_rw);
3401
3402 entry = &cfil_info->cfi_entries[kcunit - 1];
3403 cfc = entry->cfe_filter;
3404 if (cfc == NULL) {
3405 goto done;
3406 }
3407
3408 CFIL_LOG(LOG_INFO, "so %llx kcunit %d",
3409 (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit);
3410
3411 /* Would be wasteful to try when flow controlled */
3412 if (cfc->cf_flags & CFF_FLOW_CONTROLLED) {
3413 error = ENOBUFS;
3414 goto done;
3415 }
3416 /*
3417 * Send a single closed message per filter
3418 */
3419 if ((entry->cfe_flags & CFEF_SENT_SOCK_CLOSED) != 0) {
3420 goto done;
3421 }
3422 if ((entry->cfe_flags & CFEF_SENT_SOCK_ATTACHED) == 0) {
3423 goto done;
3424 }
3425
3426 microuptime(&entry->cfe_last_event);
3427 CFI_ADD_TIME_LOG(cfil_info, &entry->cfe_last_event, &cfil_info->cfi_first_event, CFM_OP_SOCKET_CLOSED);
3428
3429 bzero(&msg_closed, sizeof(struct cfil_msg_sock_closed));
3430 msg_closed.cfc_msghdr.cfm_len = sizeof(struct cfil_msg_sock_closed);
3431 msg_closed.cfc_msghdr.cfm_version = CFM_VERSION_CURRENT;
3432 msg_closed.cfc_msghdr.cfm_type = CFM_TYPE_EVENT;
3433 msg_closed.cfc_msghdr.cfm_op = CFM_OP_SOCKET_CLOSED;
3434 msg_closed.cfc_msghdr.cfm_sock_id = entry->cfe_cfil_info->cfi_sock_id;
3435 msg_closed.cfc_first_event.tv_sec = cfil_info->cfi_first_event.tv_sec;
3436 msg_closed.cfc_first_event.tv_usec = cfil_info->cfi_first_event.tv_usec;
3437 memcpy(msg_closed.cfc_op_time, cfil_info->cfi_op_time, sizeof(uint32_t) * CFI_MAX_TIME_LOG_ENTRY);
3438 memcpy(msg_closed.cfc_op_list, cfil_info->cfi_op_list, sizeof(unsigned char) * CFI_MAX_TIME_LOG_ENTRY);
3439 msg_closed.cfc_op_list_ctr = cfil_info->cfi_op_list_ctr;
3440 msg_closed.cfc_byte_inbound_count = cfil_info->cfi_byte_inbound_count;
3441 msg_closed.cfc_byte_outbound_count = cfil_info->cfi_byte_outbound_count;
3442
3443 cfil_dispatch_closed_event_sign(entry->cfe_filter->cf_crypto_state, so, cfil_info, &msg_closed);
3444
3445 if (cfil_info->cfi_debug) {
3446 cfil_info_log(LOG_ERR, cfil_info, "CFIL: SENDING CLOSED UP");
3447 }
3448
3449 #if LIFECYCLE_DEBUG
3450 CFIL_LOG(LOG_ERR, "CFIL: LIFECYCLE: SENDING CLOSED UP: <sock id %llu> op ctr %d, start time %llu.%llu", msg_closed.cfc_msghdr.cfm_sock_id, cfil_info->cfi_op_list_ctr, cfil_info->cfi_first_event.tv_sec, cfil_info->cfi_first_event.tv_usec);
3451 #endif
3452 /* for debugging
3453 * if (msg_closed.cfc_op_list_ctr > CFI_MAX_TIME_LOG_ENTRY) {
3454 * msg_closed.cfc_op_list_ctr = CFI_MAX_TIME_LOG_ENTRY; // just in case
3455 * }
3456 * for (unsigned int i = 0; i < msg_closed.cfc_op_list_ctr ; i++) {
3457 * CFIL_LOG(LOG_ERR, "MD: socket %llu event %2u, time + %u msec", msg_closed.cfc_msghdr.cfm_sock_id, (unsigned short)msg_closed.cfc_op_list[i], msg_closed.cfc_op_time[i]);
3458 * }
3459 */
3460
3461 error = ctl_enqueuedata(entry->cfe_filter->cf_kcref,
3462 entry->cfe_filter->cf_kcunit,
3463 &msg_closed,
3464 sizeof(struct cfil_msg_sock_closed),
3465 CTL_DATA_EOR);
3466 if (error != 0) {
3467 CFIL_LOG(LOG_ERR, "ctl_enqueuedata() failed: %d",
3468 error);
3469 goto done;
3470 }
3471
3472 entry->cfe_flags |= CFEF_SENT_SOCK_CLOSED;
3473 OSIncrementAtomic(&cfil_stats.cfs_closed_event_ok);
3474 done:
3475 /* We can recover from flow control */
3476 if (error == ENOBUFS) {
3477 entry->cfe_flags |= CFEF_FLOW_CONTROLLED;
3478 OSIncrementAtomic(&cfil_stats.cfs_closed_event_flow_control);
3479
3480 if (!cfil_rw_lock_shared_to_exclusive(&cfil_lck_rw)) {
3481 cfil_rw_lock_exclusive(&cfil_lck_rw);
3482 }
3483
3484 cfc->cf_flags |= CFF_FLOW_CONTROLLED;
3485
3486 cfil_rw_unlock_exclusive(&cfil_lck_rw);
3487 } else {
3488 if (error != 0) {
3489 OSIncrementAtomic(&cfil_stats.cfs_closed_event_fail);
3490 }
3491
3492 cfil_rw_unlock_shared(&cfil_lck_rw);
3493 }
3494
3495 return error;
3496 }
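/*
 * Illustrative sketch (hypothetical, user-space side, not part of this
 * file): how a filter agent might drain the events enqueued above from its
 * kernel control socket.  The kctl_fd descriptor, buffer size and
 * handle_*() helpers are assumptions; only the message layout
 * (struct cfil_msg_hdr and the CFM_OP_* event codes) comes from this
 * subsystem.
 *
 *	uint8_t buf[65536];
 *	ssize_t n = recv(kctl_fd, buf, sizeof(buf), 0);
 *	if (n >= (ssize_t)sizeof(struct cfil_msg_hdr)) {
 *		struct cfil_msg_hdr *hdr = (struct cfil_msg_hdr *)(void *)buf;
 *		switch (hdr->cfm_op) {
 *		case CFM_OP_DISCONNECT_IN:
 *		case CFM_OP_DISCONNECT_OUT:
 *			// no more data will flow in that direction for this flow
 *			handle_disconnect(hdr->cfm_sock_id, hdr->cfm_op);
 *			break;
 *		case CFM_OP_SOCKET_CLOSED:
 *			// carries the op history and byte counts filled in above
 *			handle_closed((struct cfil_msg_sock_closed *)hdr);
 *			break;
 *		default:
 *			break;
 *		}
 *	}
 */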
3497
3498 static void
3499 fill_ip6_sockaddr_4_6(union sockaddr_in_4_6 *sin46,
3500 struct in6_addr *ip6, u_int16_t port)
3501 {
3502 if (sin46 == NULL) {
3503 return;
3504 }
3505
3506 struct sockaddr_in6 *sin6 = &sin46->sin6;
3507
3508 sin6->sin6_family = AF_INET6;
3509 sin6->sin6_len = sizeof(*sin6);
3510 sin6->sin6_port = port;
3511 sin6->sin6_addr = *ip6;
3512 if (IN6_IS_SCOPE_EMBED(&sin6->sin6_addr)) {
3513 sin6->sin6_scope_id = ntohs(sin6->sin6_addr.s6_addr16[1]);
3514 sin6->sin6_addr.s6_addr16[1] = 0;
3515 }
3516 }
3517
3518 static void
3519 fill_ip_sockaddr_4_6(union sockaddr_in_4_6 *sin46,
3520 struct in_addr ip, u_int16_t port)
3521 {
3522 if (sin46 == NULL) {
3523 return;
3524 }
3525
3526 struct sockaddr_in *sin = &sin46->sin;
3527
3528 sin->sin_family = AF_INET;
3529 sin->sin_len = sizeof(*sin);
3530 sin->sin_port = port;
3531 sin->sin_addr.s_addr = ip.s_addr;
3532 }
3533
3534 static void
3535 cfil_get_flow_address_v6(struct cfil_hash_entry *entry, struct inpcb *inp,
3536 struct in6_addr **laddr, struct in6_addr **faddr,
3537 u_int16_t *lport, u_int16_t *fport)
3538 {
3539 if (entry != NULL) {
3540 *laddr = &entry->cfentry_laddr.addr6;
3541 *faddr = &entry->cfentry_faddr.addr6;
3542 *lport = entry->cfentry_lport;
3543 *fport = entry->cfentry_fport;
3544 } else {
3545 *laddr = &inp->in6p_laddr;
3546 *faddr = &inp->in6p_faddr;
3547 *lport = inp->inp_lport;
3548 *fport = inp->inp_fport;
3549 }
3550 }
3551
3552 static void
3553 cfil_get_flow_address(struct cfil_hash_entry *entry, struct inpcb *inp,
3554 struct in_addr *laddr, struct in_addr *faddr,
3555 u_int16_t *lport, u_int16_t *fport)
3556 {
3557 if (entry != NULL) {
3558 *laddr = entry->cfentry_laddr.addr46.ia46_addr4;
3559 *faddr = entry->cfentry_faddr.addr46.ia46_addr4;
3560 *lport = entry->cfentry_lport;
3561 *fport = entry->cfentry_fport;
3562 } else {
3563 *laddr = inp->inp_laddr;
3564 *faddr = inp->inp_faddr;
3565 *lport = inp->inp_lport;
3566 *fport = inp->inp_fport;
3567 }
3568 }
3569
3570 static int
3571 cfil_dispatch_data_event(struct socket *so, struct cfil_info *cfil_info, uint32_t kcunit, int outgoing,
3572 struct mbuf *data, unsigned int copyoffset, unsigned int copylen)
3573 {
3574 errno_t error = 0;
3575 struct mbuf *copy = NULL;
3576 struct mbuf *msg = NULL;
3577 unsigned int one = 1;
3578 struct cfil_msg_data_event *data_req;
3579 size_t hdrsize;
3580 struct inpcb *inp = (struct inpcb *)so->so_pcb;
3581 struct cfil_entry *entry;
3582 struct cfe_buf *entrybuf;
3583 struct content_filter *cfc;
3584 struct timeval tv;
3585 int inp_flags = 0;
3586
3587 cfil_rw_lock_shared(&cfil_lck_rw);
3588
3589 entry = &cfil_info->cfi_entries[kcunit - 1];
3590 if (outgoing) {
3591 entrybuf = &entry->cfe_snd;
3592 } else {
3593 entrybuf = &entry->cfe_rcv;
3594 }
3595
3596 cfc = entry->cfe_filter;
3597 if (cfc == NULL) {
3598 goto done;
3599 }
3600
3601 data = cfil_data_start(data);
3602 if (data == NULL || (data->m_flags & M_PKTHDR) == 0) {
3603 CFIL_LOG(LOG_ERR, "NOT PKTHDR");
3604 goto done;
3605 }
3606
3607 CFIL_LOG(LOG_INFO, "so %llx kcunit %u outgoing %d",
3608 (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit, outgoing);
3609
3610 socket_lock_assert_owned(so);
3611
3612 /* Would be wasteful to try */
3613 if (cfc->cf_flags & CFF_FLOW_CONTROLLED) {
3614 error = ENOBUFS;
3615 goto done;
3616 }
3617
3618 /* Make a copy of the data to pass to kernel control socket */
3619 copy = m_copym_mode(data, copyoffset, copylen, M_DONTWAIT,
3620 M_COPYM_NOOP_HDR);
3621 if (copy == NULL) {
3622 CFIL_LOG(LOG_ERR, "m_copym_mode() failed");
3623 error = ENOMEM;
3624 goto done;
3625 }
3626
3627 /* We need an mbuf packet for the message header */
3628 hdrsize = sizeof(struct cfil_msg_data_event);
3629 error = mbuf_allocpacket(MBUF_DONTWAIT, hdrsize, &one, &msg);
3630 if (error != 0) {
3631 CFIL_LOG(LOG_ERR, "mbuf_allocpacket() failed");
3632 m_freem(copy);
3633 /*
3634 * ENOBUFS is reserved to indicate flow control, so report ENOMEM instead
3635 */
3636 error = ENOMEM;
3637 goto done;
3638 }
3639 mbuf_setlen(msg, hdrsize);
3640 mbuf_pkthdr_setlen(msg, hdrsize + copylen);
3641 msg->m_next = copy;
3642 data_req = (struct cfil_msg_data_event *)mbuf_data(msg);
3643 bzero(data_req, hdrsize);
3644 data_req->cfd_msghdr.cfm_len = (uint32_t)hdrsize + copylen;
3645 data_req->cfd_msghdr.cfm_version = 1;
3646 data_req->cfd_msghdr.cfm_type = CFM_TYPE_EVENT;
3647 data_req->cfd_msghdr.cfm_op =
3648 outgoing ? CFM_OP_DATA_OUT : CFM_OP_DATA_IN;
3649 data_req->cfd_msghdr.cfm_sock_id =
3650 entry->cfe_cfil_info->cfi_sock_id;
3651 data_req->cfd_start_offset = entrybuf->cfe_peeked;
3652 data_req->cfd_end_offset = entrybuf->cfe_peeked + copylen;
3653
3654 data_req->cfd_flags = 0;
3655 if (OPTIONAL_IP_HEADER(so)) {
3656 /*
3657 * For non-UDP/TCP traffic, indicate to filters if optional
3658 * IP header is present:
3659 * outgoing - indicate according to INP_HDRINCL flag
3660 * incoming - For IPv4 only, stripping of IP header is
3661 * optional. But for CFIL, we delay stripping
3662 * at rip_input. So CFIL always expects IP
3663 * frames. IP header will be stripped according
3664 * to INP_STRIPHDR flag later at reinjection.
3665 */
3666 if ((!outgoing && !IS_INP_V6(inp)) ||
3667 (outgoing && cfil_dgram_peek_socket_state(data, &inp_flags) && (inp_flags & INP_HDRINCL))) {
3668 data_req->cfd_flags |= CFD_DATA_FLAG_IP_HEADER;
3669 }
3670 }
3671
3672 /*
3673 * Copy address/port into event msg.
3674 * For non-connected sockets the addresses need to be copied
3675 * from the passed-in parameters
3676 */
3677 cfil_fill_event_msg_addresses(cfil_info->cfi_hash_entry, inp,
3678 &data_req->cfc_src, &data_req->cfc_dst,
3679 !IS_INP_V6(inp), outgoing);
3680
3681 if (cfil_info->cfi_debug) {
3682 cfil_info_log(LOG_ERR, cfil_info, "CFIL: SENDING DATA UP");
3683 }
3684
3685 if (cfil_info->cfi_isSignatureLatest == false) {
3686 cfil_dispatch_data_event_sign(entry->cfe_filter->cf_crypto_state, so, cfil_info, data_req);
3687 }
3688
3689 microuptime(&tv);
3690 CFI_ADD_TIME_LOG(cfil_info, &tv, &cfil_info->cfi_first_event, data_req->cfd_msghdr.cfm_op);
3691
3692 /* Pass the message to the content filter */
3693 error = ctl_enqueuembuf(entry->cfe_filter->cf_kcref,
3694 entry->cfe_filter->cf_kcunit,
3695 msg, CTL_DATA_EOR);
3696 if (error != 0) {
3697 CFIL_LOG(LOG_ERR, "ctl_enqueuembuf() failed: %d", error);
3698 mbuf_freem(msg);
3699 goto done;
3700 }
3701 entry->cfe_flags &= ~CFEF_FLOW_CONTROLLED;
3702 OSIncrementAtomic(&cfil_stats.cfs_data_event_ok);
3703
3704 #if VERDICT_DEBUG
3705 CFIL_LOG(LOG_ERR, "CFIL: VERDICT ACTION: so %llx sockID %llu outgoing %d: mbuf %llx copyoffset %u copylen %u",
3706 (uint64_t)VM_KERNEL_ADDRPERM(so), cfil_info->cfi_sock_id, outgoing, (uint64_t)VM_KERNEL_ADDRPERM(data), copyoffset, copylen);
3707 #endif
3708
3709 if (cfil_info->cfi_debug) {
3710 CFIL_LOG(LOG_ERR, "CFIL: VERDICT ACTION: so %llx sockID %llu outgoing %d: mbuf %llx copyoffset %u copylen %u (%s)",
3711 (uint64_t)VM_KERNEL_ADDRPERM(so), cfil_info->cfi_sock_id, outgoing, (uint64_t)VM_KERNEL_ADDRPERM(data), copyoffset, copylen,
3712 data_req->cfd_flags & CFD_DATA_FLAG_IP_HEADER ? "IP HDR" : "NO IP HDR");
3713 }
3714
3715 done:
3716 if (error == ENOBUFS) {
3717 entry->cfe_flags |= CFEF_FLOW_CONTROLLED;
3718 OSIncrementAtomic(
3719 &cfil_stats.cfs_data_event_flow_control);
3720
3721 if (!cfil_rw_lock_shared_to_exclusive(&cfil_lck_rw)) {
3722 cfil_rw_lock_exclusive(&cfil_lck_rw);
3723 }
3724
3725 cfc->cf_flags |= CFF_FLOW_CONTROLLED;
3726
3727 cfil_rw_unlock_exclusive(&cfil_lck_rw);
3728 } else {
3729 if (error != 0) {
3730 OSIncrementAtomic(&cfil_stats.cfs_data_event_fail);
3731 }
3732
3733 cfil_rw_unlock_shared(&cfil_lck_rw);
3734 }
3735 return error;
3736 }
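/*
 * Illustrative sketch (hypothetical, user-space side): how a filter agent
 * might interpret a CFM_OP_DATA_IN/CFM_OP_DATA_OUT event built above.  The
 * payload bytes immediately follow the fixed-size header (cfm_len is
 * hdrsize + copylen) and cfd_start_offset/cfd_end_offset locate them in
 * the flow.  The buf pointer and inspect() helper are assumptions; the
 * fields are the ones populated in cfil_dispatch_data_event().
 *
 *	struct cfil_msg_data_event *ev = (struct cfil_msg_data_event *)(void *)buf;
 *	uint8_t *payload = (uint8_t *)(ev + 1);
 *	size_t payload_len = (size_t)(ev->cfd_end_offset - ev->cfd_start_offset);
 *
 *	if (ev->cfd_flags & CFD_DATA_FLAG_IP_HEADER) {
 *		// Non-TCP/UDP flow: the payload starts with the IP header
 *		// (see the OPTIONAL_IP_HEADER handling above), so skip it
 *		// before inspecting application data.
 *	}
 *	inspect(ev->cfd_msghdr.cfm_sock_id, ev->cfd_msghdr.cfm_op,
 *	    payload, payload_len);
 */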
3737
3738 /*
3739 * Process the queue of data waiting to be delivered to content filter
3740 */
3741 static int
3742 cfil_data_service_ctl_q(struct socket *so, struct cfil_info *cfil_info, uint32_t kcunit, int outgoing)
3743 {
3744 errno_t error = 0;
3745 struct mbuf *data, *tmp = NULL;
3746 unsigned int datalen = 0, copylen = 0, copyoffset = 0;
3747 struct cfil_entry *entry;
3748 struct cfe_buf *entrybuf;
3749 uint64_t currentoffset = 0;
3750
3751 if (cfil_info == NULL) {
3752 return 0;
3753 }
3754
3755 CFIL_LOG(LOG_INFO, "so %llx kcunit %u outgoing %d",
3756 (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit, outgoing);
3757
3758 socket_lock_assert_owned(so);
3759
3760 entry = &cfil_info->cfi_entries[kcunit - 1];
3761 if (outgoing) {
3762 entrybuf = &entry->cfe_snd;
3763 } else {
3764 entrybuf = &entry->cfe_rcv;
3765 }
3766
3767 /* Send attached message if not yet done */
3768 if ((entry->cfe_flags & CFEF_SENT_SOCK_ATTACHED) == 0) {
3769 error = cfil_dispatch_attach_event(so, cfil_info, CFI_ENTRY_KCUNIT(cfil_info, entry),
3770 outgoing ? CFS_CONNECTION_DIR_OUT : CFS_CONNECTION_DIR_IN);
3771 if (error != 0) {
3772 /* We can recover from flow control */
3773 if (error == ENOBUFS || error == ENOMEM) {
3774 error = 0;
3775 }
3776 goto done;
3777 }
3778 } else if ((entry->cfe_flags & CFEF_DATA_START) == 0) {
3779 OSIncrementAtomic(&cfil_stats.cfs_ctl_q_not_started);
3780 goto done;
3781 }
3782
3783 #if DATA_DEBUG
3784 CFIL_LOG(LOG_DEBUG, "CFIL: SERVICE CTL-Q: pass_offset %llu peeked %llu peek_offset %llu",
3785 entrybuf->cfe_pass_offset,
3786 entrybuf->cfe_peeked,
3787 entrybuf->cfe_peek_offset);
3788 #endif
3789
3790 /* Move all data that can pass */
3791 while ((data = cfil_queue_first(&entrybuf->cfe_ctl_q)) != NULL &&
3792 entrybuf->cfe_ctl_q.q_start < entrybuf->cfe_pass_offset) {
3793 datalen = cfil_data_length(data, NULL, NULL);
3794 tmp = data;
3795
3796 if (entrybuf->cfe_ctl_q.q_start + datalen <=
3797 entrybuf->cfe_pass_offset) {
3798 /*
3799 * The first mbuf can fully pass
3800 */
3801 copylen = datalen;
3802 } else {
3803 /*
3804 * The first mbuf can partially pass
3805 */
3806 copylen = (unsigned int)(entrybuf->cfe_pass_offset - entrybuf->cfe_ctl_q.q_start);
3807 }
3808 VERIFY(copylen <= datalen);
3809
3810 #if DATA_DEBUG
3811 CFIL_LOG(LOG_DEBUG,
3812 "CFIL: SERVICE CTL-Q PASSING: %llx first %llu peeked %llu pass %llu peek %llu"
3813 " datalen %u copylen %u",
3814 (uint64_t)VM_KERNEL_ADDRPERM(tmp),
3815 entrybuf->cfe_ctl_q.q_start,
3816 entrybuf->cfe_peeked,
3817 entrybuf->cfe_pass_offset,
3818 entrybuf->cfe_peek_offset,
3819 datalen, copylen);
3820 #endif
3821
3822 /*
3823 * Data that passes has been peeked at explicitly or
3824 * implicitly
3825 */
3826 if (entrybuf->cfe_ctl_q.q_start + copylen >
3827 entrybuf->cfe_peeked) {
3828 entrybuf->cfe_peeked =
3829 entrybuf->cfe_ctl_q.q_start + copylen;
3830 }
3831 /*
3832 * Stop on partial pass
3833 */
3834 if (copylen < datalen) {
3835 break;
3836 }
3837
3838 /* All good, move full data from ctl queue to pending queue */
3839 cfil_queue_remove(&entrybuf->cfe_ctl_q, data, datalen);
3840
3841 cfil_queue_enqueue(&entrybuf->cfe_pending_q, data, datalen);
3842 if (outgoing) {
3843 OSAddAtomic64(datalen,
3844 &cfil_stats.cfs_pending_q_out_enqueued);
3845 } else {
3846 OSAddAtomic64(datalen,
3847 &cfil_stats.cfs_pending_q_in_enqueued);
3848 }
3849 }
3850 CFIL_INFO_VERIFY(cfil_info);
3851 if (tmp != NULL) {
3852 CFIL_LOG(LOG_DEBUG,
3853 "%llx first %llu peeked %llu pass %llu peek %llu"
3854 " datalen %u copylen %u",
3855 (uint64_t)VM_KERNEL_ADDRPERM(tmp),
3856 entrybuf->cfe_ctl_q.q_start,
3857 entrybuf->cfe_peeked,
3858 entrybuf->cfe_pass_offset,
3859 entrybuf->cfe_peek_offset,
3860 datalen, copylen);
3861 }
3862 tmp = NULL;
3863
3864 /* Now deal with remaining data the filter wants to peek at */
3865 for (data = cfil_queue_first(&entrybuf->cfe_ctl_q),
3866 currentoffset = entrybuf->cfe_ctl_q.q_start;
3867 data != NULL && currentoffset < entrybuf->cfe_peek_offset;
3868 data = cfil_queue_next(&entrybuf->cfe_ctl_q, data),
3869 currentoffset += datalen) {
3870 datalen = cfil_data_length(data, NULL, NULL);
3871 tmp = data;
3872
3873 /* We've already peeked at this mbuf */
3874 if (currentoffset + datalen <= entrybuf->cfe_peeked) {
3875 continue;
3876 }
3877 /*
3878 * The data in the first mbuf may have been
3879 * partially peeked at
3880 */
3881 copyoffset = (unsigned int)(entrybuf->cfe_peeked - currentoffset);
3882 VERIFY(copyoffset < datalen);
3883 copylen = datalen - copyoffset;
3884 VERIFY(copylen <= datalen);
3885 /*
3886 * Do not copy more than needed
3887 */
3888 if (currentoffset + copyoffset + copylen >
3889 entrybuf->cfe_peek_offset) {
3890 copylen = (unsigned int)(entrybuf->cfe_peek_offset -
3891 (currentoffset + copyoffset));
3892 }
3893
3894 #if DATA_DEBUG
3895 CFIL_LOG(LOG_DEBUG,
3896 "CFIL: SERVICE CTL-Q PEEKING: %llx current %llu peeked %llu pass %llu peek %llu "
3897 "datalen %u copylen %u copyoffset %u",
3898 (uint64_t)VM_KERNEL_ADDRPERM(tmp),
3899 currentoffset,
3900 entrybuf->cfe_peeked,
3901 entrybuf->cfe_pass_offset,
3902 entrybuf->cfe_peek_offset,
3903 datalen, copylen, copyoffset);
3904 #endif
3905
3906 /*
3907 * Stop if there is nothing more to peek at
3908 */
3909 if (copylen == 0) {
3910 break;
3911 }
3912 /*
3913 * Let the filter get a peek at this span of data
3914 */
3915 error = cfil_dispatch_data_event(so, cfil_info, kcunit,
3916 outgoing, data, copyoffset, copylen);
3917 if (error != 0) {
3918 /* On error, leave data in ctl_q */
3919 break;
3920 }
3921 entrybuf->cfe_peeked += copylen;
3922 if (outgoing) {
3923 OSAddAtomic64(copylen,
3924 &cfil_stats.cfs_ctl_q_out_peeked);
3925 } else {
3926 OSAddAtomic64(copylen,
3927 &cfil_stats.cfs_ctl_q_in_peeked);
3928 }
3929
3930 /* Stop when data could not be fully peeked at */
3931 if (copylen + copyoffset < datalen) {
3932 break;
3933 }
3934 }
3935 CFIL_INFO_VERIFY(cfil_info);
3936 if (tmp != NULL) {
3937 CFIL_LOG(LOG_DEBUG,
3938 "%llx first %llu peeked %llu pass %llu peek %llu"
3939 " datalen %u copylen %u copyoffset %u",
3940 (uint64_t)VM_KERNEL_ADDRPERM(tmp),
3941 currentoffset,
3942 entrybuf->cfe_peeked,
3943 entrybuf->cfe_pass_offset,
3944 entrybuf->cfe_peek_offset,
3945 datalen, copylen, copyoffset);
3946 }
3947
3948 /*
3949 * Process data that has passed the filter
3950 */
3951 error = cfil_service_pending_queue(so, cfil_info, kcunit, outgoing);
3952 if (error != 0) {
3953 CFIL_LOG(LOG_ERR, "cfil_service_pending_queue() error %d",
3954 error);
3955 goto done;
3956 }
3957
3958 /*
3959 * Dispatch disconnect events that could not be sent
3960 */
3961 if (cfil_info == NULL) {
3962 goto done;
3963 } else if (outgoing) {
3964 if ((cfil_info->cfi_flags & CFIF_SHUT_WR) &&
3965 !(entry->cfe_flags & CFEF_SENT_DISCONNECT_OUT)) {
3966 cfil_dispatch_disconnect_event(so, cfil_info, kcunit, 1);
3967 }
3968 } else {
3969 if ((cfil_info->cfi_flags & CFIF_SHUT_RD) &&
3970 !(entry->cfe_flags & CFEF_SENT_DISCONNECT_IN)) {
3971 cfil_dispatch_disconnect_event(so, cfil_info, kcunit, 0);
3972 }
3973 }
3974
3975 done:
3976 CFIL_LOG(LOG_DEBUG,
3977 "first %llu peeked %llu pass %llu peek %llu",
3978 entrybuf->cfe_ctl_q.q_start,
3979 entrybuf->cfe_peeked,
3980 entrybuf->cfe_pass_offset,
3981 entrybuf->cfe_peek_offset);
3982
3983 CFIL_INFO_VERIFY(cfil_info);
3984 return error;
3985 }
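/*
 * Worked example of the control-queue servicing above: assume the control
 * queue holds a single 1000-byte mbuf with q_start == 0 and the filter has
 * set cfe_pass_offset == 500 and cfe_peek_offset == 1500.  The pass loop
 * can only partially pass the mbuf (copylen 500 < datalen 1000), so it
 * advances cfe_peeked to 500 and leaves the mbuf on the control queue.
 * The peek loop then dispatches a data event for the remaining bytes
 * (copyoffset 500, copylen 500) and advances cfe_peeked to 1000.  Nothing
 * moves to the pending queue until the filter raises cfe_pass_offset past
 * the end of the mbuf.
 */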
3986
3987 /*
3988 * cfil_data_filter()
3989 *
3990 * Process data for a content filter installed on a socket
3991 */
3992 int
3993 cfil_data_filter(struct socket *so, struct cfil_info *cfil_info, uint32_t kcunit, int outgoing,
3994 struct mbuf *data, uint32_t datalen)
3995 {
3996 errno_t error = 0;
3997 struct cfil_entry *entry;
3998 struct cfe_buf *entrybuf;
3999
4000 CFIL_LOG(LOG_INFO, "so %llx kcunit %u outgoing %d",
4001 (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit, outgoing);
4002
4003 socket_lock_assert_owned(so);
4004
4005 entry = &cfil_info->cfi_entries[kcunit - 1];
4006 if (outgoing) {
4007 entrybuf = &entry->cfe_snd;
4008 } else {
4009 entrybuf = &entry->cfe_rcv;
4010 }
4011
4012 /* Are we attached to the filter? */
4013 if (entry->cfe_filter == NULL) {
4014 error = 0;
4015 goto done;
4016 }
4017
4018 /* Dispatch to filters */
4019 cfil_queue_enqueue(&entrybuf->cfe_ctl_q, data, datalen);
4020 if (outgoing) {
4021 OSAddAtomic64(datalen,
4022 &cfil_stats.cfs_ctl_q_out_enqueued);
4023 } else {
4024 OSAddAtomic64(datalen,
4025 &cfil_stats.cfs_ctl_q_in_enqueued);
4026 }
4027
4028 error = cfil_data_service_ctl_q(so, cfil_info, kcunit, outgoing);
4029 if (error != 0) {
4030 CFIL_LOG(LOG_ERR, "cfil_data_service_ctl_q() error %d",
4031 error);
4032 }
4033 /*
4034 * We have to return EJUSTRETURN in all cases to avoid double free
4035 * by socket layer
4036 */
4037 error = EJUSTRETURN;
4038 done:
4039 CFIL_INFO_VERIFY(cfil_info);
4040
4041 CFIL_LOG(LOG_INFO, "return %d", error);
4042 return error;
4043 }
4044
4045 /*
4046 * cfil_service_inject_queue() re-injects data that has passed the
4047 * content filters
4048 */
4049 static int
4050 cfil_service_inject_queue(struct socket *so, struct cfil_info *cfil_info, int outgoing)
4051 {
4052 mbuf_t data;
4053 unsigned int datalen;
4054 int mbcnt = 0;
4055 int mbnum = 0;
4056 errno_t error = 0;
4057 struct cfi_buf *cfi_buf;
4058 struct cfil_queue *inject_q;
4059 int need_rwakeup = 0;
4060 int count = 0;
4061 struct inpcb *inp = NULL;
4062 struct ip *ip = NULL;
4063 unsigned int hlen;
4064
4065 if (cfil_info == NULL) {
4066 return 0;
4067 }
4068
4069 socket_lock_assert_owned(so);
4070
4071 if (outgoing) {
4072 cfi_buf = &cfil_info->cfi_snd;
4073 cfil_info->cfi_flags &= ~CFIF_RETRY_INJECT_OUT;
4074 } else {
4075 cfi_buf = &cfil_info->cfi_rcv;
4076 cfil_info->cfi_flags &= ~CFIF_RETRY_INJECT_IN;
4077 }
4078 inject_q = &cfi_buf->cfi_inject_q;
4079
4080 if (cfil_queue_empty(inject_q)) {
4081 return 0;
4082 }
4083
4084 #if DATA_DEBUG | VERDICT_DEBUG
4085 CFIL_LOG(LOG_ERR, "CFIL: SERVICE INJECT-Q: <so %llx> outgoing %d queue len %llu",
4086 (uint64_t)VM_KERNEL_ADDRPERM(so), outgoing, cfil_queue_len(inject_q));
4087 #endif
4088
4089 while ((data = cfil_queue_first(inject_q)) != NULL) {
4090 datalen = cfil_data_length(data, &mbcnt, &mbnum);
4091
4092 #if DATA_DEBUG
4093 CFIL_LOG(LOG_ERR, "CFIL: SERVICE INJECT-Q: <so %llx> data %llx datalen %u (mbcnt %u)",
4094 (uint64_t)VM_KERNEL_ADDRPERM(so), (uint64_t)VM_KERNEL_ADDRPERM(data), datalen, mbcnt);
4095 #endif
4096 if (cfil_info->cfi_debug) {
4097 CFIL_LOG(LOG_ERR, "CFIL: SERVICE INJECT-Q: <so %llx> data %llx datalen %u (mbcnt %u)",
4098 (uint64_t)VM_KERNEL_ADDRPERM(so), (uint64_t)VM_KERNEL_ADDRPERM(data), datalen, mbcnt);
4099 }
4100
4101 /* Remove data from queue and adjust stats */
4102 cfil_queue_remove(inject_q, data, datalen);
4103 cfi_buf->cfi_pending_first += datalen;
4104 cfi_buf->cfi_pending_mbcnt -= mbcnt;
4105 cfi_buf->cfi_pending_mbnum -= mbnum;
4106 cfil_info_buf_verify(cfi_buf);
4107
4108 if (outgoing) {
4109 error = sosend_reinject(so, NULL, data, NULL, 0);
4110 if (error != 0) {
4111 #if DATA_DEBUG
4112 cfil_info_log(LOG_ERR, cfil_info, "CFIL: Error: sosend_reinject() failed");
4113 CFIL_LOG(LOG_ERR, "### sosend() failed %d", error);
4114 #endif
4115 break;
4116 }
4117 // At least one injection succeeded, need to wake up pending threads.
4118 need_rwakeup = 1;
4119 } else {
4120 data->m_flags |= M_SKIPCFIL;
4121
4122 /*
4123 * NOTE: We currently only support TCP, UDP, ICMP,
4124 * ICMPv6 and RAWIP. For MPTCP and message TCP we'll
4125 * need to call the appropriate sbappendxxx()
4126 * or fix sock_inject_data_in()
4127 */
4128 if (IS_IP_DGRAM(so)) {
4129 if (OPTIONAL_IP_HEADER(so)) {
4130 inp = sotoinpcb(so);
4131 if (inp && (inp->inp_flags & INP_STRIPHDR)) {
4132 mbuf_t data_start = cfil_data_start(data);
4133 if (data_start != NULL && (data_start->m_flags & M_PKTHDR)) {
4134 ip = mtod(data_start, struct ip *);
4135 hlen = IP_VHL_HL(ip->ip_vhl) << 2;
4136 data_start->m_len -= hlen;
4137 data_start->m_pkthdr.len -= hlen;
4138 data_start->m_data += hlen;
4139 }
4140 }
4141 }
4142
4143 if (sbappendchain(&so->so_rcv, data, 0)) {
4144 need_rwakeup = 1;
4145 }
4146 } else {
4147 if (sbappendstream(&so->so_rcv, data)) {
4148 need_rwakeup = 1;
4149 }
4150 }
4151 }
4152
4153 if (outgoing) {
4154 OSAddAtomic64(datalen,
4155 &cfil_stats.cfs_inject_q_out_passed);
4156 } else {
4157 OSAddAtomic64(datalen,
4158 &cfil_stats.cfs_inject_q_in_passed);
4159 }
4160
4161 count++;
4162 }
4163
4164 #if DATA_DEBUG | VERDICT_DEBUG
4165 CFIL_LOG(LOG_ERR, "CFIL: SERVICE INJECT-Q: <so %llx> injected %d",
4166 (uint64_t)VM_KERNEL_ADDRPERM(so), count);
4167 #endif
4168 if (cfil_info->cfi_debug) {
4169 CFIL_LOG(LOG_ERR, "CFIL: SERVICE INJECT-Q: <so %llx> injected %d",
4170 (uint64_t)VM_KERNEL_ADDRPERM(so), count);
4171 }
4172
4173 /* A single wakeup for several packets is more efficient */
4174 if (need_rwakeup) {
4175 if (outgoing == TRUE) {
4176 sowwakeup(so);
4177 } else {
4178 sorwakeup(so);
4179 }
4180 }
4181
4182 if (error != 0 && cfil_info) {
4183 if (error == ENOBUFS) {
4184 OSIncrementAtomic(&cfil_stats.cfs_inject_q_nobufs);
4185 }
4186 if (error == ENOMEM) {
4187 OSIncrementAtomic(&cfil_stats.cfs_inject_q_nomem);
4188 }
4189
4190 if (outgoing) {
4191 cfil_info->cfi_flags |= CFIF_RETRY_INJECT_OUT;
4192 OSIncrementAtomic(&cfil_stats.cfs_inject_q_out_fail);
4193 } else {
4194 cfil_info->cfi_flags |= CFIF_RETRY_INJECT_IN;
4195 OSIncrementAtomic(&cfil_stats.cfs_inject_q_in_fail);
4196 }
4197 }
4198
4199 /*
4200 * Notify
4201 */
4202 if (cfil_info && (cfil_info->cfi_flags & CFIF_SHUT_WR)) {
4203 cfil_sock_notify_shutdown(so, SHUT_WR);
4204 if (cfil_sock_data_pending(&so->so_snd) == 0) {
4205 soshutdownlock_final(so, SHUT_WR);
4206 }
4207 }
4208 if (cfil_info && (cfil_info->cfi_flags & CFIF_CLOSE_WAIT)) {
4209 if (cfil_filters_attached(so) == 0) {
4210 CFIL_LOG(LOG_INFO, "so %llx waking",
4211 (uint64_t)VM_KERNEL_ADDRPERM(so));
4212 wakeup((caddr_t)cfil_info);
4213 }
4214 }
4215
4216 CFIL_INFO_VERIFY(cfil_info);
4217
4218 return error;
4219 }
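/*
 * Note: the callers below (cfil_action_data_pass(), cfil_sock_is_closed()
 * and cfil_sock_buf_update()) bracket cfil_service_inject_queue() with
 * cfil_acquire_sockbuf()/cfil_release_sockbuf() so that re-injection does
 * not race with the socket layer.
 */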
4220
4221 static int
4222 cfil_service_pending_queue(struct socket *so, struct cfil_info *cfil_info, uint32_t kcunit, int outgoing)
4223 {
4224 uint64_t passlen, curlen;
4225 mbuf_t data;
4226 unsigned int datalen;
4227 errno_t error = 0;
4228 struct cfil_entry *entry;
4229 struct cfe_buf *entrybuf;
4230 struct cfil_queue *pending_q;
4231
4232 CFIL_LOG(LOG_INFO, "so %llx kcunit %u outgoing %d",
4233 (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit, outgoing);
4234
4235 socket_lock_assert_owned(so);
4236
4237 entry = &cfil_info->cfi_entries[kcunit - 1];
4238 if (outgoing) {
4239 entrybuf = &entry->cfe_snd;
4240 } else {
4241 entrybuf = &entry->cfe_rcv;
4242 }
4243
4244 pending_q = &entrybuf->cfe_pending_q;
4245
4246 passlen = entrybuf->cfe_pass_offset - pending_q->q_start;
4247
4248 /*
4249 * Locate the chunks of data that we can pass to the next filter
4250 * A data chunk must be on mbuf boundaries
4251 */
4252 curlen = 0;
4253 while ((data = cfil_queue_first(pending_q)) != NULL) {
4254 struct cfil_entry *iter_entry;
4255 datalen = cfil_data_length(data, NULL, NULL);
4256
4257 #if DATA_DEBUG
4258 CFIL_LOG(LOG_DEBUG,
4259 "CFIL: SERVICE PENDING-Q: data %llx datalen %u passlen %llu curlen %llu",
4260 (uint64_t)VM_KERNEL_ADDRPERM(data), datalen,
4261 passlen, curlen);
4262 #endif
4263
4264 if (curlen + datalen > passlen) {
4265 break;
4266 }
4267
4268 cfil_queue_remove(pending_q, data, datalen);
4269
4270 curlen += datalen;
4271
4272 for (iter_entry = SLIST_NEXT(entry, cfe_order_link);
4273 iter_entry != NULL;
4274 iter_entry = SLIST_NEXT(iter_entry, cfe_order_link)) {
4275 error = cfil_data_filter(so, cfil_info, CFI_ENTRY_KCUNIT(cfil_info, iter_entry), outgoing,
4276 data, datalen);
4277 /* 0 means passed so we can continue */
4278 if (error != 0) {
4279 break;
4280 }
4281 }
4282 /* When data has passed all filters, re-inject */
4283 if (error == 0) {
4284 if (outgoing) {
4285 cfil_queue_enqueue(
4286 &cfil_info->cfi_snd.cfi_inject_q,
4287 data, datalen);
4288 OSAddAtomic64(datalen,
4289 &cfil_stats.cfs_inject_q_out_enqueued);
4290 } else {
4291 cfil_queue_enqueue(
4292 &cfil_info->cfi_rcv.cfi_inject_q,
4293 data, datalen);
4294 OSAddAtomic64(datalen,
4295 &cfil_stats.cfs_inject_q_in_enqueued);
4296 }
4297 }
4298 }
4299
4300 CFIL_INFO_VERIFY(cfil_info);
4301
4302 return error;
4303 }
4304
4305 int
4306 cfil_update_data_offsets(struct socket *so, struct cfil_info *cfil_info, uint32_t kcunit, int outgoing,
4307 uint64_t pass_offset, uint64_t peek_offset)
4308 {
4309 errno_t error = 0;
4310 struct cfil_entry *entry = NULL;
4311 struct cfe_buf *entrybuf;
4312 int updated = 0;
4313
4314 CFIL_LOG(LOG_INFO, "pass %llu peek %llu", pass_offset, peek_offset);
4315
4316 socket_lock_assert_owned(so);
4317
4318 if (cfil_info == NULL) {
4319 CFIL_LOG(LOG_ERR, "so %llx cfil detached",
4320 (uint64_t)VM_KERNEL_ADDRPERM(so));
4321 error = 0;
4322 goto done;
4323 } else if (cfil_info->cfi_flags & CFIF_DROP) {
4324 CFIL_LOG(LOG_ERR, "so %llx drop set",
4325 (uint64_t)VM_KERNEL_ADDRPERM(so));
4326 error = EPIPE;
4327 goto done;
4328 }
4329
4330 entry = &cfil_info->cfi_entries[kcunit - 1];
4331 if (outgoing) {
4332 entrybuf = &entry->cfe_snd;
4333 } else {
4334 entrybuf = &entry->cfe_rcv;
4335 }
4336
4337 /* Record updated offsets for this content filter */
4338 if (pass_offset > entrybuf->cfe_pass_offset) {
4339 entrybuf->cfe_pass_offset = pass_offset;
4340
4341 if (entrybuf->cfe_peek_offset < entrybuf->cfe_pass_offset) {
4342 entrybuf->cfe_peek_offset = entrybuf->cfe_pass_offset;
4343 }
4344 updated = 1;
4345 } else {
4346 CFIL_LOG(LOG_INFO, "pass_offset %llu <= cfe_pass_offset %llu",
4347 pass_offset, entrybuf->cfe_pass_offset);
4348 }
4349 /* Filter does not want or need to see data that's allowed to pass */
4350 if (peek_offset > entrybuf->cfe_pass_offset &&
4351 peek_offset > entrybuf->cfe_peek_offset) {
4352 entrybuf->cfe_peek_offset = peek_offset;
4353 updated = 1;
4354 }
4355 /* Nothing to do */
4356 if (updated == 0) {
4357 goto done;
4358 }
4359
4360 /* Move data held in control queue to pending queue if needed */
4361 error = cfil_data_service_ctl_q(so, cfil_info, kcunit, outgoing);
4362 if (error != 0) {
4363 CFIL_LOG(LOG_ERR, "cfil_data_service_ctl_q() error %d",
4364 error);
4365 goto done;
4366 }
4367 error = EJUSTRETURN;
4368
4369 done:
4370 /*
4371 * The filter is effectively detached when it has passed all data from both sides
4372 * or when the socket is closed and no more data is waiting
4373 * to be delivered to the filter
4374 */
4375 if (entry != NULL &&
4376 ((entry->cfe_snd.cfe_pass_offset == CFM_MAX_OFFSET &&
4377 entry->cfe_rcv.cfe_pass_offset == CFM_MAX_OFFSET) ||
4378 ((cfil_info->cfi_flags & CFIF_CLOSE_WAIT) &&
4379 cfil_queue_empty(&entry->cfe_snd.cfe_ctl_q) &&
4380 cfil_queue_empty(&entry->cfe_rcv.cfe_ctl_q)))) {
4381 entry->cfe_flags |= CFEF_CFIL_DETACHED;
4382 #if LIFECYCLE_DEBUG
4383 cfil_info_log(LOG_ERR, cfil_info, outgoing ?
4384 "CFIL: LIFECYCLE: OUT - PASSED ALL - DETACH":
4385 "CFIL: LIFECYCLE: IN - PASSED ALL - DETACH");
4386 #endif
4387 CFIL_LOG(LOG_INFO, "so %llx detached %u",
4388 (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit);
4389 if ((cfil_info->cfi_flags & CFIF_CLOSE_WAIT) &&
4390 cfil_filters_attached(so) == 0) {
4391 #if LIFECYCLE_DEBUG
4392 cfil_info_log(LOG_ERR, cfil_info, "CFIL: LIFECYCLE: WAKING");
4393 #endif
4394 CFIL_LOG(LOG_INFO, "so %llx waking",
4395 (uint64_t)VM_KERNEL_ADDRPERM(so));
4396 wakeup((caddr_t)cfil_info);
4397 }
4398 }
4399 CFIL_INFO_VERIFY(cfil_info);
4400 CFIL_LOG(LOG_INFO, "return %d", error);
4401 return error;
4402 }
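/*
 * Note on the offset semantics enforced above: pass and peek offsets only
 * move forward, and the peek offset is kept at or beyond the pass offset
 * because data that is allowed to pass has implicitly been peeked at.  A
 * filter that raises both directions' pass offsets to CFM_MAX_OFFSET (as
 * cfil_action_bless_client() does below) has effectively passed everything
 * and gets marked CFEF_CFIL_DETACHED here.
 */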
4403
4404 /*
4405 * Update pass offset for socket when no data is pending
4406 */
4407 static int
4408 cfil_set_socket_pass_offset(struct socket *so, struct cfil_info *cfil_info, int outgoing)
4409 {
4410 struct cfi_buf *cfi_buf;
4411 struct cfil_entry *entry;
4412 struct cfe_buf *entrybuf;
4413 uint32_t kcunit;
4414 uint64_t pass_offset = 0;
4415 boolean_t first = true;
4416
4417 if (cfil_info == NULL) {
4418 return 0;
4419 }
4420
4421 CFIL_LOG(LOG_INFO, "so %llx outgoing %d",
4422 (uint64_t)VM_KERNEL_ADDRPERM(so), outgoing);
4423
4424 socket_lock_assert_owned(so);
4425
4426 if (outgoing) {
4427 cfi_buf = &cfil_info->cfi_snd;
4428 } else {
4429 cfi_buf = &cfil_info->cfi_rcv;
4430 }
4431
4432 CFIL_LOG(LOG_DEBUG, "CFIL: <so %llx, sockID %llu> outgoing %d cfi_pending_first %llu cfi_pending_last %llu",
4433 (uint64_t)VM_KERNEL_ADDRPERM(so), cfil_info->cfi_sock_id, outgoing,
4434 cfi_buf->cfi_pending_first, cfi_buf->cfi_pending_last);
4435
4436 if (cfi_buf->cfi_pending_last - cfi_buf->cfi_pending_first == 0) {
4437 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
4438 entry = &cfil_info->cfi_entries[kcunit - 1];
4439
4440 /* Are we attached to a filter? */
4441 if (entry->cfe_filter == NULL) {
4442 continue;
4443 }
4444
4445 if (outgoing) {
4446 entrybuf = &entry->cfe_snd;
4447 } else {
4448 entrybuf = &entry->cfe_rcv;
4449 }
4450
4451 // Keep track of the smallest pass_offset among filters.
4452 if (first == true ||
4453 entrybuf->cfe_pass_offset < pass_offset) {
4454 pass_offset = entrybuf->cfe_pass_offset;
4455 first = false;
4456 }
4457 }
4458 cfi_buf->cfi_pass_offset = pass_offset;
4459 }
4460
4461 CFIL_LOG(LOG_DEBUG, "CFIL: <so %llx, sockID %llu>, cfi_pass_offset %llu",
4462 (uint64_t)VM_KERNEL_ADDRPERM(so), cfil_info->cfi_sock_id, cfi_buf->cfi_pass_offset);
4463
4464 return 0;
4465 }
4466
4467 int
4468 cfil_action_data_pass(struct socket *so, struct cfil_info *cfil_info, uint32_t kcunit, int outgoing,
4469 uint64_t pass_offset, uint64_t peek_offset)
4470 {
4471 errno_t error = 0;
4472
4473 CFIL_LOG(LOG_INFO, "");
4474
4475 socket_lock_assert_owned(so);
4476
4477 error = cfil_acquire_sockbuf(so, cfil_info, outgoing);
4478 if (error != 0) {
4479 CFIL_LOG(LOG_INFO, "so %llx %s dropped",
4480 (uint64_t)VM_KERNEL_ADDRPERM(so),
4481 outgoing ? "out" : "in");
4482 goto release;
4483 }
4484
4485 error = cfil_update_data_offsets(so, cfil_info, kcunit, outgoing,
4486 pass_offset, peek_offset);
4487
4488 cfil_service_inject_queue(so, cfil_info, outgoing);
4489
4490 cfil_set_socket_pass_offset(so, cfil_info, outgoing);
4491 release:
4492 CFIL_INFO_VERIFY(cfil_info);
4493 cfil_release_sockbuf(so, outgoing);
4494
4495 return error;
4496 }
4497
4498
4499 static void
4500 cfil_flush_queues(struct socket *so, struct cfil_info *cfil_info)
4501 {
4502 struct cfil_entry *entry;
4503 int kcunit;
4504 uint64_t drained;
4505
4506 if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || cfil_info == NULL) {
4507 goto done;
4508 }
4509
4510 socket_lock_assert_owned(so);
4511
4512 /*
4513 * Flush the output queues and ignore errors as long as
4514 * we are attached
4515 */
4516 (void) cfil_acquire_sockbuf(so, cfil_info, 1);
4517 if (cfil_info != NULL) {
4518 drained = 0;
4519 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
4520 entry = &cfil_info->cfi_entries[kcunit - 1];
4521
4522 drained += cfil_queue_drain(&entry->cfe_snd.cfe_ctl_q);
4523 drained += cfil_queue_drain(&entry->cfe_snd.cfe_pending_q);
4524 }
4525 drained += cfil_queue_drain(&cfil_info->cfi_snd.cfi_inject_q);
4526
4527 if (drained) {
4528 if (cfil_info->cfi_flags & CFIF_DROP) {
4529 OSIncrementAtomic(
4530 &cfil_stats.cfs_flush_out_drop);
4531 } else {
4532 OSIncrementAtomic(
4533 &cfil_stats.cfs_flush_out_close);
4534 }
4535 }
4536 }
4537 cfil_release_sockbuf(so, 1);
4538
4539 /*
4540 * Flush the input queues
4541 */
4542 (void) cfil_acquire_sockbuf(so, cfil_info, 0);
4543 if (cfil_info != NULL) {
4544 drained = 0;
4545 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
4546 entry = &cfil_info->cfi_entries[kcunit - 1];
4547
4548 drained += cfil_queue_drain(
4549 &entry->cfe_rcv.cfe_ctl_q);
4550 drained += cfil_queue_drain(
4551 &entry->cfe_rcv.cfe_pending_q);
4552 }
4553 drained += cfil_queue_drain(&cfil_info->cfi_rcv.cfi_inject_q);
4554
4555 if (drained) {
4556 if (cfil_info->cfi_flags & CFIF_DROP) {
4557 OSIncrementAtomic(
4558 &cfil_stats.cfs_flush_in_drop);
4559 } else {
4560 OSIncrementAtomic(
4561 &cfil_stats.cfs_flush_in_close);
4562 }
4563 }
4564 }
4565 cfil_release_sockbuf(so, 0);
4566 done:
4567 CFIL_INFO_VERIFY(cfil_info);
4568 }
4569
4570 int
4571 cfil_action_drop(struct socket *so, struct cfil_info *cfil_info, uint32_t kcunit)
4572 {
4573 errno_t error = 0;
4574 struct cfil_entry *entry;
4575 struct proc *p;
4576
4577 if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || cfil_info == NULL) {
4578 goto done;
4579 }
4580
4581 socket_lock_assert_owned(so);
4582
4583 entry = &cfil_info->cfi_entries[kcunit - 1];
4584
4585 /* Are we attached to the filter? */
4586 if (entry->cfe_filter == NULL) {
4587 goto done;
4588 }
4589
4590 cfil_info->cfi_flags |= CFIF_DROP;
4591
4592 p = current_proc();
4593
4594 /*
4595 * Force the socket to be marked defunct
4596 * (forcing was fixed along with rdar://19391339)
4597 */
4598 if (so->so_cfil_db == NULL) {
4599 error = sosetdefunct(p, so,
4600 SHUTDOWN_SOCKET_LEVEL_CONTENT_FILTER | SHUTDOWN_SOCKET_LEVEL_DISCONNECT_ALL,
4601 FALSE);
4602
4603 /* Flush the socket buffer and disconnect */
4604 if (error == 0) {
4605 error = sodefunct(p, so,
4606 SHUTDOWN_SOCKET_LEVEL_CONTENT_FILTER | SHUTDOWN_SOCKET_LEVEL_DISCONNECT_ALL);
4607 }
4608 }
4609
4610 /* The filter is done, mark as detached */
4611 entry->cfe_flags |= CFEF_CFIL_DETACHED;
4612 #if LIFECYCLE_DEBUG
4613 cfil_info_log(LOG_ERR, cfil_info, "CFIL: LIFECYCLE: DROP - DETACH");
4614 #endif
4615 CFIL_LOG(LOG_INFO, "so %llx detached %u",
4616 (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit);
4617
4618 /* Pending data needs to go */
4619 cfil_flush_queues(so, cfil_info);
4620
4621 if (cfil_info && (cfil_info->cfi_flags & CFIF_CLOSE_WAIT)) {
4622 if (cfil_filters_attached(so) == 0) {
4623 CFIL_LOG(LOG_INFO, "so %llx waking",
4624 (uint64_t)VM_KERNEL_ADDRPERM(so));
4625 wakeup((caddr_t)cfil_info);
4626 }
4627 }
4628 done:
4629 return error;
4630 }
4631
4632 int
4633 cfil_action_bless_client(uint32_t kcunit, struct cfil_msg_hdr *msghdr)
4634 {
4635 errno_t error = 0;
4636 struct cfil_info *cfil_info = NULL;
4637
4638 bool cfil_attached = false;
4639 struct cfil_msg_bless_client *blessmsg = (struct cfil_msg_bless_client *)msghdr;
4640
4641 // Search and lock socket
4642 struct socket *so = cfil_socket_from_client_uuid(blessmsg->cfb_client_uuid, &cfil_attached);
4643 if (so == NULL) {
4644 error = ENOENT;
4645 } else {
4646 // The client gets a pass automatically
4647 cfil_info = (so->so_cfil_db != NULL) ?
4648 cfil_db_get_cfil_info(so->so_cfil_db, msghdr->cfm_sock_id) : so->so_cfil;
4649
4650 if (cfil_attached) {
4651 #if VERDICT_DEBUG
4652 if (cfil_info != NULL) {
4653 CFIL_LOG(LOG_ERR, "CFIL: VERDICT RECEIVED: BLESS %s <so %llx sockID %llu>",
4654 cfil_info->cfi_hash_entry ? "UDP" : "TCP",
4655 (uint64_t)VM_KERNEL_ADDRPERM(so),
4656 cfil_info->cfi_sock_id);
4657 }
4658 #endif
4659 cfil_sock_received_verdict(so);
4660 (void)cfil_action_data_pass(so, cfil_info, kcunit, 1, CFM_MAX_OFFSET, CFM_MAX_OFFSET);
4661 (void)cfil_action_data_pass(so, cfil_info, kcunit, 0, CFM_MAX_OFFSET, CFM_MAX_OFFSET);
4662 } else {
4663 so->so_flags1 |= SOF1_CONTENT_FILTER_SKIP;
4664 }
4665 socket_unlock(so, 1);
4666 }
4667
4668 return error;
4669 }
4670
4671 int
4672 cfil_action_set_crypto_key(uint32_t kcunit, struct cfil_msg_hdr *msghdr)
4673 {
4674 struct content_filter *cfc = NULL;
4675 cfil_crypto_state_t crypto_state = NULL;
4676 struct cfil_msg_set_crypto_key *keymsg = (struct cfil_msg_set_crypto_key *)msghdr;
4677
4678 CFIL_LOG(LOG_NOTICE, "");
4679
4680 if (content_filters == NULL) {
4681 CFIL_LOG(LOG_ERR, "no content filter");
4682 return EINVAL;
4683 }
4684 if (kcunit > MAX_CONTENT_FILTER) {
4685 CFIL_LOG(LOG_ERR, "kcunit %u > MAX_CONTENT_FILTER (%d)",
4686 kcunit, MAX_CONTENT_FILTER);
4687 return EINVAL;
4688 }
4689 crypto_state = cfil_crypto_init_client((uint8_t *)keymsg->crypto_key);
4690 if (crypto_state == NULL) {
4691 CFIL_LOG(LOG_ERR, "failed to initialize crypto state for unit %u)",
4692 kcunit);
4693 return EINVAL;
4694 }
4695
4696 cfil_rw_lock_exclusive(&cfil_lck_rw);
4697
4698 cfc = content_filters[kcunit - 1];
4699 if (cfc->cf_kcunit != kcunit) {
4700 CFIL_LOG(LOG_ERR, "bad unit info %u)",
4701 kcunit);
4702 cfil_rw_unlock_exclusive(&cfil_lck_rw);
4703 cfil_crypto_cleanup_state(crypto_state);
4704 return EINVAL;
4705 }
4706 if (cfc->cf_crypto_state != NULL) {
4707 cfil_crypto_cleanup_state(cfc->cf_crypto_state);
4708 cfc->cf_crypto_state = NULL;
4709 }
4710 cfc->cf_crypto_state = crypto_state;
4711
4712 cfil_rw_unlock_exclusive(&cfil_lck_rw);
4713 return 0;
4714 }
4715
4716 static int
4717 cfil_update_entry_offsets(struct socket *so, struct cfil_info *cfil_info, int outgoing, unsigned int datalen)
4718 {
4719 struct cfil_entry *entry;
4720 struct cfe_buf *entrybuf;
4721 uint32_t kcunit;
4722
4723 CFIL_LOG(LOG_INFO, "so %llx outgoing %d datalen %u",
4724 (uint64_t)VM_KERNEL_ADDRPERM(so), outgoing, datalen);
4725
4726 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
4727 entry = &cfil_info->cfi_entries[kcunit - 1];
4728
4729 /* Are we attached to the filter? */
4730 if (entry->cfe_filter == NULL) {
4731 continue;
4732 }
4733
4734 if (outgoing) {
4735 entrybuf = &entry->cfe_snd;
4736 } else {
4737 entrybuf = &entry->cfe_rcv;
4738 }
4739
4740 entrybuf->cfe_ctl_q.q_start += datalen;
4741 entrybuf->cfe_pass_offset = entrybuf->cfe_ctl_q.q_start;
4742 entrybuf->cfe_peeked = entrybuf->cfe_ctl_q.q_start;
4743 if (entrybuf->cfe_peek_offset < entrybuf->cfe_pass_offset) {
4744 entrybuf->cfe_peek_offset = entrybuf->cfe_pass_offset;
4745 }
4746
4747 entrybuf->cfe_ctl_q.q_end += datalen;
4748
4749 entrybuf->cfe_pending_q.q_start += datalen;
4750 entrybuf->cfe_pending_q.q_end += datalen;
4751 }
4752 CFIL_INFO_VERIFY(cfil_info);
4753 return 0;
4754 }
4755
4756 int
4757 cfil_data_common(struct socket *so, struct cfil_info *cfil_info, int outgoing, struct sockaddr *to,
4758 struct mbuf *data, struct mbuf *control, uint32_t flags)
4759 {
4760 #pragma unused(to, control, flags)
4761 errno_t error = 0;
4762 unsigned int datalen;
4763 int mbcnt = 0;
4764 int mbnum = 0;
4765 int kcunit;
4766 struct cfi_buf *cfi_buf;
4767 struct mbuf *chain = NULL;
4768
4769 if (cfil_info == NULL) {
4770 CFIL_LOG(LOG_ERR, "so %llx cfil detached",
4771 (uint64_t)VM_KERNEL_ADDRPERM(so));
4772 error = 0;
4773 goto done;
4774 } else if (cfil_info->cfi_flags & CFIF_DROP) {
4775 CFIL_LOG(LOG_ERR, "so %llx drop set",
4776 (uint64_t)VM_KERNEL_ADDRPERM(so));
4777 error = EPIPE;
4778 goto done;
4779 }
4780
4781 datalen = cfil_data_length(data, &mbcnt, &mbnum);
4782
4783 if (outgoing) {
4784 cfi_buf = &cfil_info->cfi_snd;
4785 cfil_info->cfi_byte_outbound_count += datalen;
4786 } else {
4787 cfi_buf = &cfil_info->cfi_rcv;
4788 cfil_info->cfi_byte_inbound_count += datalen;
4789 }
4790
4791 cfi_buf->cfi_pending_last += datalen;
4792 cfi_buf->cfi_pending_mbcnt += mbcnt;
4793 cfi_buf->cfi_pending_mbnum += mbnum;
4794
4795 if (IS_IP_DGRAM(so)) {
4796 if (cfi_buf->cfi_pending_mbnum > cfil_udp_gc_mbuf_num_max ||
4797 cfi_buf->cfi_pending_mbcnt > cfil_udp_gc_mbuf_cnt_max) {
4798 cfi_buf->cfi_tail_drop_cnt++;
4799 cfi_buf->cfi_pending_mbcnt -= mbcnt;
4800 cfi_buf->cfi_pending_mbnum -= mbnum;
4801 return EPIPE;
4802 }
4803 }
4804
4805 cfil_info_buf_verify(cfi_buf);
4806
4807 #if DATA_DEBUG
4808 CFIL_LOG(LOG_DEBUG, "CFIL: QUEUEING DATA: <so %llx> %s: data %llx len %u flags 0x%x nextpkt %llx - cfi_pending_last %llu cfi_pending_mbcnt %u cfi_pass_offset %llu",
4809 (uint64_t)VM_KERNEL_ADDRPERM(so),
4810 outgoing ? "OUT" : "IN",
4811 (uint64_t)VM_KERNEL_ADDRPERM(data), datalen, data->m_flags,
4812 (uint64_t)VM_KERNEL_ADDRPERM(data->m_nextpkt),
4813 cfi_buf->cfi_pending_last,
4814 cfi_buf->cfi_pending_mbcnt,
4815 cfi_buf->cfi_pass_offset);
4816 #endif
4817
4818 /* Fast path when below pass offset */
4819 if (cfi_buf->cfi_pending_last <= cfi_buf->cfi_pass_offset) {
4820 cfil_update_entry_offsets(so, cfil_info, outgoing, datalen);
4821 #if DATA_DEBUG
4822 CFIL_LOG(LOG_DEBUG, "CFIL: QUEUEING DATA: FAST PATH");
4823 #endif
4824 } else {
4825 struct cfil_entry *iter_entry;
4826 SLIST_FOREACH(iter_entry, &cfil_info->cfi_ordered_entries, cfe_order_link) {
4827 // Is cfil attached to this filter?
4828 kcunit = CFI_ENTRY_KCUNIT(cfil_info, iter_entry);
4829 if (IS_ENTRY_ATTACHED(cfil_info, kcunit)) {
4830 if (IS_IP_DGRAM(so) && chain == NULL) {
4831 /* Datagrams only:
4832 * Chain addr (incoming only TDB), control (optional) and data into one chain.
4833 * This full chain will be reinjected into the socket after receiving the verdict.
4834 */
4835 (void) cfil_dgram_save_socket_state(cfil_info, data);
4836 chain = sbconcat_mbufs(NULL, outgoing ? NULL : to, data, control);
4837 if (chain == NULL) {
4838 return ENOBUFS;
4839 }
4840 data = chain;
4841 }
4842 error = cfil_data_filter(so, cfil_info, kcunit, outgoing, data,
4843 datalen);
4844 }
4845 /* 0 means passed so continue with next filter */
4846 if (error != 0) {
4847 break;
4848 }
4849 }
4850 }
4851
4852 /* Move cursor if no filter claimed the data */
4853 if (error == 0) {
4854 cfi_buf->cfi_pending_first += datalen;
4855 cfi_buf->cfi_pending_mbcnt -= mbcnt;
4856 cfi_buf->cfi_pending_mbnum -= mbnum;
4857 cfil_info_buf_verify(cfi_buf);
4858 }
4859 done:
4860 CFIL_INFO_VERIFY(cfil_info);
4861
4862 return error;
4863 }
4864
4865 /*
4866 * Callback from socket layer sosendxxx()
4867 */
4868 int
4869 cfil_sock_data_out(struct socket *so, struct sockaddr *to,
4870 struct mbuf *data, struct mbuf *control, uint32_t flags)
4871 {
4872 int error = 0;
4873 int new_filter_control_unit = 0;
4874
4875 if (IS_IP_DGRAM(so)) {
4876 return cfil_sock_udp_handle_data(TRUE, so, NULL, to, data, control, flags);
4877 }
4878
4879 if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL) {
4880 /* Drop pre-existing TCP sockets if filter is enabled now */
4881 if (cfil_active_count > 0 && !SKIP_FILTER_FOR_TCP_SOCKET(so)) {
4882 new_filter_control_unit = necp_socket_get_content_filter_control_unit(so);
4883 if (new_filter_control_unit > 0) {
4884 return EPIPE;
4885 }
4886 }
4887 return 0;
4888 }
4889
4890 /* Drop pre-existing TCP sockets when filter state changed */
4891 new_filter_control_unit = necp_socket_get_content_filter_control_unit(so);
4892 if (new_filter_control_unit > 0 && new_filter_control_unit != so->so_cfil->cfi_filter_control_unit && !SKIP_FILTER_FOR_TCP_SOCKET(so)) {
4893 return EPIPE;
4894 }
4895
4896 /*
4897 * Pass initial data for TFO.
4898 */
4899 if (IS_INITIAL_TFO_DATA(so)) {
4900 return 0;
4901 }
4902
4903 socket_lock_assert_owned(so);
4904
4905 if (so->so_cfil->cfi_flags & CFIF_DROP) {
4906 CFIL_LOG(LOG_ERR, "so %llx drop set",
4907 (uint64_t)VM_KERNEL_ADDRPERM(so));
4908 return EPIPE;
4909 }
4910 if (control != NULL) {
4911 CFIL_LOG(LOG_ERR, "so %llx control",
4912 (uint64_t)VM_KERNEL_ADDRPERM(so));
4913 OSIncrementAtomic(&cfil_stats.cfs_data_out_control);
4914 }
4915 if ((flags & MSG_OOB)) {
4916 CFIL_LOG(LOG_ERR, "so %llx MSG_OOB",
4917 (uint64_t)VM_KERNEL_ADDRPERM(so));
4918 OSIncrementAtomic(&cfil_stats.cfs_data_out_oob);
4919 }
4920 if ((so->so_snd.sb_flags & SB_LOCK) == 0) {
4921 panic("so %p SB_LOCK not set", so);
4922 }
4923
4924 if (so->so_snd.sb_cfil_thread != NULL) {
4925 panic("%s sb_cfil_thread %p not NULL", __func__,
4926 so->so_snd.sb_cfil_thread);
4927 }
4928
4929 error = cfil_data_common(so, so->so_cfil, 1, to, data, control, flags);
4930
4931 return error;
4932 }
4933
4934 /*
4935 * Callback from socket layer sbappendxxx()
4936 */
4937 int
4938 cfil_sock_data_in(struct socket *so, struct sockaddr *from,
4939 struct mbuf *data, struct mbuf *control, uint32_t flags)
4940 {
4941 int error = 0;
4942 int new_filter_control_unit = 0;
4943
4944 if (IS_IP_DGRAM(so)) {
4945 return cfil_sock_udp_handle_data(FALSE, so, NULL, from, data, control, flags);
4946 }
4947
4948 if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL) {
4949 /* Drop pre-existing TCP sockets if filter is enabled now */
4950 if (cfil_active_count > 0 && !SKIP_FILTER_FOR_TCP_SOCKET(so)) {
4951 new_filter_control_unit = necp_socket_get_content_filter_control_unit(so);
4952 if (new_filter_control_unit > 0) {
4953 return EPIPE;
4954 }
4955 }
4956 return 0;
4957 }
4958
4959 /* Drop pre-existing TCP sockets when filter state changed */
4960 new_filter_control_unit = necp_socket_get_content_filter_control_unit(so);
4961 if (new_filter_control_unit > 0 && new_filter_control_unit != so->so_cfil->cfi_filter_control_unit && !SKIP_FILTER_FOR_TCP_SOCKET(so)) {
4962 return EPIPE;
4963 }
4964
4965 /*
4966 * Pass initial data for TFO.
4967 */
4968 if (IS_INITIAL_TFO_DATA(so)) {
4969 return 0;
4970 }
4971
4972 socket_lock_assert_owned(so);
4973
4974 if (so->so_cfil->cfi_flags & CFIF_DROP) {
4975 CFIL_LOG(LOG_ERR, "so %llx drop set",
4976 (uint64_t)VM_KERNEL_ADDRPERM(so));
4977 return EPIPE;
4978 }
4979 if (control != NULL) {
4980 CFIL_LOG(LOG_ERR, "so %llx control",
4981 (uint64_t)VM_KERNEL_ADDRPERM(so));
4982 OSIncrementAtomic(&cfil_stats.cfs_data_in_control);
4983 }
4984 if (data->m_type == MT_OOBDATA) {
4985 CFIL_LOG(LOG_ERR, "so %llx MSG_OOB",
4986 (uint64_t)VM_KERNEL_ADDRPERM(so));
4987 OSIncrementAtomic(&cfil_stats.cfs_data_in_oob);
4988 }
4989 error = cfil_data_common(so, so->so_cfil, 0, from, data, control, flags);
4990
4991 return error;
4992 }
4993
4994 /*
4995 * Callback from socket layer soshutdownxxx()
4996 *
4997 * We may delay the shutdown write if there's outgoing data in process.
4998 *
4999 * There is no point in delaying the shutdown read because the process
5000 * indicated that it does not want to read any more data.
5001 */
5002 int
5003 cfil_sock_shutdown(struct socket *so, int *how)
5004 {
5005 int error = 0;
5006
5007 if (IS_IP_DGRAM(so)) {
5008 return cfil_sock_udp_shutdown(so, how);
5009 }
5010
5011 if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL) {
5012 goto done;
5013 }
5014
5015 socket_lock_assert_owned(so);
5016
5017 CFIL_LOG(LOG_INFO, "so %llx how %d",
5018 (uint64_t)VM_KERNEL_ADDRPERM(so), *how);
5019
5020 /*
5021 * Check the state of the socket before the content filter
5022 */
5023 if (*how != SHUT_WR && (so->so_state & SS_CANTRCVMORE) != 0) {
5024 /* read already shut down */
5025 error = ENOTCONN;
5026 goto done;
5027 }
5028 if (*how != SHUT_RD && (so->so_state & SS_CANTSENDMORE) != 0) {
5029 /* write already shut down */
5030 error = ENOTCONN;
5031 goto done;
5032 }
5033
5034 if ((so->so_cfil->cfi_flags & CFIF_DROP) != 0) {
5035 CFIL_LOG(LOG_ERR, "so %llx drop set",
5036 (uint64_t)VM_KERNEL_ADDRPERM(so));
5037 goto done;
5038 }
5039
5040 /*
5041 * shutdown read: SHUT_RD or SHUT_RDWR
5042 */
5043 if (*how != SHUT_WR) {
5044 if (so->so_cfil->cfi_flags & CFIF_SHUT_RD) {
5045 error = ENOTCONN;
5046 goto done;
5047 }
5048 so->so_cfil->cfi_flags |= CFIF_SHUT_RD;
5049 cfil_sock_notify_shutdown(so, SHUT_RD);
5050 }
5051 /*
5052 * shutdown write: SHUT_WR or SHUT_RDWR
5053 */
5054 if (*how != SHUT_RD) {
5055 if (so->so_cfil->cfi_flags & CFIF_SHUT_WR) {
5056 error = ENOTCONN;
5057 goto done;
5058 }
5059 so->so_cfil->cfi_flags |= CFIF_SHUT_WR;
5060 cfil_sock_notify_shutdown(so, SHUT_WR);
5061 /*
5062 * When outgoing data is pending, we delay the shutdown at the
5063 * protocol level until the content filters give the final
5064 * verdict on the pending data.
5065 */
5066 if (cfil_sock_data_pending(&so->so_snd) != 0) {
5067 /*
5068 * When shutting down the read and write sides at once
5069 * we can proceed to the final shutdown of the read
5070 * side. Otherwise, we just return.
5071 */
5072 if (*how == SHUT_WR) {
5073 error = EJUSTRETURN;
5074 } else if (*how == SHUT_RDWR) {
5075 *how = SHUT_RD;
5076 }
5077 }
5078 }
5079 done:
5080 return error;
5081 }
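/*
 * Note: when the write-side shutdown is deferred above (EJUSTRETURN, or
 * SHUT_RDWR downgraded to SHUT_RD), the protocol-level shutdown completes
 * later in cfil_service_inject_queue(): once the passed data has been
 * re-injected and cfil_sock_data_pending() reports nothing outstanding,
 * soshutdownlock_final(so, SHUT_WR) is called there.
 */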
5082
5083 /*
5084 * This is called when the socket is closed and there is no more
5085 * opportunity for filtering
5086 */
5087 void
5088 cfil_sock_is_closed(struct socket *so)
5089 {
5090 errno_t error = 0;
5091 int kcunit;
5092
5093 if (IS_IP_DGRAM(so)) {
5094 cfil_sock_udp_is_closed(so);
5095 return;
5096 }
5097
5098 if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL) {
5099 return;
5100 }
5101
5102 CFIL_LOG(LOG_INFO, "so %llx", (uint64_t)VM_KERNEL_ADDRPERM(so));
5103
5104 socket_lock_assert_owned(so);
5105
5106 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
5107 /* Let the filters know of the closing */
5108 error = cfil_dispatch_closed_event(so, so->so_cfil, kcunit);
5109 }
5110
5111 /* Last chance to push passed data out */
5112 error = cfil_acquire_sockbuf(so, so->so_cfil, 1);
5113 if (error == 0) {
5114 cfil_service_inject_queue(so, so->so_cfil, 1);
5115 }
5116 cfil_release_sockbuf(so, 1);
5117
5118 so->so_cfil->cfi_flags |= CFIF_SOCK_CLOSED;
5119
5120 /* Pending data needs to go */
5121 cfil_flush_queues(so, so->so_cfil);
5122
5123 CFIL_INFO_VERIFY(so->so_cfil);
5124 }
5125
5126 /*
5127 * This is called when the socket is disconnected, so let the filters
5128 * know about the disconnection and that no more data will come
5129 *
5130 * The how parameter has the same values as soshutdown()
5131 */
5132 void
5133 cfil_sock_notify_shutdown(struct socket *so, int how)
5134 {
5135 errno_t error = 0;
5136 int kcunit;
5137
5138 if (IS_IP_DGRAM(so)) {
5139 cfil_sock_udp_notify_shutdown(so, how, 0, 0);
5140 return;
5141 }
5142
5143 if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL) {
5144 return;
5145 }
5146
5147 CFIL_LOG(LOG_INFO, "so %llx how %d",
5148 (uint64_t)VM_KERNEL_ADDRPERM(so), how);
5149
5150 socket_lock_assert_owned(so);
5151
5152 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
5153 /* Disconnect incoming side */
5154 if (how != SHUT_WR) {
5155 error = cfil_dispatch_disconnect_event(so, so->so_cfil, kcunit, 0);
5156 }
5157 /* Disconnect outgoing side */
5158 if (how != SHUT_RD) {
5159 error = cfil_dispatch_disconnect_event(so, so->so_cfil, kcunit, 1);
5160 }
5161 }
5162 }
5163
5164 static int
5165 cfil_filters_attached(struct socket *so)
5166 {
5167 struct cfil_entry *entry;
5168 uint32_t kcunit;
5169 int attached = 0;
5170
5171 if (IS_IP_DGRAM(so)) {
5172 return cfil_filters_udp_attached(so, FALSE);
5173 }
5174
5175 if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL) {
5176 return 0;
5177 }
5178
5179 socket_lock_assert_owned(so);
5180
5181 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
5182 entry = &so->so_cfil->cfi_entries[kcunit - 1];
5183
5184 /* Are we attached to the filter? */
5185 if (entry->cfe_filter == NULL) {
5186 continue;
5187 }
5188 if ((entry->cfe_flags & CFEF_SENT_SOCK_ATTACHED) == 0) {
5189 continue;
5190 }
5191 if ((entry->cfe_flags & CFEF_CFIL_DETACHED) != 0) {
5192 continue;
5193 }
5194 attached = 1;
5195 break;
5196 }
5197
5198 return attached;
5199 }
5200
5201 /*
5202 * This is called when the socket is closed and we are waiting for
5203 * the filters to give the final pass or drop
5204 */
5205 void
5206 cfil_sock_close_wait(struct socket *so)
5207 {
5208 lck_mtx_t *mutex_held;
5209 struct timespec ts;
5210 int error;
5211
5212 if (IS_IP_DGRAM(so)) {
5213 cfil_sock_udp_close_wait(so);
5214 return;
5215 }
5216
5217 if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL) {
5218 return;
5219 }
5220
5221 CFIL_LOG(LOG_INFO, "so %llx", (uint64_t)VM_KERNEL_ADDRPERM(so));
5222
5223 if (so->so_proto->pr_getlock != NULL) {
5224 mutex_held = (*so->so_proto->pr_getlock)(so, PR_F_WILLUNLOCK);
5225 } else {
5226 mutex_held = so->so_proto->pr_domain->dom_mtx;
5227 }
5228 LCK_MTX_ASSERT(mutex_held, LCK_MTX_ASSERT_OWNED);
5229
5230 while (cfil_filters_attached(so)) {
5231 /*
5232 * Notify the filters we are going away so they can detach
5233 */
5234 cfil_sock_notify_shutdown(so, SHUT_RDWR);
5235
5236 /*
5237 * Make sure we still need to wait after the filters are notified
5238 * of the disconnection
5239 */
5240 if (cfil_filters_attached(so) == 0) {
5241 break;
5242 }
5243
5244 CFIL_LOG(LOG_INFO, "so %llx waiting",
5245 (uint64_t)VM_KERNEL_ADDRPERM(so));
5246
5247 ts.tv_sec = cfil_close_wait_timeout / 1000;
5248 ts.tv_nsec = (cfil_close_wait_timeout % 1000) *
5249 NSEC_PER_USEC * 1000;
5250
5251 OSIncrementAtomic(&cfil_stats.cfs_close_wait);
5252 so->so_cfil->cfi_flags |= CFIF_CLOSE_WAIT;
5253 error = msleep((caddr_t)so->so_cfil, mutex_held,
5254 PSOCK | PCATCH, "cfil_sock_close_wait", &ts);
5255 so->so_cfil->cfi_flags &= ~CFIF_CLOSE_WAIT;
5256
5257 CFIL_LOG(LOG_NOTICE, "so %llx timed out %d",
5258 (uint64_t)VM_KERNEL_ADDRPERM(so), (error != 0));
5259
5260 /*
5261 * Force close in case of timeout
5262 */
5263 if (error != 0) {
5264 OSIncrementAtomic(&cfil_stats.cfs_close_wait_timeout);
5265 break;
5266 }
5267 }
5268 }
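/*
 * Note: the msleep() above is paired with the wakeup((caddr_t)cfil_info)
 * calls in cfil_update_data_offsets(), cfil_action_drop() and
 * cfil_service_inject_queue(); when the last attached filter detaches
 * while CFIF_CLOSE_WAIT is set, the closing thread is woken instead of
 * waiting out the full cfil_close_wait_timeout.
 */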
5269
5270 /*
5271 * Returns the size of the data held by the content filter for this socket buffer
5272 */
5273 int32_t
5274 cfil_sock_data_pending(struct sockbuf *sb)
5275 {
5276 struct socket *so = sb->sb_so;
5277 uint64_t pending = 0;
5278
5279 if (IS_IP_DGRAM(so)) {
5280 return cfil_sock_udp_data_pending(sb, FALSE);
5281 }
5282
5283 if ((so->so_flags & SOF_CONTENT_FILTER) != 0 && so->so_cfil != NULL) {
5284 struct cfi_buf *cfi_buf;
5285
5286 socket_lock_assert_owned(so);
5287
5288 if ((sb->sb_flags & SB_RECV) == 0) {
5289 cfi_buf = &so->so_cfil->cfi_snd;
5290 } else {
5291 cfi_buf = &so->so_cfil->cfi_rcv;
5292 }
5293
5294 pending = cfi_buf->cfi_pending_last -
5295 cfi_buf->cfi_pending_first;
5296
5297 /*
5298 * If we are limited by the "chars of mbufs used" roughly
5299 * adjust so we won't overcommit
5300 */
5301 if (pending > (uint64_t)cfi_buf->cfi_pending_mbcnt) {
5302 pending = cfi_buf->cfi_pending_mbcnt;
5303 }
5304 }
5305
5306 VERIFY(pending < INT32_MAX);
5307
5308 return (int32_t)(pending);
5309 }
5310
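/*
 * Illustrative example for the function above: the pending byte count is the
 * distance between the absolute stream offsets tracked for that direction,
 * clamped by the "chars of mbufs used" counter. For instance, with
 * cfi_pending_first == 4096, cfi_pending_last == 6144 and
 * cfi_pending_mbcnt == 1024:
 *
 *     pending = 6144 - 4096 = 2048 bytes held by the filter,
 *     but since 2048 > mbcnt (1024), 1024 is reported instead.
 */
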
5311 /*
5312 * Return the socket buffer space used by data being held by content filters
5313 * so processes won't clog the socket buffer
5314 */
5315 int32_t
5316 cfil_sock_data_space(struct sockbuf *sb)
5317 {
5318 struct socket *so = sb->sb_so;
5319 uint64_t pending = 0;
5320
5321 if (IS_IP_DGRAM(so)) {
5322 return cfil_sock_udp_data_pending(sb, TRUE);
5323 }
5324
5325 if ((so->so_flags & SOF_CONTENT_FILTER) != 0 && so->so_cfil != NULL &&
5326 so->so_snd.sb_cfil_thread != current_thread()) {
5327 struct cfi_buf *cfi_buf;
5328
5329 socket_lock_assert_owned(so);
5330
5331 if ((sb->sb_flags & SB_RECV) == 0) {
5332 cfi_buf = &so->so_cfil->cfi_snd;
5333 } else {
5334 cfi_buf = &so->so_cfil->cfi_rcv;
5335 }
5336
5337 pending = cfi_buf->cfi_pending_last -
5338 cfi_buf->cfi_pending_first;
5339
5340 /*
5341 * If we are limited by the "chars of mbufs used" roughly
5342 * adjust so we won't overcommit
5343 */
5344 if ((uint64_t)cfi_buf->cfi_pending_mbcnt > pending) {
5345 pending = cfi_buf->cfi_pending_mbcnt;
5346 }
5347 }
5348
5349 VERIFY(pending < INT32_MAX);
5350
5351 return (int32_t)(pending);
5352 }
5353
5354 /*
5355 * A callback from the socket and protocol layer when data becomes
5356 * available in the socket buffer to give a chance for the content filter
5357 * to re-inject data that was held back
5358 */
5359 void
5360 cfil_sock_buf_update(struct sockbuf *sb)
5361 {
5362 int outgoing;
5363 int error;
5364 struct socket *so = sb->sb_so;
5365
5366 if (IS_IP_DGRAM(so)) {
5367 cfil_sock_udp_buf_update(sb);
5368 return;
5369 }
5370
5371 if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL) {
5372 return;
5373 }
5374
5375 if (!cfil_sbtrim) {
5376 return;
5377 }
5378
5379 socket_lock_assert_owned(so);
5380
5381 if ((sb->sb_flags & SB_RECV) == 0) {
5382 if ((so->so_cfil->cfi_flags & CFIF_RETRY_INJECT_OUT) == 0) {
5383 return;
5384 }
5385 outgoing = 1;
5386 OSIncrementAtomic(&cfil_stats.cfs_inject_q_out_retry);
5387 } else {
5388 if ((so->so_cfil->cfi_flags & CFIF_RETRY_INJECT_IN) == 0) {
5389 return;
5390 }
5391 outgoing = 0;
5392 OSIncrementAtomic(&cfil_stats.cfs_inject_q_in_retry);
5393 }
5394
5395 CFIL_LOG(LOG_NOTICE, "so %llx outgoing %d",
5396 (uint64_t)VM_KERNEL_ADDRPERM(so), outgoing);
5397
5398 error = cfil_acquire_sockbuf(so, so->so_cfil, outgoing);
5399 if (error == 0) {
5400 cfil_service_inject_queue(so, so->so_cfil, outgoing);
5401 }
5402 cfil_release_sockbuf(so, outgoing);
5403 }
5404
5405 int
5406 sysctl_cfil_filter_list(struct sysctl_oid *oidp, void *arg1, int arg2,
5407 struct sysctl_req *req)
5408 {
5409 #pragma unused(oidp, arg1, arg2)
5410 int error = 0;
5411 size_t len = 0;
5412 u_int32_t i;
5413
5414 /* Read only */
5415 if (req->newptr != USER_ADDR_NULL) {
5416 return EPERM;
5417 }
5418
5419 cfil_rw_lock_shared(&cfil_lck_rw);
5420
5421 for (i = 0; content_filters != NULL && i < MAX_CONTENT_FILTER; i++) {
5422 struct cfil_filter_stat filter_stat;
5423 struct content_filter *cfc = content_filters[i];
5424
5425 if (cfc == NULL) {
5426 continue;
5427 }
5428
5429 /* If just asking for the size */
5430 if (req->oldptr == USER_ADDR_NULL) {
5431 len += sizeof(struct cfil_filter_stat);
5432 continue;
5433 }
5434
5435 bzero(&filter_stat, sizeof(struct cfil_filter_stat));
5436 filter_stat.cfs_len = sizeof(struct cfil_filter_stat);
5437 filter_stat.cfs_filter_id = cfc->cf_kcunit;
5438 filter_stat.cfs_flags = cfc->cf_flags;
5439 filter_stat.cfs_sock_count = cfc->cf_sock_count;
5440 filter_stat.cfs_necp_control_unit = cfc->cf_necp_control_unit;
5441
5442 error = SYSCTL_OUT(req, &filter_stat,
5443 sizeof(struct cfil_filter_stat));
5444 if (error != 0) {
5445 break;
5446 }
5447 }
5448 /* If just asking for the size */
5449 if (req->oldptr == USER_ADDR_NULL) {
5450 req->oldidx = len;
5451 }
5452
5453 cfil_rw_unlock_shared(&cfil_lck_rw);
5454
5455 #if SHOW_DEBUG
5456 if (req->oldptr != USER_ADDR_NULL) {
5457 for (i = 1; content_filters != NULL && i <= MAX_CONTENT_FILTER; i++) {
5458 cfil_filter_show(i);
5459 }
5460 }
5461 #endif
5462
5463 return error;
5464 }
5465
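/*
 * Illustrative sketch: the handler above follows the usual two-pass sysctl
 * pattern, where a request with oldptr == NULL only reports the required
 * length and a second request copies out one cfil_filter_stat per active
 * filter. A user space consumer could drive it roughly as follows; the exact
 * MIB name is not defined in this file, so "net.cfil.filter_list" below is
 * an assumption:
 *
 *     #include <sys/sysctl.h>
 *     #include <stdlib.h>
 *
 *     size_t len = 0;
 *     // Pass 1: NULL buffer, the kernel fills in the needed size.
 *     if (sysctlbyname("net.cfil.filter_list", NULL, &len, NULL, 0) == 0 &&   // MIB name assumed
 *         len > 0) {
 *             struct cfil_filter_stat *stats = malloc(len);
 *             // Pass 2: copy out the records; len is updated to the bytes written.
 *             if (stats != NULL &&
 *                 sysctlbyname("net.cfil.filter_list", stats, &len, NULL, 0) == 0) {
 *                     size_t count = len / sizeof(struct cfil_filter_stat);
 *                     // ... inspect stats[0..count-1] ...
 *             }
 *             free(stats);
 *     }
 */
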
5466 static int
5467 sysctl_cfil_sock_list(struct sysctl_oid *oidp, void *arg1, int arg2,
5468 struct sysctl_req *req)
5469 {
5470 #pragma unused(oidp, arg1, arg2)
5471 int error = 0;
5472 u_int32_t i;
5473 struct cfil_info *cfi;
5474
5475 /* Read only */
5476 if (req->newptr != USER_ADDR_NULL) {
5477 return EPERM;
5478 }
5479
5480 cfil_rw_lock_shared(&cfil_lck_rw);
5481
5482 /*
5483 * If just asking for the size, return an estimate of the required length
5484 */
5485 if (req->oldptr == USER_ADDR_NULL) {
5486 req->oldidx = cfil_sock_attached_count *
5487 sizeof(struct cfil_sock_stat);
5488 /* Bump the length in case new sockets get attached */
5489 req->oldidx += req->oldidx >> 3;
5490 goto done;
5491 }
5492
5493 TAILQ_FOREACH(cfi, &cfil_sock_head, cfi_link) {
5494 struct cfil_entry *entry;
5495 struct cfil_sock_stat stat;
5496 struct socket *so = cfi->cfi_so;
5497
5498 bzero(&stat, sizeof(struct cfil_sock_stat));
5499 stat.cfs_len = sizeof(struct cfil_sock_stat);
5500 stat.cfs_sock_id = cfi->cfi_sock_id;
5501 stat.cfs_flags = cfi->cfi_flags;
5502
5503 if (so != NULL) {
5504 stat.cfs_pid = so->last_pid;
5505 memcpy(stat.cfs_uuid, so->last_uuid,
5506 sizeof(uuid_t));
5507 if (so->so_flags & SOF_DELEGATED) {
5508 stat.cfs_e_pid = so->e_pid;
5509 memcpy(stat.cfs_e_uuid, so->e_uuid,
5510 sizeof(uuid_t));
5511 } else {
5512 stat.cfs_e_pid = so->last_pid;
5513 memcpy(stat.cfs_e_uuid, so->last_uuid,
5514 sizeof(uuid_t));
5515 }
5516
5517 stat.cfs_sock_family = so->so_proto->pr_domain->dom_family;
5518 stat.cfs_sock_type = so->so_proto->pr_type;
5519 stat.cfs_sock_protocol = so->so_proto->pr_protocol;
5520 }
5521
5522 stat.cfs_snd.cbs_pending_first =
5523 cfi->cfi_snd.cfi_pending_first;
5524 stat.cfs_snd.cbs_pending_last =
5525 cfi->cfi_snd.cfi_pending_last;
5526 stat.cfs_snd.cbs_inject_q_len =
5527 cfil_queue_len(&cfi->cfi_snd.cfi_inject_q);
5528 stat.cfs_snd.cbs_pass_offset =
5529 cfi->cfi_snd.cfi_pass_offset;
5530
5531 stat.cfs_rcv.cbs_pending_first =
5532 cfi->cfi_rcv.cfi_pending_first;
5533 stat.cfs_rcv.cbs_pending_last =
5534 cfi->cfi_rcv.cfi_pending_last;
5535 stat.cfs_rcv.cbs_inject_q_len =
5536 cfil_queue_len(&cfi->cfi_rcv.cfi_inject_q);
5537 stat.cfs_rcv.cbs_pass_offset =
5538 cfi->cfi_rcv.cfi_pass_offset;
5539
5540 for (i = 0; i < MAX_CONTENT_FILTER; i++) {
5541 struct cfil_entry_stat *estat;
5542 struct cfe_buf *ebuf;
5543 struct cfe_buf_stat *sbuf;
5544
5545 entry = &cfi->cfi_entries[i];
5546
5547 estat = &stat.ces_entries[i];
5548
5549 estat->ces_len = sizeof(struct cfil_entry_stat);
5550 estat->ces_filter_id = entry->cfe_filter ?
5551 entry->cfe_filter->cf_kcunit : 0;
5552 estat->ces_flags = entry->cfe_flags;
5553 estat->ces_necp_control_unit =
5554 entry->cfe_necp_control_unit;
5555
5556 estat->ces_last_event.tv_sec =
5557 (int64_t)entry->cfe_last_event.tv_sec;
5558 estat->ces_last_event.tv_usec =
5559 (int64_t)entry->cfe_last_event.tv_usec;
5560
5561 estat->ces_last_action.tv_sec =
5562 (int64_t)entry->cfe_last_action.tv_sec;
5563 estat->ces_last_action.tv_usec =
5564 (int64_t)entry->cfe_last_action.tv_usec;
5565
5566 ebuf = &entry->cfe_snd;
5567 sbuf = &estat->ces_snd;
5568 sbuf->cbs_pending_first =
5569 cfil_queue_offset_first(&ebuf->cfe_pending_q);
5570 sbuf->cbs_pending_last =
5571 cfil_queue_offset_last(&ebuf->cfe_pending_q);
5572 sbuf->cbs_ctl_first =
5573 cfil_queue_offset_first(&ebuf->cfe_ctl_q);
5574 sbuf->cbs_ctl_last =
5575 cfil_queue_offset_last(&ebuf->cfe_ctl_q);
5576 sbuf->cbs_pass_offset = ebuf->cfe_pass_offset;
5577 sbuf->cbs_peek_offset = ebuf->cfe_peek_offset;
5578 sbuf->cbs_peeked = ebuf->cfe_peeked;
5579
5580 ebuf = &entry->cfe_rcv;
5581 sbuf = &estat->ces_rcv;
5582 sbuf->cbs_pending_first =
5583 cfil_queue_offset_first(&ebuf->cfe_pending_q);
5584 sbuf->cbs_pending_last =
5585 cfil_queue_offset_last(&ebuf->cfe_pending_q);
5586 sbuf->cbs_ctl_first =
5587 cfil_queue_offset_first(&ebuf->cfe_ctl_q);
5588 sbuf->cbs_ctl_last =
5589 cfil_queue_offset_last(&ebuf->cfe_ctl_q);
5590 sbuf->cbs_pass_offset = ebuf->cfe_pass_offset;
5591 sbuf->cbs_peek_offset = ebuf->cfe_peek_offset;
5592 sbuf->cbs_peeked = ebuf->cfe_peeked;
5593 }
5594 error = SYSCTL_OUT(req, &stat,
5595 sizeof(struct cfil_sock_stat));
5596 if (error != 0) {
5597 break;
5598 }
5599 }
5600 done:
5601 cfil_rw_unlock_shared(&cfil_lck_rw);
5602
5603 #if SHOW_DEBUG
5604 if (req->oldptr != USER_ADDR_NULL) {
5605 cfil_info_show();
5606 }
5607 #endif
5608
5609 return error;
5610 }
5611
5612 /*
5613 * UDP Socket Support
5614 */
5615 static void
5616 cfil_hash_entry_log(int level, struct socket *so, struct cfil_hash_entry *entry, uint64_t sockId, const char* msg)
5617 {
5618 char local[MAX_IPv6_STR_LEN + 6];
5619 char remote[MAX_IPv6_STR_LEN + 6];
5620 const void *addr;
5621
5622 // No socket or no hash entry, no-op
5623 if (so == NULL || entry == NULL) {
5624 return;
5625 }
5626
5627 local[0] = remote[0] = 0x0;
5628
5629 switch (entry->cfentry_family) {
5630 case AF_INET6:
5631 addr = &entry->cfentry_laddr.addr6;
5632 inet_ntop(AF_INET6, addr, local, sizeof(local));
5633 addr = &entry->cfentry_faddr.addr6;
5634 inet_ntop(AF_INET6, addr, remote, sizeof(remote));
5635 break;
5636 case AF_INET:
5637 addr = &entry->cfentry_laddr.addr46.ia46_addr4.s_addr;
5638 inet_ntop(AF_INET, addr, local, sizeof(local));
5639 addr = &entry->cfentry_faddr.addr46.ia46_addr4.s_addr;
5640 inet_ntop(AF_INET, addr, remote, sizeof(remote));
5641 break;
5642 default:
5643 return;
5644 }
5645
5646 CFIL_LOG(level, "<%s>: <%s(%d) so %llx, entry %p, sockID %llu> lport %d fport %d laddr %s faddr %s hash %X",
5647 msg,
5648 IS_UDP(so) ? "UDP" : "proto", GET_SO_PROTO(so),
5649 (uint64_t)VM_KERNEL_ADDRPERM(so), entry, sockId,
5650 ntohs(entry->cfentry_lport), ntohs(entry->cfentry_fport), local, remote,
5651 entry->cfentry_flowhash);
5652 }
5653
5654 static void
5655 cfil_inp_log(int level, struct socket *so, const char* msg)
5656 {
5657 struct inpcb *inp = NULL;
5658 char local[MAX_IPv6_STR_LEN + 6];
5659 char remote[MAX_IPv6_STR_LEN + 6];
5660 const void *addr;
5661
5662 if (so == NULL) {
5663 return;
5664 }
5665
5666 inp = sotoinpcb(so);
5667 if (inp == NULL) {
5668 return;
5669 }
5670
5671 local[0] = remote[0] = 0x0;
5672
5673 if (inp->inp_vflag & INP_IPV6) {
5674 addr = &inp->in6p_laddr.s6_addr32;
5675 inet_ntop(AF_INET6, addr, local, sizeof(local));
5676 addr = &inp->in6p_faddr.s6_addr32;
5677 inet_ntop(AF_INET6, addr, remote, sizeof(remote));
5678 } else {
5679 addr = &inp->inp_laddr.s_addr;
5680 inet_ntop(AF_INET, addr, local, sizeof(local));
5681 addr = &inp->inp_faddr.s_addr;
5682 inet_ntop(AF_INET, addr, remote, sizeof(remote));
5683 }
5684
5685 if (so->so_cfil != NULL) {
5686 CFIL_LOG(level, "<%s>: <%s so %llx - flags 0x%x 0x%x, sockID %llu> lport %d fport %d laddr %s faddr %s",
5687 msg, IS_UDP(so) ? "UDP" : "TCP",
5688 (uint64_t)VM_KERNEL_ADDRPERM(so), inp->inp_flags, inp->inp_socket->so_flags, so->so_cfil->cfi_sock_id,
5689 ntohs(inp->inp_lport), ntohs(inp->inp_fport), local, remote);
5690 } else {
5691 CFIL_LOG(level, "<%s>: <%s so %llx - flags 0x%x 0x%x> lport %d fport %d laddr %s faddr %s",
5692 msg, IS_UDP(so) ? "UDP" : "TCP",
5693 (uint64_t)VM_KERNEL_ADDRPERM(so), inp->inp_flags, inp->inp_socket->so_flags,
5694 ntohs(inp->inp_lport), ntohs(inp->inp_fport), local, remote);
5695 }
5696 }
5697
5698 static void
5699 cfil_info_log(int level, struct cfil_info *cfil_info, const char* msg)
5700 {
5701 if (cfil_info == NULL) {
5702 return;
5703 }
5704
5705 if (cfil_info->cfi_hash_entry != NULL) {
5706 cfil_hash_entry_log(level, cfil_info->cfi_so, cfil_info->cfi_hash_entry, cfil_info->cfi_sock_id, msg);
5707 } else {
5708 cfil_inp_log(level, cfil_info->cfi_so, msg);
5709 }
5710 }
5711
5712 errno_t
5713 cfil_db_init(struct socket *so)
5714 {
5715 errno_t error = 0;
5716 struct cfil_db *db = NULL;
5717
5718 CFIL_LOG(LOG_INFO, "");
5719
5720 db = zalloc(cfil_db_zone);
5721 if (db == NULL) {
5722 error = ENOMEM;
5723 goto done;
5724 }
5725 bzero(db, sizeof(struct cfil_db));
5726 db->cfdb_so = so;
5727 db->cfdb_hashbase = hashinit(CFILHASHSIZE, M_CFIL, &db->cfdb_hashmask);
5728 if (db->cfdb_hashbase == NULL) {
5729 zfree(cfil_db_zone, db);
5730 db = NULL;
5731 error = ENOMEM;
5732 goto done;
5733 }
5734
5735 so->so_cfil_db = db;
5736
5737 done:
5738 return error;
5739 }
5740
5741 void
5742 cfil_db_free(struct socket *so)
5743 {
5744 struct cfil_hash_entry *entry = NULL;
5745 struct cfil_hash_entry *temp_entry = NULL;
5746 struct cfilhashhead *cfilhash = NULL;
5747 struct cfil_db *db = NULL;
5748
5749 CFIL_LOG(LOG_INFO, "");
5750
5751 if (so == NULL || so->so_cfil_db == NULL) {
5752 return;
5753 }
5754 db = so->so_cfil_db;
5755
5756 #if LIFECYCLE_DEBUG
5757 CFIL_LOG(LOG_ERR, "CFIL: LIFECYCLE: <so %llx, db %p> freeing db (count == %d)",
5758 (uint64_t)VM_KERNEL_ADDRPERM(so), db, db->cfdb_count);
5759 #endif
5760
5761 for (int i = 0; i < CFILHASHSIZE; i++) {
5762 cfilhash = &db->cfdb_hashbase[i];
5763 LIST_FOREACH_SAFE(entry, cfilhash, cfentry_link, temp_entry) {
5764 if (entry->cfentry_cfil != NULL) {
5765 #if LIFECYCLE_DEBUG
5766 cfil_info_log(LOG_ERR, entry->cfentry_cfil, "CFIL: LIFECYCLE: DB FREE CLEAN UP");
5767 #endif
5768 CFIL_INFO_FREE(entry->cfentry_cfil);
5769 OSIncrementAtomic(&cfil_stats.cfs_sock_detached);
5770 entry->cfentry_cfil = NULL;
5771 }
5772
5773 cfil_db_delete_entry(db, entry);
5774 if (so->so_flags & SOF_CONTENT_FILTER) {
5775 if (db->cfdb_count == 0) {
5776 so->so_flags &= ~SOF_CONTENT_FILTER;
5777 }
5778 VERIFY(so->so_usecount > 0);
5779 so->so_usecount--;
5780 }
5781 }
5782 }
5783
5784 // Make sure all entries are cleaned up!
5785 VERIFY(db->cfdb_count == 0);
5786 #if LIFECYCLE_DEBUG
5787 CFIL_LOG(LOG_ERR, "CFIL: LIFECYCLE: so usecount %d", so->so_usecount);
5788 #endif
5789
5790 hashdestroy(db->cfdb_hashbase, M_CFIL, db->cfdb_hashmask);
5791 zfree(cfil_db_zone, db);
5792 so->so_cfil_db = NULL;
5793 }
5794
5795 static bool
5796 fill_cfil_hash_entry_from_address(struct cfil_hash_entry *entry, bool isLocal, struct sockaddr *addr, bool islocalUpdate)
5797 {
5798 struct sockaddr_in *sin = NULL;
5799 struct sockaddr_in6 *sin6 = NULL;
5800
5801 if (entry == NULL || addr == NULL) {
5802 return FALSE;
5803 }
5804
5805 switch (addr->sa_family) {
5806 case AF_INET:
5807 sin = satosin(addr);
5808 if (sin->sin_len != sizeof(*sin)) {
5809 return FALSE;
5810 }
5811 if (isLocal == TRUE) {
5812 if (sin->sin_port) {
5813 entry->cfentry_lport = sin->sin_port;
5814 if (islocalUpdate) {
5815 entry->cfentry_lport_updated = TRUE;
5816 }
5817 }
5818 if (sin->sin_addr.s_addr) {
5819 entry->cfentry_laddr.addr46.ia46_addr4.s_addr = sin->sin_addr.s_addr;
5820 if (islocalUpdate) {
5821 entry->cfentry_laddr_updated = TRUE;
5822 }
5823 }
5824 } else {
5825 if (sin->sin_port) {
5826 entry->cfentry_fport = sin->sin_port;
5827 }
5828 if (sin->sin_addr.s_addr) {
5829 entry->cfentry_faddr.addr46.ia46_addr4.s_addr = sin->sin_addr.s_addr;
5830 }
5831 }
5832 entry->cfentry_family = AF_INET;
5833 return TRUE;
5834 case AF_INET6:
5835 sin6 = satosin6(addr);
5836 if (sin6->sin6_len != sizeof(*sin6)) {
5837 return FALSE;
5838 }
5839 if (isLocal == TRUE) {
5840 if (sin6->sin6_port) {
5841 entry->cfentry_lport = sin6->sin6_port;
5842 if (islocalUpdate) {
5843 entry->cfentry_lport_updated = TRUE;
5844 }
5845 }
5846 if (!IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
5847 entry->cfentry_laddr.addr6 = sin6->sin6_addr;
5848 if (islocalUpdate) {
5849 entry->cfentry_laddr_updated = TRUE;
5850 }
5851 }
5852 } else {
5853 if (sin6->sin6_port) {
5854 entry->cfentry_fport = sin6->sin6_port;
5855 }
5856 if (!IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
5857 entry->cfentry_faddr.addr6 = sin6->sin6_addr;
5858 }
5859 }
5860 entry->cfentry_family = AF_INET6;
5861 return TRUE;
5862 default:
5863 return FALSE;
5864 }
5865 }
5866
5867 static bool
5868 fill_cfil_hash_entry_from_inp(struct cfil_hash_entry *entry, bool isLocal, struct inpcb *inp, bool islocalUpdate)
5869 {
5870 if (entry == NULL || inp == NULL) {
5871 return FALSE;
5872 }
5873
5874 if (inp->inp_vflag & INP_IPV6) {
5875 if (isLocal == TRUE) {
5876 if (inp->inp_lport) {
5877 entry->cfentry_lport = inp->inp_lport;
5878 if (islocalUpdate) {
5879 entry->cfentry_lport_updated = TRUE;
5880 }
5881 }
5882 if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) {
5883 entry->cfentry_laddr.addr6 = inp->in6p_laddr;
5884 if (islocalUpdate) {
5885 entry->cfentry_laddr_updated = TRUE;
5886 }
5887 }
5888 } else {
5889 if (inp->inp_fport) {
5890 entry->cfentry_fport = inp->inp_fport;
5891 }
5892 if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) {
5893 entry->cfentry_faddr.addr6 = inp->in6p_faddr;
5894 }
5895 }
5896 entry->cfentry_family = AF_INET6;
5897 return TRUE;
5898 } else if (inp->inp_vflag & INP_IPV4) {
5899 if (isLocal == TRUE) {
5900 if (inp->inp_lport) {
5901 entry->cfentry_lport = inp->inp_lport;
5902 if (islocalUpdate) {
5903 entry->cfentry_lport_updated = TRUE;
5904 }
5905 }
5906 if (inp->inp_laddr.s_addr) {
5907 entry->cfentry_laddr.addr46.ia46_addr4.s_addr = inp->inp_laddr.s_addr;
5908 if (islocalUpdate) {
5909 entry->cfentry_laddr_updated = TRUE;
5910 }
5911 }
5912 } else {
5913 if (inp->inp_fport) {
5914 entry->cfentry_fport = inp->inp_fport;
5915 }
5916 if (inp->inp_faddr.s_addr) {
5917 entry->cfentry_faddr.addr46.ia46_addr4.s_addr = inp->inp_faddr.s_addr;
5918 }
5919 }
5920 entry->cfentry_family = AF_INET;
5921 return TRUE;
5922 }
5923 return FALSE;
5924 }
5925
5926 bool
5927 check_port(struct sockaddr *addr, u_short port)
5928 {
5929 struct sockaddr_in *sin = NULL;
5930 struct sockaddr_in6 *sin6 = NULL;
5931
5932 if (addr == NULL || port == 0) {
5933 return FALSE;
5934 }
5935
5936 switch (addr->sa_family) {
5937 case AF_INET:
5938 sin = satosin(addr);
5939 if (sin->sin_len != sizeof(*sin)) {
5940 return FALSE;
5941 }
5942 if (port == ntohs(sin->sin_port)) {
5943 return TRUE;
5944 }
5945 break;
5946 case AF_INET6:
5947 sin6 = satosin6(addr);
5948 if (sin6->sin6_len != sizeof(*sin6)) {
5949 return FALSE;
5950 }
5951 if (port == ntohs(sin6->sin6_port)) {
5952 return TRUE;
5953 }
5954 break;
5955 default:
5956 break;
5957 }
5958 return FALSE;
5959 }
5960
5961 struct cfil_hash_entry *
5962 cfil_db_lookup_entry_with_sockid(struct cfil_db *db, u_int64_t sock_id)
5963 {
5964 struct cfilhashhead *cfilhash = NULL;
5965 u_int32_t flowhash = (u_int32_t)(sock_id & 0x0ffffffff);
5966 struct cfil_hash_entry *nextentry;
5967
5968 if (db == NULL || db->cfdb_hashbase == NULL || sock_id == 0) {
5969 return NULL;
5970 }
5971
5972 flowhash &= db->cfdb_hashmask;
5973 cfilhash = &db->cfdb_hashbase[flowhash];
5974
5975 LIST_FOREACH(nextentry, cfilhash, cfentry_link) {
5976 if (nextentry->cfentry_cfil != NULL &&
5977 nextentry->cfentry_cfil->cfi_sock_id == sock_id) {
5978 CFIL_LOG(LOG_DEBUG, "CFIL: UDP <so %llx> matched <id %llu, hash %u>",
5979 (uint64_t)VM_KERNEL_ADDRPERM(db->cfdb_so), nextentry->cfentry_cfil->cfi_sock_id, flowhash);
5980 cfil_hash_entry_log(LOG_DEBUG, db->cfdb_so, nextentry, 0, "CFIL: UDP found entry");
5981 return nextentry;
5982 }
5983 }
5984
5985 CFIL_LOG(LOG_DEBUG, "CFIL: UDP <so %llx> NOT matched <id %llu, hash %u>",
5986 (uint64_t)VM_KERNEL_ADDRPERM(db->cfdb_so), sock_id, flowhash);
5987 return NULL;
5988 }
5989
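/*
 * Illustrative example: the lookup above relies on the low 32 bits of the
 * datagram sock_id carrying the flow hash, so the owning bucket can be
 * recomputed without scanning the whole table. Assuming CFILHASHSIZE is a
 * power of two (so cfdb_hashmask == CFILHASHSIZE - 1), the bucket is derived
 * exactly as in the code above:
 *
 *     u_int32_t flowhash = (u_int32_t)(sock_id & 0x0ffffffff);
 *     u_int32_t bucket   = flowhash & db->cfdb_hashmask;
 *     // e.g. sock_id 0x0000002a1234abcd with hashmask 0xf -> bucket 0xd
 */
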
5990 struct cfil_hash_entry *
5991 cfil_db_lookup_entry_internal(struct cfil_db *db, struct sockaddr *local, struct sockaddr *remote, boolean_t remoteOnly, boolean_t withLocalPort)
5992 {
5993 struct cfil_hash_entry matchentry = { };
5994 struct cfil_hash_entry *nextentry = NULL;
5995 struct inpcb *inp = sotoinpcb(db->cfdb_so);
5996 u_int32_t hashkey_faddr = 0, hashkey_laddr = 0;
5997 u_int16_t hashkey_fport = 0, hashkey_lport = 0;
5998 int inp_hash_element = 0;
5999 struct cfilhashhead *cfilhash = NULL;
6000
6001 CFIL_LOG(LOG_INFO, "");
6002
6003 if (inp == NULL) {
6004 goto done;
6005 }
6006
6007 if (local != NULL) {
6008 fill_cfil_hash_entry_from_address(&matchentry, TRUE, local, FALSE);
6009 } else {
6010 fill_cfil_hash_entry_from_inp(&matchentry, TRUE, inp, FALSE);
6011 }
6012 if (remote != NULL) {
6013 fill_cfil_hash_entry_from_address(&matchentry, FALSE, remote, FALSE);
6014 } else {
6015 fill_cfil_hash_entry_from_inp(&matchentry, FALSE, inp, FALSE);
6016 }
6017
6018 if (inp->inp_vflag & INP_IPV6) {
6019 hashkey_faddr = matchentry.cfentry_faddr.addr6.s6_addr32[3];
6020 hashkey_laddr = (remoteOnly == false) ? matchentry.cfentry_laddr.addr6.s6_addr32[3] : 0;
6021 } else {
6022 hashkey_faddr = matchentry.cfentry_faddr.addr46.ia46_addr4.s_addr;
6023 hashkey_laddr = (remoteOnly == false) ? matchentry.cfentry_laddr.addr46.ia46_addr4.s_addr : 0;
6024 }
6025
6026 hashkey_fport = matchentry.cfentry_fport;
6027 hashkey_lport = (remoteOnly == false || withLocalPort == true) ? matchentry.cfentry_lport : 0;
6028
6029 inp_hash_element = CFIL_HASH(hashkey_laddr, hashkey_faddr, hashkey_lport, hashkey_fport);
6030 inp_hash_element &= db->cfdb_hashmask;
6031 cfilhash = &db->cfdb_hashbase[inp_hash_element];
6032
6033 LIST_FOREACH(nextentry, cfilhash, cfentry_link) {
6034 if ((inp->inp_vflag & INP_IPV6) &&
6035 (remoteOnly || nextentry->cfentry_lport_updated || nextentry->cfentry_lport == matchentry.cfentry_lport) &&
6036 nextentry->cfentry_fport == matchentry.cfentry_fport &&
6037 (remoteOnly || nextentry->cfentry_laddr_updated || IN6_ARE_ADDR_EQUAL(&nextentry->cfentry_laddr.addr6, &matchentry.cfentry_laddr.addr6)) &&
6038 IN6_ARE_ADDR_EQUAL(&nextentry->cfentry_faddr.addr6, &matchentry.cfentry_faddr.addr6)) {
6039 #if DATA_DEBUG
6040 cfil_hash_entry_log(LOG_DEBUG, db->cfdb_so, &matchentry, 0, "CFIL LOOKUP ENTRY: UDP V6 found entry");
6041 #endif
6042 return nextentry;
6043 } else if ((remoteOnly || nextentry->cfentry_lport_updated || nextentry->cfentry_lport == matchentry.cfentry_lport) &&
6044 nextentry->cfentry_fport == matchentry.cfentry_fport &&
6045 (remoteOnly || nextentry->cfentry_laddr_updated || nextentry->cfentry_laddr.addr46.ia46_addr4.s_addr == matchentry.cfentry_laddr.addr46.ia46_addr4.s_addr) &&
6046 nextentry->cfentry_faddr.addr46.ia46_addr4.s_addr == matchentry.cfentry_faddr.addr46.ia46_addr4.s_addr) {
6047 #if DATA_DEBUG
6048 cfil_hash_entry_log(LOG_DEBUG, db->cfdb_so, &matchentry, 0, "CFIL LOOKUP ENTRY: UDP V4 found entry");
6049 #endif
6050 return nextentry;
6051 }
6052 }
6053
6054 done:
6055 #if DATA_DEBUG
6056 cfil_hash_entry_log(LOG_DEBUG, db->cfdb_so, &matchentry, 0, "CFIL LOOKUP ENTRY: UDP no entry found");
6057 #endif
6058 return NULL;
6059 }
6060
6061 struct cfil_hash_entry *
6062 cfil_db_lookup_entry(struct cfil_db *db, struct sockaddr *local, struct sockaddr *remote, boolean_t remoteOnly)
6063 {
6064 struct cfil_hash_entry *entry = cfil_db_lookup_entry_internal(db, local, remote, remoteOnly, false);
6065 if (entry == NULL && remoteOnly == true) {
6066 entry = cfil_db_lookup_entry_internal(db, local, remote, remoteOnly, true);
6067 }
6068 return entry;
6069 }
6070
6071 cfil_sock_id_t
6072 cfil_sock_id_from_datagram_socket(struct socket *so, struct sockaddr *local, struct sockaddr *remote)
6073 {
6074 struct cfil_hash_entry *hash_entry = NULL;
6075
6076 socket_lock_assert_owned(so);
6077
6078 if (so->so_cfil_db == NULL) {
6079 return CFIL_SOCK_ID_NONE;
6080 }
6081
6082 hash_entry = cfil_db_lookup_entry(so->so_cfil_db, local, remote, false);
6083 if (hash_entry == NULL) {
6084 // No match with both local and remote; try matching with remote only
6085 hash_entry = cfil_db_lookup_entry(so->so_cfil_db, local, remote, true);
6086 }
6087 if (hash_entry == NULL || hash_entry->cfentry_cfil == NULL) {
6088 return CFIL_SOCK_ID_NONE;
6089 }
6090
6091 return hash_entry->cfentry_cfil->cfi_sock_id;
6092 }
6093
6094 void
6095 cfil_db_delete_entry(struct cfil_db *db, struct cfil_hash_entry *hash_entry)
6096 {
6097 if (hash_entry == NULL) {
6098 return;
6099 }
6100 if (db == NULL || db->cfdb_count == 0) {
6101 return;
6102 }
6103 db->cfdb_count--;
6104 if (db->cfdb_only_entry == hash_entry) {
6105 db->cfdb_only_entry = NULL;
6106 }
6107 LIST_REMOVE(hash_entry, cfentry_link);
6108 zfree(cfil_hash_entry_zone, hash_entry);
6109 }
6110
6111 struct cfil_hash_entry *
6112 cfil_db_add_entry(struct cfil_db *db, struct sockaddr *local, struct sockaddr *remote)
6113 {
6114 struct cfil_hash_entry *entry = NULL;
6115 struct inpcb *inp = sotoinpcb(db->cfdb_so);
6116 u_int32_t hashkey_faddr = 0, hashkey_laddr = 0;
6117 int inp_hash_element = 0;
6118 struct cfilhashhead *cfilhash = NULL;
6119
6120 CFIL_LOG(LOG_INFO, "");
6121
6122 if (inp == NULL) {
6123 goto done;
6124 }
6125
6126 entry = zalloc(cfil_hash_entry_zone);
6127 if (entry == NULL) {
6128 goto done;
6129 }
6130 bzero(entry, sizeof(struct cfil_hash_entry));
6131
6132 if (local != NULL) {
6133 fill_cfil_hash_entry_from_address(entry, TRUE, local, FALSE);
6134 } else {
6135 fill_cfil_hash_entry_from_inp(entry, TRUE, inp, FALSE);
6136 }
6137 if (remote != NULL) {
6138 fill_cfil_hash_entry_from_address(entry, FALSE, remote, FALSE);
6139 } else {
6140 fill_cfil_hash_entry_from_inp(entry, FALSE, inp, FALSE);
6141 }
6142 entry->cfentry_lastused = net_uptime();
6143
6144 if (inp->inp_vflag & INP_IPV6) {
6145 hashkey_faddr = entry->cfentry_faddr.addr6.s6_addr32[3];
6146 hashkey_laddr = entry->cfentry_laddr.addr6.s6_addr32[3];
6147 } else {
6148 hashkey_faddr = entry->cfentry_faddr.addr46.ia46_addr4.s_addr;
6149 hashkey_laddr = entry->cfentry_laddr.addr46.ia46_addr4.s_addr;
6150 }
6151 entry->cfentry_flowhash = CFIL_HASH(hashkey_laddr, hashkey_faddr,
6152 entry->cfentry_lport, entry->cfentry_fport);
6153 inp_hash_element = entry->cfentry_flowhash & db->cfdb_hashmask;
6154
6155 cfilhash = &db->cfdb_hashbase[inp_hash_element];
6156
6157 LIST_INSERT_HEAD(cfilhash, entry, cfentry_link);
6158 db->cfdb_count++;
6159 db->cfdb_only_entry = entry;
6160 cfil_hash_entry_log(LOG_DEBUG, db->cfdb_so, entry, 0, "CFIL: cfil_db_add_entry: ADDED");
6161
6162 done:
6163 CFIL_LOG(LOG_DEBUG, "CFIL: UDP <so %llx> total count %d", (uint64_t)VM_KERNEL_ADDRPERM(db->cfdb_so), db->cfdb_count);
6164 return entry;
6165 }
6166
6167 void
6168 cfil_db_update_entry_local(struct cfil_db *db, struct cfil_hash_entry *entry, struct sockaddr *local, struct mbuf *control)
6169 {
6170 struct inpcb *inp = sotoinpcb(db->cfdb_so);
6171 union sockaddr_in_4_6 address_buf = { };
6172
6173 CFIL_LOG(LOG_INFO, "");
6174
6175 if (inp == NULL || entry == NULL) {
6176 return;
6177 }
6178
6179 if (LOCAL_ADDRESS_NEEDS_UPDATE(entry)) {
6180 // Flow does not have a local address yet. Retrieve local address
6181 // from control mbufs if present.
6182 if (local == NULL && control != NULL) {
6183 uint8_t *addr_ptr = NULL;
6184 int size = cfil_sock_udp_get_address_from_control(entry->cfentry_family, control, &addr_ptr);
6185
6186 if (size && addr_ptr) {
6187 switch (entry->cfentry_family) {
6188 case AF_INET:
6189 if (size == sizeof(struct in_addr)) {
6190 address_buf.sin.sin_port = 0;
6191 address_buf.sin.sin_family = AF_INET;
6192 address_buf.sin.sin_len = sizeof(struct sockaddr_in);
6193 (void) memcpy(&address_buf.sin.sin_addr, addr_ptr, sizeof(struct in_addr));
6194 local = sintosa(&address_buf.sin);
6195 }
6196 break;
6197 case AF_INET6:
6198 if (size == sizeof(struct in6_addr)) {
6199 address_buf.sin6.sin6_port = 0;
6200 address_buf.sin6.sin6_family = AF_INET6;
6201 address_buf.sin6.sin6_len = sizeof(struct sockaddr_in6);
6202 (void) memcpy(&address_buf.sin6.sin6_addr, addr_ptr, sizeof(struct in6_addr));
6203 local = sin6tosa(&address_buf.sin6);
6204 }
6205 break;
6206 default:
6207 break;
6208 }
6209 }
6210 }
6211 if (local != NULL) {
6212 fill_cfil_hash_entry_from_address(entry, TRUE, local, TRUE);
6213 } else {
6214 fill_cfil_hash_entry_from_inp(entry, TRUE, inp, TRUE);
6215 }
6216 }
6217
6218 if (LOCAL_PORT_NEEDS_UPDATE(entry, db->cfdb_so)) {
6219 fill_cfil_hash_entry_from_inp(entry, TRUE, inp, TRUE);
6220 }
6221
6222 return;
6223 }
6224
6225 struct cfil_info *
6226 cfil_db_get_cfil_info(struct cfil_db *db, cfil_sock_id_t id)
6227 {
6228 struct cfil_hash_entry *hash_entry = NULL;
6229
6230 CFIL_LOG(LOG_INFO, "");
6231
6232 if (db == NULL || id == 0) {
6233 CFIL_LOG(LOG_DEBUG, "CFIL: UDP <so %llx> NULL DB <id %llu>",
6234 db ? (uint64_t)VM_KERNEL_ADDRPERM(db->cfdb_so) : 0, id);
6235 return NULL;
6236 }
6237
6238 // This is an optimization for a connected UDP socket, which has only one flow.
6239 // No need to do the hash lookup.
6240 if (db->cfdb_count == 1) {
6241 if (db->cfdb_only_entry && db->cfdb_only_entry->cfentry_cfil &&
6242 db->cfdb_only_entry->cfentry_cfil->cfi_sock_id == id) {
6243 return db->cfdb_only_entry->cfentry_cfil;
6244 }
6245 }
6246
6247 hash_entry = cfil_db_lookup_entry_with_sockid(db, id);
6248 return hash_entry != NULL ? hash_entry->cfentry_cfil : NULL;
6249 }
6250
6251 struct cfil_hash_entry *
6252 cfil_sock_udp_get_flow(struct socket *so, uint32_t filter_control_unit, bool outgoing, struct sockaddr *local, struct sockaddr *remote, struct mbuf *control, int debug)
6253 {
6254 struct cfil_hash_entry *hash_entry = NULL;
6255 int new_filter_control_unit = 0;
6256
6257 errno_t error = 0;
6258 socket_lock_assert_owned(so);
6259
6260 // If new socket, allocate cfil db
6261 if (so->so_cfil_db == NULL) {
6262 if (cfil_db_init(so) != 0) {
6263 return NULL;
6264 }
6265 }
6266
6267 // See if flow already exists.
6268 hash_entry = cfil_db_lookup_entry(so->so_cfil_db, local, remote, false);
6269 if (hash_entry == NULL) {
6270 // No match with both local and remote; try matching with remote only
6271 hash_entry = cfil_db_lookup_entry(so->so_cfil_db, local, remote, true);
6272 }
6273 if (hash_entry != NULL) {
6274 /* Drop pre-existing UDP flow if filter state changed */
6275 new_filter_control_unit = necp_socket_get_content_filter_control_unit(so);
6276 if (new_filter_control_unit > 0 &&
6277 new_filter_control_unit != hash_entry->cfentry_cfil->cfi_filter_control_unit) {
6278 return NULL;
6279 }
6280
6281 // Try to update flow info from socket and/or control mbufs if necessary
6282 if (LOCAL_ADDRESS_NEEDS_UPDATE(hash_entry) || LOCAL_PORT_NEEDS_UPDATE(hash_entry, so)) {
6283 cfil_db_update_entry_local(so->so_cfil_db, hash_entry, local, control);
6284 }
6285 return hash_entry;
6286 }
6287
6288 hash_entry = cfil_db_add_entry(so->so_cfil_db, local, remote);
6289 if (hash_entry == NULL) {
6290 OSIncrementAtomic(&cfil_stats.cfs_sock_attach_no_mem);
6291 CFIL_LOG(LOG_ERR, "CFIL: UDP failed to add entry");
6292 return NULL;
6293 }
6294
6295 if (cfil_info_alloc(so, hash_entry) == NULL ||
6296 hash_entry->cfentry_cfil == NULL) {
6297 cfil_db_delete_entry(so->so_cfil_db, hash_entry);
6298 CFIL_LOG(LOG_ERR, "CFIL: UDP failed to alloc cfil_info");
6299 OSIncrementAtomic(&cfil_stats.cfs_sock_attach_no_mem);
6300 return NULL;
6301 }
6302 hash_entry->cfentry_cfil->cfi_filter_control_unit = filter_control_unit;
6303 hash_entry->cfentry_cfil->cfi_dir = outgoing ? CFS_CONNECTION_DIR_OUT : CFS_CONNECTION_DIR_IN;
6304 hash_entry->cfentry_cfil->cfi_debug = debug;
6305
6306 #if LIFECYCLE_DEBUG
6307 cfil_info_log(LOG_ERR, hash_entry->cfentry_cfil, "CFIL: LIFECYCLE: ADDED");
6308 #endif
6309
6310 // Check if we can update the new flow's local address from control mbufs
6311 if (control != NULL) {
6312 cfil_db_update_entry_local(so->so_cfil_db, hash_entry, local, control);
6313 }
6314
6315 if (cfil_info_attach_unit(so, filter_control_unit, hash_entry->cfentry_cfil) == 0) {
6316 CFIL_INFO_FREE(hash_entry->cfentry_cfil);
6317 cfil_db_delete_entry(so->so_cfil_db, hash_entry);
6318 CFIL_LOG(LOG_ERR, "CFIL: UDP cfil_info_attach_unit(%u) failed",
6319 filter_control_unit);
6320 OSIncrementAtomic(&cfil_stats.cfs_sock_attach_failed);
6321 return NULL;
6322 }
6323 CFIL_LOG(LOG_DEBUG, "CFIL: UDP <so %llx> filter_control_unit %u sockID %llu attached",
6324 (uint64_t)VM_KERNEL_ADDRPERM(so),
6325 filter_control_unit, hash_entry->cfentry_cfil->cfi_sock_id);
6326
6327 so->so_flags |= SOF_CONTENT_FILTER;
6328 OSIncrementAtomic(&cfil_stats.cfs_sock_attached);
6329
6330 /* Hold a reference on the socket for each flow */
6331 so->so_usecount++;
6332
6333 if (debug) {
6334 cfil_info_log(LOG_ERR, hash_entry->cfentry_cfil, "CFIL: LIFECYCLE: ADDED");
6335 }
6336
6337 error = cfil_dispatch_attach_event(so, hash_entry->cfentry_cfil, 0,
6338 outgoing ? CFS_CONNECTION_DIR_OUT : CFS_CONNECTION_DIR_IN);
6339 /* We can recover from flow control or out of memory errors */
6340 if (error != 0 && error != ENOBUFS && error != ENOMEM) {
6341 return NULL;
6342 }
6343
6344 CFIL_INFO_VERIFY(hash_entry->cfentry_cfil);
6345 return hash_entry;
6346 }
6347
6348 int
6349 cfil_sock_udp_get_address_from_control(sa_family_t family, struct mbuf *control, uint8_t **address_ptr)
6350 {
6351 struct cmsghdr *cm;
6352 struct in6_pktinfo *pi6;
6353
6354 if (control == NULL || address_ptr == NULL) {
6355 return 0;
6356 }
6357
6358 while (control) {
6359 if (control->m_type != MT_CONTROL) {
6360 control = control->m_next;
6361 continue;
6362 }
6363
6364 for (cm = M_FIRST_CMSGHDR(control);
6365 is_cmsg_valid(control, cm);
6366 cm = M_NXT_CMSGHDR(control, cm)) {
6367 switch (cm->cmsg_type) {
6368 case IP_RECVDSTADDR:
6369 if (family == AF_INET &&
6370 cm->cmsg_level == IPPROTO_IP &&
6371 cm->cmsg_len == CMSG_LEN(sizeof(struct in_addr))) {
6372 *address_ptr = CMSG_DATA(cm);
6373 return sizeof(struct in_addr);
6374 }
6375 break;
6376 case IPV6_PKTINFO:
6377 case IPV6_2292PKTINFO:
6378 if (family == AF_INET6 &&
6379 cm->cmsg_level == IPPROTO_IPV6 &&
6380 cm->cmsg_len == CMSG_LEN(sizeof(struct in6_pktinfo))) {
6381 pi6 = (struct in6_pktinfo *)(void *)CMSG_DATA(cm);
6382 *address_ptr = (uint8_t *)&pi6->ipi6_addr;
6383 return sizeof(struct in6_addr);
6384 }
6385 break;
6386 default:
6387 break;
6388 }
6389 }
6390
6391 control = control->m_next;
6392 }
6393 return 0;
6394 }
6395
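/*
 * Illustrative sketch: the control mbufs parsed above typically originate
 * from sendmsg(2) ancillary data in which an application pins the source
 * address of an outgoing datagram. A standard RFC 3542 style IPv6 example
 * follows (the IPv4 IP_RECVDSTADDR variant is analogous); desired_source_addr,
 * dst, iov and fd are assumed to be set up by the caller, and user space on
 * Darwin needs __APPLE_USE_RFC_3542 defined before <netinet/in.h> for
 * IPV6_PKTINFO:
 *
 *     #include <sys/socket.h>
 *     #include <netinet/in.h>
 *     #include <string.h>
 *
 *     struct in6_pktinfo pi = { 0 };
 *     pi.ipi6_addr = desired_source_addr;          // source address to use (assumed)
 *
 *     char cbuf[CMSG_SPACE(sizeof(pi))];
 *     struct msghdr msg = { 0 };
 *     msg.msg_name = (void *)&dst;                 // destination sockaddr_in6 (assumed)
 *     msg.msg_namelen = sizeof(dst);
 *     msg.msg_iov = &iov;                          // payload iovec (assumed)
 *     msg.msg_iovlen = 1;
 *     msg.msg_control = cbuf;
 *     msg.msg_controllen = sizeof(cbuf);
 *
 *     struct cmsghdr *cm = CMSG_FIRSTHDR(&msg);
 *     cm->cmsg_level = IPPROTO_IPV6;
 *     cm->cmsg_type = IPV6_PKTINFO;
 *     cm->cmsg_len = CMSG_LEN(sizeof(pi));
 *     memcpy(CMSG_DATA(cm), &pi, sizeof(pi));
 *
 *     sendmsg(fd, &msg, 0);                        // fd is the UDP socket (assumed)
 */
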
6396 errno_t
6397 cfil_sock_udp_handle_data(bool outgoing, struct socket *so,
6398 struct sockaddr *local, struct sockaddr *remote,
6399 struct mbuf *data, struct mbuf *control, uint32_t flags)
6400 {
6401 #pragma unused(outgoing, so, local, remote, data, control, flags)
6402 errno_t error = 0;
6403 uint32_t filter_control_unit;
6404 struct cfil_hash_entry *hash_entry = NULL;
6405 struct cfil_info *cfil_info = NULL;
6406 int debug = 0;
6407
6408 socket_lock_assert_owned(so);
6409
6410 if (cfil_active_count == 0) {
6411 CFIL_LOG(LOG_DEBUG, "CFIL: UDP no active filter");
6412 OSIncrementAtomic(&cfil_stats.cfs_sock_attach_in_vain);
6413 return error;
6414 }
6415
6416 // Socket has been blessed to skip content filtering (SOF1_CONTENT_FILTER_SKIP)
6417 if ((so->so_flags1 & SOF1_CONTENT_FILTER_SKIP) != 0) {
6418 return error;
6419 }
6420
6421 filter_control_unit = necp_socket_get_content_filter_control_unit(so);
6422 if (filter_control_unit == 0) {
6423 CFIL_LOG(LOG_DEBUG, "CFIL: UDP failed to get control unit");
6424 return error;
6425 }
6426
6427 if (filter_control_unit == NECP_FILTER_UNIT_NO_FILTER) {
6428 return error;
6429 }
6430
6431 if ((filter_control_unit & NECP_MASK_USERSPACE_ONLY) != 0) {
6432 CFIL_LOG(LOG_DEBUG, "CFIL: UDP user space only");
6433 OSIncrementAtomic(&cfil_stats.cfs_sock_userspace_only);
6434 return error;
6435 }
6436
6437 hash_entry = cfil_sock_udp_get_flow(so, filter_control_unit, outgoing, local, remote, control, debug);
6438 if (hash_entry == NULL || hash_entry->cfentry_cfil == NULL) {
6439 CFIL_LOG(LOG_ERR, "CFIL: Falied to create UDP flow");
6440 return EPIPE;
6441 }
6442 // Update last-used timestamp; this drives the flow idle timeout
6443 hash_entry->cfentry_lastused = net_uptime();
6444 cfil_info = hash_entry->cfentry_cfil;
6445
6446 if (cfil_info->cfi_flags & CFIF_DROP) {
6447 #if DATA_DEBUG
6448 cfil_hash_entry_log(LOG_DEBUG, so, hash_entry, 0, "CFIL: UDP DROP");
6449 #endif
6450 return EPIPE;
6451 }
6452 if (control != NULL) {
6453 OSIncrementAtomic(&cfil_stats.cfs_data_in_control);
6454 }
6455 if (data->m_type == MT_OOBDATA) {
6456 CFIL_LOG(LOG_ERR, "so %llx MSG_OOB",
6457 (uint64_t)VM_KERNEL_ADDRPERM(so));
6458 OSIncrementAtomic(&cfil_stats.cfs_data_in_oob);
6459 }
6460
6461 error = cfil_data_common(so, cfil_info, outgoing, remote, data, control, flags);
6462
6463 return error;
6464 }
6465
6466 /*
6467 * Go through all UDP flows for the specified socket and return TRUE if
6468 * any flow is still attached. If need_wait is TRUE, wait on first
6469 * attached flow.
6470 */
6471 static int
6472 cfil_filters_udp_attached(struct socket *so, bool need_wait)
6473 {
6474 struct timespec ts;
6475 lck_mtx_t *mutex_held;
6476 struct cfilhashhead *cfilhash = NULL;
6477 struct cfil_db *db = NULL;
6478 struct cfil_hash_entry *hash_entry = NULL;
6479 struct cfil_hash_entry *temp_hash_entry = NULL;
6480 struct cfil_info *cfil_info = NULL;
6481 struct cfil_entry *entry = NULL;
6482 errno_t error = 0;
6483 int kcunit;
6484 int attached = 0;
6485 uint64_t sock_flow_id = 0;
6486
6487 socket_lock_assert_owned(so);
6488
6489 if ((so->so_flags & SOF_CONTENT_FILTER) != 0 && so->so_cfil_db != NULL) {
6490 if (so->so_proto->pr_getlock != NULL) {
6491 mutex_held = (*so->so_proto->pr_getlock)(so, PR_F_WILLUNLOCK);
6492 } else {
6493 mutex_held = so->so_proto->pr_domain->dom_mtx;
6494 }
6495 LCK_MTX_ASSERT(mutex_held, LCK_MTX_ASSERT_OWNED);
6496
6497 db = so->so_cfil_db;
6498
6499 for (int i = 0; i < CFILHASHSIZE; i++) {
6500 cfilhash = &db->cfdb_hashbase[i];
6501
6502 LIST_FOREACH_SAFE(hash_entry, cfilhash, cfentry_link, temp_hash_entry) {
6503 if (hash_entry->cfentry_cfil != NULL) {
6504 cfil_info = hash_entry->cfentry_cfil;
6505 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
6506 entry = &cfil_info->cfi_entries[kcunit - 1];
6507
6508 /* Are we attached to the filter? */
6509 if (entry->cfe_filter == NULL) {
6510 continue;
6511 }
6512
6513 if ((entry->cfe_flags & CFEF_SENT_SOCK_ATTACHED) == 0) {
6514 continue;
6515 }
6516 if ((entry->cfe_flags & CFEF_CFIL_DETACHED) != 0) {
6517 continue;
6518 }
6519
6520 attached = 1;
6521
6522 if (need_wait == TRUE) {
6523 #if LIFECYCLE_DEBUG
6524 cfil_info_log(LOG_ERR, cfil_info, "CFIL: LIFECYCLE: WAIT FOR FLOW TO FINISH");
6525 #endif
6526
6527 ts.tv_sec = cfil_close_wait_timeout / 1000;
6528 ts.tv_nsec = (cfil_close_wait_timeout % 1000) *
6529 NSEC_PER_USEC * 1000;
6530
6531 OSIncrementAtomic(&cfil_stats.cfs_close_wait);
6532 cfil_info->cfi_flags |= CFIF_CLOSE_WAIT;
6533 sock_flow_id = cfil_info->cfi_sock_id;
6534
6535 error = msleep((caddr_t)cfil_info, mutex_held,
6536 PSOCK | PCATCH, "cfil_filters_udp_attached", &ts);
6537
6538 // Woke up from sleep; check that cfil_info is still valid
6539 if (so->so_cfil_db == NULL ||
6540 (cfil_info != cfil_db_get_cfil_info(so->so_cfil_db, sock_flow_id))) {
6541 // cfil_info is not valid, do not continue
6542 goto done;
6543 }
6544
6545 cfil_info->cfi_flags &= ~CFIF_CLOSE_WAIT;
6546
6547 #if LIFECYCLE_DEBUG
6548 cfil_info_log(LOG_ERR, cfil_info, "CFIL: LIFECYCLE: WAIT FOR FLOW DONE");
6549 #endif
6550
6551 /*
6552 * Force close in case of timeout
6553 */
6554 if (error != 0) {
6555 OSIncrementAtomic(&cfil_stats.cfs_close_wait_timeout);
6556 #if LIFECYCLE_DEBUG
6557 cfil_info_log(LOG_ERR, cfil_info, "CFIL: LIFECYCLE: WAIT FOR FLOW TIMED OUT, FORCE DETACH");
6558 #endif
6559 entry->cfe_flags |= CFEF_CFIL_DETACHED;
6560 }
6561 }
6562 goto done;
6563 }
6564 }
6565 }
6566 }
6567 }
6568
6569 done:
6570 return attached;
6571 }
6572
6573 int32_t
6574 cfil_sock_udp_data_pending(struct sockbuf *sb, bool check_thread)
6575 {
6576 struct socket *so = sb->sb_so;
6577 struct cfi_buf *cfi_buf;
6578 uint64_t pending = 0;
6579 uint64_t total_pending = 0;
6580 struct cfilhashhead *cfilhash = NULL;
6581 struct cfil_db *db = NULL;
6582 struct cfil_hash_entry *hash_entry = NULL;
6583 struct cfil_hash_entry *temp_hash_entry = NULL;
6584
6585 socket_lock_assert_owned(so);
6586
6587 if ((so->so_flags & SOF_CONTENT_FILTER) != 0 && so->so_cfil_db != NULL &&
6588 (check_thread == FALSE || so->so_snd.sb_cfil_thread != current_thread())) {
6589 db = so->so_cfil_db;
6590
6591 for (int i = 0; i < CFILHASHSIZE; i++) {
6592 cfilhash = &db->cfdb_hashbase[i];
6593
6594 LIST_FOREACH_SAFE(hash_entry, cfilhash, cfentry_link, temp_hash_entry) {
6595 if (hash_entry->cfentry_cfil != NULL) {
6596 if ((sb->sb_flags & SB_RECV) == 0) {
6597 cfi_buf = &hash_entry->cfentry_cfil->cfi_snd;
6598 } else {
6599 cfi_buf = &hash_entry->cfentry_cfil->cfi_rcv;
6600 }
6601
6602 pending = cfi_buf->cfi_pending_last - cfi_buf->cfi_pending_first;
6603 /*
6604 * If we are limited by the "chars of mbufs used" roughly
6605 * adjust so we won't overcommit
6606 */
6607 if ((uint64_t)cfi_buf->cfi_pending_mbcnt > pending) {
6608 pending = cfi_buf->cfi_pending_mbcnt;
6609 }
6610
6611 total_pending += pending;
6612 }
6613 }
6614 }
6615
6616 VERIFY(total_pending < INT32_MAX);
6617 #if DATA_DEBUG
6618 CFIL_LOG(LOG_DEBUG, "CFIL: <so %llx> total pending %llu <check_thread %d>",
6619 (uint64_t)VM_KERNEL_ADDRPERM(so),
6620 total_pending, check_thread);
6621 #endif
6622 }
6623
6624 return (int32_t)(total_pending);
6625 }
6626
6627 int
6628 cfil_sock_udp_notify_shutdown(struct socket *so, int how, int drop_flag, int shut_flag)
6629 {
6630 struct cfil_info *cfil_info = NULL;
6631 struct cfilhashhead *cfilhash = NULL;
6632 struct cfil_db *db = NULL;
6633 struct cfil_hash_entry *hash_entry = NULL;
6634 struct cfil_hash_entry *temp_hash_entry = NULL;
6635 errno_t error = 0;
6636 int done_count = 0;
6637 int kcunit;
6638
6639 socket_lock_assert_owned(so);
6640
6641 if ((so->so_flags & SOF_CONTENT_FILTER) != 0 && so->so_cfil_db != NULL) {
6642 db = so->so_cfil_db;
6643
6644 for (int i = 0; i < CFILHASHSIZE; i++) {
6645 cfilhash = &db->cfdb_hashbase[i];
6646
6647 LIST_FOREACH_SAFE(hash_entry, cfilhash, cfentry_link, temp_hash_entry) {
6648 if (hash_entry->cfentry_cfil != NULL) {
6649 cfil_info = hash_entry->cfentry_cfil;
6650
6651 // This flow is marked as DROP
6652 if (cfil_info->cfi_flags & drop_flag) {
6653 done_count++;
6654 continue;
6655 }
6656
6657 // This flow has been shut already, skip
6658 if (cfil_info->cfi_flags & shut_flag) {
6659 continue;
6660 }
6661 // Mark flow as shut
6662 cfil_info->cfi_flags |= shut_flag;
6663 done_count++;
6664
6665 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
6666 /* Disconnect incoming side */
6667 if (how != SHUT_WR) {
6668 error = cfil_dispatch_disconnect_event(so, cfil_info, kcunit, 0);
6669 }
6670 /* Disconnect outgoing side */
6671 if (how != SHUT_RD) {
6672 error = cfil_dispatch_disconnect_event(so, cfil_info, kcunit, 1);
6673 }
6674 }
6675 }
6676 }
6677 }
6678 }
6679
6680 if (done_count == 0) {
6681 error = ENOTCONN;
6682 }
6683 return error;
6684 }
6685
6686 int
6687 cfil_sock_udp_shutdown(struct socket *so, int *how)
6688 {
6689 int error = 0;
6690
6691 if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || (so->so_cfil_db == NULL)) {
6692 goto done;
6693 }
6694
6695 socket_lock_assert_owned(so);
6696
6697 CFIL_LOG(LOG_INFO, "so %llx how %d",
6698 (uint64_t)VM_KERNEL_ADDRPERM(so), *how);
6699
6700 /*
6701 * Check the state of the socket before the content filter
6702 */
6703 if (*how != SHUT_WR && (so->so_state & SS_CANTRCVMORE) != 0) {
6704 /* read already shut down */
6705 error = ENOTCONN;
6706 goto done;
6707 }
6708 if (*how != SHUT_RD && (so->so_state & SS_CANTSENDMORE) != 0) {
6709 /* write already shut down */
6710 error = ENOTCONN;
6711 goto done;
6712 }
6713
6714 /*
6715 * shutdown read: SHUT_RD or SHUT_RDWR
6716 */
6717 if (*how != SHUT_WR) {
6718 error = cfil_sock_udp_notify_shutdown(so, SHUT_RD, CFIF_DROP, CFIF_SHUT_RD);
6719 if (error != 0) {
6720 goto done;
6721 }
6722 }
6723 /*
6724 * shutdown write: SHUT_WR or SHUT_RDWR
6725 */
6726 if (*how != SHUT_RD) {
6727 error = cfil_sock_udp_notify_shutdown(so, SHUT_WR, CFIF_DROP, CFIF_SHUT_WR);
6728 if (error != 0) {
6729 goto done;
6730 }
6731
6732 /*
6733 * When outgoing data is pending, we delay the shutdown at the
6734 * protocol level until the content filters give the final
6735 * verdict on the pending data.
6736 */
6737 if (cfil_sock_data_pending(&so->so_snd) != 0) {
6738 /*
6739 * When shutting down the read and write sides at once
6740 * we can proceed to the final shutdown of the read
6741 * side. Otherwise, we just return.
6742 */
6743 if (*how == SHUT_WR) {
6744 error = EJUSTRETURN;
6745 } else if (*how == SHUT_RDWR) {
6746 *how = SHUT_RD;
6747 }
6748 }
6749 }
6750 done:
6751 return error;
6752 }
6753
6754 void
6755 cfil_sock_udp_close_wait(struct socket *so)
6756 {
6757 socket_lock_assert_owned(so);
6758
6759 while (cfil_filters_udp_attached(so, FALSE)) {
6760 /*
6761 * Notify the filters we are going away so they can detach
6762 */
6763 cfil_sock_udp_notify_shutdown(so, SHUT_RDWR, 0, 0);
6764
6765 /*
6766 * Make sure we still need to wait after the filters are notified
6767 * of the disconnection
6768 */
6769 if (cfil_filters_udp_attached(so, TRUE) == 0) {
6770 break;
6771 }
6772 }
6773 }
6774
6775 void
6776 cfil_sock_udp_is_closed(struct socket *so)
6777 {
6778 struct cfil_info *cfil_info = NULL;
6779 struct cfilhashhead *cfilhash = NULL;
6780 struct cfil_db *db = NULL;
6781 struct cfil_hash_entry *hash_entry = NULL;
6782 struct cfil_hash_entry *temp_hash_entry = NULL;
6783 errno_t error = 0;
6784 int kcunit;
6785
6786 socket_lock_assert_owned(so);
6787
6788 if ((so->so_flags & SOF_CONTENT_FILTER) != 0 && so->so_cfil_db != NULL) {
6789 db = so->so_cfil_db;
6790
6791 for (int i = 0; i < CFILHASHSIZE; i++) {
6792 cfilhash = &db->cfdb_hashbase[i];
6793
6794 LIST_FOREACH_SAFE(hash_entry, cfilhash, cfentry_link, temp_hash_entry) {
6795 if (hash_entry->cfentry_cfil != NULL) {
6796 cfil_info = hash_entry->cfentry_cfil;
6797
6798 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
6799 /* Let the filters know of the closing */
6800 error = cfil_dispatch_closed_event(so, cfil_info, kcunit);
6801 }
6802
6803 /* Last chance to push passed data out */
6804 error = cfil_acquire_sockbuf(so, cfil_info, 1);
6805 if (error == 0) {
6806 cfil_service_inject_queue(so, cfil_info, 1);
6807 }
6808 cfil_release_sockbuf(so, 1);
6809
6810 cfil_info->cfi_flags |= CFIF_SOCK_CLOSED;
6811
6812 /* Pending data needs to go */
6813 cfil_flush_queues(so, cfil_info);
6814
6815 CFIL_INFO_VERIFY(cfil_info);
6816 }
6817 }
6818 }
6819 }
6820 }
6821
6822 void
6823 cfil_sock_udp_buf_update(struct sockbuf *sb)
6824 {
6825 struct cfil_info *cfil_info = NULL;
6826 struct cfilhashhead *cfilhash = NULL;
6827 struct cfil_db *db = NULL;
6828 struct cfil_hash_entry *hash_entry = NULL;
6829 struct cfil_hash_entry *temp_hash_entry = NULL;
6830 errno_t error = 0;
6831 int outgoing;
6832 struct socket *so = sb->sb_so;
6833
6834 socket_lock_assert_owned(so);
6835
6836 if ((so->so_flags & SOF_CONTENT_FILTER) != 0 && so->so_cfil_db != NULL) {
6837 if (!cfil_sbtrim) {
6838 return;
6839 }
6840
6841 db = so->so_cfil_db;
6842
6843 for (int i = 0; i < CFILHASHSIZE; i++) {
6844 cfilhash = &db->cfdb_hashbase[i];
6845
6846 LIST_FOREACH_SAFE(hash_entry, cfilhash, cfentry_link, temp_hash_entry) {
6847 if (hash_entry->cfentry_cfil != NULL) {
6848 cfil_info = hash_entry->cfentry_cfil;
6849
6850 if ((sb->sb_flags & SB_RECV) == 0) {
6851 if ((cfil_info->cfi_flags & CFIF_RETRY_INJECT_OUT) == 0) {
6852 return;
6853 }
6854 outgoing = 1;
6855 OSIncrementAtomic(&cfil_stats.cfs_inject_q_out_retry);
6856 } else {
6857 if ((cfil_info->cfi_flags & CFIF_RETRY_INJECT_IN) == 0) {
6858 return;
6859 }
6860 outgoing = 0;
6861 OSIncrementAtomic(&cfil_stats.cfs_inject_q_in_retry);
6862 }
6863
6864 CFIL_LOG(LOG_NOTICE, "so %llx outgoing %d",
6865 (uint64_t)VM_KERNEL_ADDRPERM(so), outgoing);
6866
6867 error = cfil_acquire_sockbuf(so, cfil_info, outgoing);
6868 if (error == 0) {
6869 cfil_service_inject_queue(so, cfil_info, outgoing);
6870 }
6871 cfil_release_sockbuf(so, outgoing);
6872 }
6873 }
6874 }
6875 }
6876 }
6877
6878 void
6879 cfil_filter_show(u_int32_t kcunit)
6880 {
6881 struct content_filter *cfc = NULL;
6882 struct cfil_entry *entry;
6883 int count = 0;
6884
6885 if (content_filters == NULL) {
6886 return;
6887 }
6888 if (kcunit > MAX_CONTENT_FILTER) {
6889 return;
6890 }
6891
6892 cfil_rw_lock_shared(&cfil_lck_rw);
6893
6894 if (content_filters[kcunit - 1] == NULL) {
6895 cfil_rw_unlock_shared(&cfil_lck_rw);
6896 return;
6897 }
6898 cfc = content_filters[kcunit - 1];
6899
6900 CFIL_LOG(LOG_ERR, "CFIL: FILTER SHOW: Filter <unit %d, entry count %d> flags <%lx>:",
6901 kcunit, cfc->cf_sock_count, (unsigned long)cfc->cf_flags);
6902 if (cfc->cf_flags & CFF_DETACHING) {
6903 CFIL_LOG(LOG_ERR, "CFIL: FILTER SHOW: - DETACHING");
6904 }
6905 if (cfc->cf_flags & CFF_ACTIVE) {
6906 CFIL_LOG(LOG_ERR, "CFIL: FILTER SHOW: - ACTIVE");
6907 }
6908 if (cfc->cf_flags & CFF_FLOW_CONTROLLED) {
6909 CFIL_LOG(LOG_ERR, "CFIL: FILTER SHOW: - FLOW CONTROLLED");
6910 }
6911
6912 TAILQ_FOREACH(entry, &cfc->cf_sock_entries, cfe_link) {
6913 if (entry->cfe_cfil_info && entry->cfe_cfil_info->cfi_so) {
6914 struct cfil_info *cfil_info = entry->cfe_cfil_info;
6915
6916 count++;
6917
6918 if (entry->cfe_flags & CFEF_CFIL_DETACHED) {
6919 cfil_info_log(LOG_ERR, cfil_info, "CFIL: FILTER SHOW: - DETACHED");
6920 } else {
6921 cfil_info_log(LOG_ERR, cfil_info, "CFIL: FILTER SHOW: - ATTACHED");
6922 }
6923 }
6924 }
6925
6926 CFIL_LOG(LOG_ERR, "CFIL: FILTER SHOW: Filter - total entries shown: %d", count);
6927
6928 cfil_rw_unlock_shared(&cfil_lck_rw);
6929 }
6930
6931 void
6932 cfil_info_show(void)
6933 {
6934 struct cfil_info *cfil_info;
6935 int count = 0;
6936
6937 cfil_rw_lock_shared(&cfil_lck_rw);
6938
6939 CFIL_LOG(LOG_ERR, "CFIL: INFO SHOW: count %d", cfil_sock_attached_count);
6940
6941 TAILQ_FOREACH(cfil_info, &cfil_sock_head, cfi_link) {
6942 count++;
6943
6944 cfil_info_log(LOG_ERR, cfil_info, "CFIL: INFO SHOW");
6945
6946 if (cfil_info->cfi_flags & CFIF_DROP) {
6947 CFIL_LOG(LOG_ERR, "CFIL: INFO FLAG - DROP");
6948 }
6949 if (cfil_info->cfi_flags & CFIF_CLOSE_WAIT) {
6950 CFIL_LOG(LOG_ERR, "CFIL: INFO FLAG - CLOSE_WAIT");
6951 }
6952 if (cfil_info->cfi_flags & CFIF_SOCK_CLOSED) {
6953 CFIL_LOG(LOG_ERR, "CFIL: INFO FLAG - SOCK_CLOSED");
6954 }
6955 if (cfil_info->cfi_flags & CFIF_RETRY_INJECT_IN) {
6956 CFIL_LOG(LOG_ERR, "CFIL: INFO FLAG - RETRY_INJECT_IN");
6957 }
6958 if (cfil_info->cfi_flags & CFIF_RETRY_INJECT_OUT) {
6959 CFIL_LOG(LOG_ERR, "CFIL: INFO FLAG - RETRY_INJECT_OUT");
6960 }
6961 if (cfil_info->cfi_flags & CFIF_SHUT_WR) {
6962 CFIL_LOG(LOG_ERR, "CFIL: INFO FLAG - SHUT_WR");
6963 }
6964 if (cfil_info->cfi_flags & CFIF_SHUT_RD) {
6965 CFIL_LOG(LOG_ERR, "CFIL: INFO FLAG - SHUT_RD");
6966 }
6967 }
6968
6969 CFIL_LOG(LOG_ERR, "CFIL: INFO SHOW: total cfil_info shown: %d", count);
6970
6971 cfil_rw_unlock_shared(&cfil_lck_rw);
6972 }
6973
6974 bool
6975 cfil_info_idle_timed_out(struct cfil_info *cfil_info, int timeout, u_int64_t current_time)
6976 {
6977 if (cfil_info && cfil_info->cfi_hash_entry &&
6978 (current_time - cfil_info->cfi_hash_entry->cfentry_lastused >= (u_int64_t)timeout)) {
6979 #if GC_DEBUG
6980 cfil_info_log(LOG_ERR, cfil_info, "CFIL: flow IDLE timeout expired");
6981 #endif
6982 return true;
6983 }
6984 return false;
6985 }
6986
6987 bool
6988 cfil_info_action_timed_out(struct cfil_info *cfil_info, int timeout)
6989 {
6990 struct cfil_entry *entry;
6991 struct timeval current_tv;
6992 struct timeval diff_time;
6993
6994 if (cfil_info == NULL) {
6995 return false;
6996 }
6997
6998 /*
6999 * If we have queued up more data than the pass offset and we haven't received
7000 * an action from user space for a while (the user space filter might have crashed),
7001 * return action timed out.
7002 */
7003 if (cfil_info->cfi_snd.cfi_pending_last > cfil_info->cfi_snd.cfi_pass_offset ||
7004 cfil_info->cfi_rcv.cfi_pending_last > cfil_info->cfi_rcv.cfi_pass_offset) {
7005 microuptime(&current_tv);
7006
7007 for (int kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
7008 entry = &cfil_info->cfi_entries[kcunit - 1];
7009
7010 if (entry->cfe_filter == NULL) {
7011 continue;
7012 }
7013
7014 if (cfil_info->cfi_snd.cfi_pending_last > entry->cfe_snd.cfe_pass_offset ||
7015 cfil_info->cfi_rcv.cfi_pending_last > entry->cfe_rcv.cfe_pass_offset) {
7016 // haven't gotten an action from this filter, check timeout
7017 timersub(&current_tv, &entry->cfe_last_action, &diff_time);
7018 if (diff_time.tv_sec >= timeout) {
7019 #if GC_DEBUG
7020 cfil_info_log(LOG_ERR, cfil_info, "CFIL: flow ACTION timeout expired");
7021 #endif
7022 return true;
7023 }
7024 }
7025 }
7026 }
7027 return false;
7028 }
7029
7030 bool
7031 cfil_info_buffer_threshold_exceeded(struct cfil_info *cfil_info)
7032 {
7033 if (cfil_info == NULL) {
7034 return false;
7035 }
7036
7037 /*
7038 * Clean up flow if it exceeded queue thresholds
7039 */
7040 if (cfil_info->cfi_snd.cfi_tail_drop_cnt ||
7041 cfil_info->cfi_rcv.cfi_tail_drop_cnt) {
7042 #if GC_DEBUG
7043 CFIL_LOG(LOG_ERR, "CFIL: queue threshold exceeded: mbuf max <count: %d bytes: %d> tail drop count <OUT: %d IN: %d>",
7044 cfil_udp_gc_mbuf_num_max,
7045 cfil_udp_gc_mbuf_cnt_max,
7046 cfil_info->cfi_snd.cfi_tail_drop_cnt,
7047 cfil_info->cfi_rcv.cfi_tail_drop_cnt);
7048 cfil_info_log(LOG_ERR, cfil_info, "CFIL: queue threshold exceeded");
7049 #endif
7050 return true;
7051 }
7052
7053 return false;
7054 }
7055
7056 static void
7057 cfil_udp_gc_thread_sleep(bool forever)
7058 {
7059 if (forever) {
7060 (void) assert_wait((event_t) &cfil_sock_udp_attached_count,
7061 THREAD_INTERRUPTIBLE);
7062 } else {
7063 uint64_t deadline = 0;
7064 nanoseconds_to_absolutetime(UDP_FLOW_GC_RUN_INTERVAL_NSEC, &deadline);
7065 clock_absolutetime_interval_to_deadline(deadline, &deadline);
7066
7067 (void) assert_wait_deadline(&cfil_sock_udp_attached_count,
7068 THREAD_INTERRUPTIBLE, deadline);
7069 }
7070 }
7071
7072 static void
7073 cfil_udp_gc_thread_func(void *v, wait_result_t w)
7074 {
7075 #pragma unused(v, w)
7076
7077 ASSERT(cfil_udp_gc_thread == current_thread());
7078 thread_set_thread_name(current_thread(), "CFIL_UDP_GC");
7079
7080 // Kick off gc shortly
7081 cfil_udp_gc_thread_sleep(false);
7082 thread_block_parameter((thread_continue_t) cfil_info_udp_expire, NULL);
7083 /* NOTREACHED */
7084 }
7085
7086 static void
7087 cfil_info_udp_expire(void *v, wait_result_t w)
7088 {
7089 #pragma unused(v, w)
7090
7091 static uint64_t expired_array[UDP_FLOW_GC_MAX_COUNT];
7092 static uint32_t expired_count = 0;
7093
7094 struct cfil_info *cfil_info;
7095 struct cfil_hash_entry *hash_entry;
7096 struct cfil_db *db;
7097 struct socket *so;
7098 u_int64_t current_time = 0;
7099
7100 current_time = net_uptime();
7101
7102 // Get all expired UDP flow ids
7103 cfil_rw_lock_shared(&cfil_lck_rw);
7104
7105 if (cfil_sock_udp_attached_count == 0) {
7106 cfil_rw_unlock_shared(&cfil_lck_rw);
7107 goto go_sleep;
7108 }
7109
7110 TAILQ_FOREACH(cfil_info, &cfil_sock_head, cfi_link) {
7111 if (expired_count >= UDP_FLOW_GC_MAX_COUNT) {
7112 break;
7113 }
7114
7115 if (IS_IP_DGRAM(cfil_info->cfi_so)) {
7116 if (cfil_info_idle_timed_out(cfil_info, UDP_FLOW_GC_IDLE_TO, current_time) ||
7117 cfil_info_action_timed_out(cfil_info, UDP_FLOW_GC_ACTION_TO) ||
7118 cfil_info_buffer_threshold_exceeded(cfil_info)) {
7119 expired_array[expired_count] = cfil_info->cfi_sock_id;
7120 expired_count++;
7121 }
7122 }
7123 }
7124 cfil_rw_unlock_shared(&cfil_lck_rw);
7125
7126 if (expired_count == 0) {
7127 goto go_sleep;
7128 }
7129
7130 for (uint32_t i = 0; i < expired_count; i++) {
7131 // Look up the socket (UDP only) and lock it
7132 so = cfil_socket_from_sock_id(expired_array[i], true);
7133 if (so == NULL) {
7134 continue;
7135 }
7136
7137 cfil_info = cfil_db_get_cfil_info(so->so_cfil_db, expired_array[i]);
7138 if (cfil_info == NULL) {
7139 goto unlock;
7140 }
7141
7142 db = so->so_cfil_db;
7143 hash_entry = cfil_info->cfi_hash_entry;
7144
7145 if (db == NULL || hash_entry == NULL) {
7146 goto unlock;
7147 }
7148
7149 #if GC_DEBUG || LIFECYCLE_DEBUG
7150 cfil_info_log(LOG_ERR, cfil_info, "CFIL: LIFECYCLE: GC CLEAN UP");
7151 #endif
7152
7153 cfil_db_delete_entry(db, hash_entry);
7154 CFIL_INFO_FREE(cfil_info);
7155 OSIncrementAtomic(&cfil_stats.cfs_sock_detached);
7156
7157 if (so->so_flags & SOF_CONTENT_FILTER) {
7158 if (db->cfdb_count == 0) {
7159 so->so_flags &= ~SOF_CONTENT_FILTER;
7160 }
7161 VERIFY(so->so_usecount > 0);
7162 so->so_usecount--;
7163 }
7164 unlock:
7165 socket_unlock(so, 1);
7166 }
7167
7168 #if GC_DEBUG
7169 CFIL_LOG(LOG_ERR, "CFIL: UDP flow idle timeout check: expired %d idle flows", expired_count);
7170 #endif
7171 expired_count = 0;
7172
7173 go_sleep:
7174
7175 	// Sleep forever (until woken up) if there are no more UDP flows to clean
7176 	cfil_rw_lock_shared(&cfil_lck_rw);
7177 	cfil_udp_gc_thread_sleep(cfil_sock_udp_attached_count == 0);
7178 cfil_rw_unlock_shared(&cfil_lck_rw);
7179 thread_block_parameter((thread_continue_t)cfil_info_udp_expire, NULL);
7180 /* NOTREACHED */
7181 }
7182
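/*
 * Snapshot the socket state a datagram needs to carry while it is held by
 * the content filter: the socket state-change count, socket options, inpcb
 * flags and the flow's foreign address. The snapshot is stored in a
 * KERNEL_TAG_TYPE_CFIL_UDP mbuf tag prepended to the packet so the state at
 * save time can be recovered later.
 */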
7183 struct m_tag *
7184 cfil_dgram_save_socket_state(struct cfil_info *cfil_info, struct mbuf *m)
7185 {
7186 struct m_tag *tag = NULL;
7187 struct cfil_tag *ctag = NULL;
7188 struct cfil_hash_entry *hash_entry = NULL;
7189 struct inpcb *inp = NULL;
7190
7191 if (cfil_info == NULL || cfil_info->cfi_so == NULL ||
7192 cfil_info->cfi_hash_entry == NULL || m == NULL || !(m->m_flags & M_PKTHDR)) {
7193 return NULL;
7194 }
7195
7196 inp = sotoinpcb(cfil_info->cfi_so);
7197
7198 /* Allocate a tag */
7199 tag = m_tag_create(KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_CFIL_UDP,
7200 sizeof(struct cfil_tag), M_DONTWAIT, m);
7201
7202 if (tag) {
7203 ctag = (struct cfil_tag*)(tag + 1);
7204 ctag->cfil_so_state_change_cnt = cfil_info->cfi_so->so_state_change_cnt;
7205 ctag->cfil_so_options = cfil_info->cfi_so->so_options;
7206 ctag->cfil_inp_flags = inp ? inp->inp_flags : 0;
7207
7208 hash_entry = cfil_info->cfi_hash_entry;
7209 if (hash_entry->cfentry_family == AF_INET6) {
7210 fill_ip6_sockaddr_4_6(&ctag->cfil_faddr,
7211 &hash_entry->cfentry_faddr.addr6,
7212 hash_entry->cfentry_fport);
7213 } else if (hash_entry->cfentry_family == AF_INET) {
7214 fill_ip_sockaddr_4_6(&ctag->cfil_faddr,
7215 hash_entry->cfentry_faddr.addr46.ia46_addr4,
7216 hash_entry->cfentry_fport);
7217 }
7218 m_tag_prepend(m, tag);
7219 return tag;
7220 }
7221 return NULL;
7222 }
7223
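/*
 * Retrieve the CFIL UDP state tag from an mbuf and copy out whichever of
 * the saved fields the caller asked for. The tag is unlinked from the mbuf
 * and returned; the caller owns it and must free it.
 */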
7224 struct m_tag *
7225 cfil_dgram_get_socket_state(struct mbuf *m, uint32_t *state_change_cnt, uint32_t *options,
7226 struct sockaddr **faddr, int *inp_flags)
7227 {
7228 struct m_tag *tag = NULL;
7229 struct cfil_tag *ctag = NULL;
7230
7231 tag = m_tag_locate(m, KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_CFIL_UDP, NULL);
7232 if (tag) {
7233 ctag = (struct cfil_tag *)(tag + 1);
7234 if (state_change_cnt) {
7235 *state_change_cnt = ctag->cfil_so_state_change_cnt;
7236 }
7237 if (options) {
7238 *options = ctag->cfil_so_options;
7239 }
7240 if (faddr) {
7241 *faddr = (struct sockaddr *) &ctag->cfil_faddr;
7242 }
7243 if (inp_flags) {
7244 *inp_flags = ctag->cfil_inp_flags;
7245 }
7246
7247 		/*
7248 		 * Unlink the tag and hand it over to the caller.
7249 		 * The caller is responsible for freeing it.
7250 		 */
7251 m_tag_unlink(m, tag);
7252 return tag;
7253 }
7254 return NULL;
7255 }
7256
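/*
 * Non-destructive variant: report whether an mbuf carries a CFIL UDP state
 * tag and, if so, return the saved inpcb flags without unlinking the tag.
 */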
7257 boolean_t
7258 cfil_dgram_peek_socket_state(struct mbuf *m, int *inp_flags)
7259 {
7260 struct m_tag *tag = NULL;
7261 struct cfil_tag *ctag = NULL;
7262
7263 tag = m_tag_locate(m, KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_CFIL_UDP, NULL);
7264 if (tag) {
7265 ctag = (struct cfil_tag *)(tag + 1);
7266 if (inp_flags) {
7267 *inp_flags = ctag->cfil_inp_flags;
7268 }
7269 return true;
7270 }
7271 return false;
7272 }
7273
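/*
 * Enqueue an accumulated stats report buffer on the kernel control socket
 * of the filter at "kcunit". Called with the cfil lock held shared; if the
 * control socket has no buffer space, the filter is marked flow controlled
 * (taking the lock exclusive to set CFF_FLOW_CONTROLLED).
 */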
7274 static int
7275 cfil_dispatch_stats_event_locked(int kcunit, struct cfil_stats_report_buffer *buffer, uint32_t stats_count)
7276 {
7277 struct content_filter *cfc = NULL;
7278 errno_t error = 0;
7279 size_t msgsize = 0;
7280
7281 if (buffer == NULL || stats_count == 0) {
7282 return error;
7283 }
7284
7285 if (content_filters == NULL || kcunit > MAX_CONTENT_FILTER) {
7286 return error;
7287 }
7288
7289 cfc = content_filters[kcunit - 1];
7290 if (cfc == NULL) {
7291 return error;
7292 }
7293
7294 /* Would be wasteful to try */
7295 if (cfc->cf_flags & CFF_FLOW_CONTROLLED) {
7296 error = ENOBUFS;
7297 goto done;
7298 }
7299
7300 msgsize = sizeof(struct cfil_msg_stats_report) + (sizeof(struct cfil_msg_sock_stats) * stats_count);
7301 buffer->msghdr.cfm_len = (uint32_t)msgsize;
7302 buffer->msghdr.cfm_version = 1;
7303 buffer->msghdr.cfm_type = CFM_TYPE_EVENT;
7304 buffer->msghdr.cfm_op = CFM_OP_STATS;
7305 buffer->msghdr.cfm_sock_id = 0;
7306 buffer->count = stats_count;
7307
7308 #if STATS_DEBUG
7309 CFIL_LOG(LOG_ERR, "STATS (kcunit %d): msg size %lu - %lu %lu %lu",
7310 kcunit,
7311 (unsigned long)msgsize,
7312 (unsigned long)sizeof(struct cfil_msg_stats_report),
7313 (unsigned long)sizeof(struct cfil_msg_sock_stats),
7314 (unsigned long)stats_count);
7315 #endif
7316
7317 error = ctl_enqueuedata(cfc->cf_kcref, cfc->cf_kcunit,
7318 buffer,
7319 msgsize,
7320 CTL_DATA_EOR);
7321 if (error != 0) {
7322 CFIL_LOG(LOG_ERR, "ctl_enqueuedata() failed: %d", error);
7323 goto done;
7324 }
7325 OSIncrementAtomic(&cfil_stats.cfs_stats_event_ok);
7326
7327 #if STATS_DEBUG
7328 CFIL_LOG(LOG_ERR, "CFIL: STATS REPORT: send msg to %d", kcunit);
7329 #endif
7330
7331 done:
7332
7333 if (error == ENOBUFS) {
7334 OSIncrementAtomic(
7335 &cfil_stats.cfs_stats_event_flow_control);
7336
7337 if (!cfil_rw_lock_shared_to_exclusive(&cfil_lck_rw)) {
7338 cfil_rw_lock_exclusive(&cfil_lck_rw);
7339 }
7340
7341 cfc->cf_flags |= CFF_FLOW_CONTROLLED;
7342
7343 cfil_rw_unlock_exclusive(&cfil_lck_rw);
7344 } else if (error != 0) {
7345 OSIncrementAtomic(&cfil_stats.cfs_stats_event_fail);
7346 }
7347
7348 return error;
7349 }
7350
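/*
 * Park the stats report thread: wait forever on the
 * cfil_sock_attached_stats_count event, or arm a
 * CFIL_STATS_REPORT_RUN_INTERVAL_NSEC deadline for the next report pass.
 */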
7351 static void
7352 cfil_stats_report_thread_sleep(bool forever)
7353 {
7354 #if STATS_DEBUG
7355 CFIL_LOG(LOG_ERR, "CFIL: STATS COLLECTION SLEEP");
7356 #endif
7357
7358 if (forever) {
7359 (void) assert_wait((event_t) &cfil_sock_attached_stats_count,
7360 THREAD_INTERRUPTIBLE);
7361 } else {
7362 uint64_t deadline = 0;
7363 nanoseconds_to_absolutetime(CFIL_STATS_REPORT_RUN_INTERVAL_NSEC, &deadline);
7364 clock_absolutetime_interval_to_deadline(deadline, &deadline);
7365
7366 (void) assert_wait_deadline(&cfil_sock_attached_stats_count,
7367 THREAD_INTERRUPTIBLE, deadline);
7368 }
7369 }
7370
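/*
 * Entry point of the stats report kernel thread: name the thread, arm the
 * first report interval, and block with cfil_stats_report() as the
 * continuation.
 */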
7371 static void
7372 cfil_stats_report_thread_func(void *v, wait_result_t w)
7373 {
7374 #pragma unused(v, w)
7375
7376 ASSERT(cfil_stats_report_thread == current_thread());
7377 thread_set_thread_name(current_thread(), "CFIL_STATS_REPORT");
7378
7379 	// Kick off the first stats report shortly
7380 cfil_stats_report_thread_sleep(false);
7381 thread_block_parameter((thread_continue_t) cfil_stats_report, NULL);
7382 /* NOTREACHED */
7383 }
7384
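/*
 * For one (flow, filter) pair: if the filter asked for periodic stats and
 * its report interval has elapsed, and the flow has unreported inbound or
 * outbound bytes, append a cfil_msg_sock_stats record to that filter's
 * global report buffer and update the per-entry reported counters.
 * Returns true when a record was added.
 */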
7385 static bool
7386 cfil_stats_collect_flow_stats_for_filter(int kcunit,
7387 struct cfil_info *cfil_info,
7388 struct cfil_entry *entry,
7389 struct timeval current_tv)
7390 {
7391 struct cfil_stats_report_buffer *buffer = NULL;
7392 struct cfil_msg_sock_stats *flow_array = NULL;
7393 struct cfil_msg_sock_stats *stats = NULL;
7394 struct inpcb *inp = NULL;
7395 struct timeval diff_time;
7396 uint64_t diff_time_usecs;
7397 int index = 0;
7398
7399 if (entry->cfe_stats_report_frequency == 0) {
7400 return false;
7401 }
7402
7403 buffer = global_cfil_stats_report_buffers[kcunit - 1];
7404 if (buffer == NULL) {
7405 #if STATS_DEBUG
7406 CFIL_LOG(LOG_ERR, "CFIL: STATS: no buffer");
7407 #endif
7408 return false;
7409 }
7410
7411 timersub(&current_tv, &entry->cfe_stats_report_ts, &diff_time);
7412 diff_time_usecs = (diff_time.tv_sec * USEC_PER_SEC) + diff_time.tv_usec;
7413
7414 #if STATS_DEBUG
7415 CFIL_LOG(LOG_ERR, "CFIL: STATS REPORT - elapsed time - ts %llu %llu cur ts %llu %llu diff %llu %llu (usecs %llu) @freq %llu usecs sockID %llu",
7416 (unsigned long long)entry->cfe_stats_report_ts.tv_sec,
7417 (unsigned long long)entry->cfe_stats_report_ts.tv_usec,
7418 (unsigned long long)current_tv.tv_sec,
7419 (unsigned long long)current_tv.tv_usec,
7420 (unsigned long long)diff_time.tv_sec,
7421 (unsigned long long)diff_time.tv_usec,
7422 (unsigned long long)diff_time_usecs,
7423 (unsigned long long)((entry->cfe_stats_report_frequency * NSEC_PER_MSEC) / NSEC_PER_USEC),
7424 cfil_info->cfi_sock_id);
7425 #endif
7426
7427 // Compare elapsed time in usecs
7428 if (diff_time_usecs >= (entry->cfe_stats_report_frequency * NSEC_PER_MSEC) / NSEC_PER_USEC) {
7429 #if STATS_DEBUG
7430 CFIL_LOG(LOG_ERR, "CFIL: STATS REPORT - in %llu reported %llu",
7431 cfil_info->cfi_byte_inbound_count,
7432 entry->cfe_byte_inbound_count_reported);
7433 CFIL_LOG(LOG_ERR, "CFIL: STATS REPORT - out %llu reported %llu",
7434 cfil_info->cfi_byte_outbound_count,
7435 entry->cfe_byte_outbound_count_reported);
7436 #endif
7437 // Check if flow has new bytes that have not been reported
7438 if (entry->cfe_byte_inbound_count_reported < cfil_info->cfi_byte_inbound_count ||
7439 entry->cfe_byte_outbound_count_reported < cfil_info->cfi_byte_outbound_count) {
7440 flow_array = (struct cfil_msg_sock_stats *)&buffer->stats;
7441 index = global_cfil_stats_counts[kcunit - 1];
7442
7443 stats = &flow_array[index];
7444 stats->cfs_sock_id = cfil_info->cfi_sock_id;
7445 stats->cfs_byte_inbound_count = cfil_info->cfi_byte_inbound_count;
7446 stats->cfs_byte_outbound_count = cfil_info->cfi_byte_outbound_count;
7447
7448 if (entry->cfe_laddr_sent == false) {
7449 /* cache it if necessary */
7450 if (cfil_info->cfi_so_attach_laddr.sa.sa_len == 0) {
7451 inp = cfil_info->cfi_so ? sotoinpcb(cfil_info->cfi_so) : NULL;
7452 if (inp != NULL) {
7453 boolean_t outgoing = (cfil_info->cfi_dir == CFS_CONNECTION_DIR_OUT);
7454 union sockaddr_in_4_6 *src = outgoing ? &cfil_info->cfi_so_attach_laddr : NULL;
7455 union sockaddr_in_4_6 *dst = outgoing ? NULL : &cfil_info->cfi_so_attach_laddr;
7456 cfil_fill_event_msg_addresses(cfil_info->cfi_hash_entry, inp,
7457 src, dst, !IS_INP_V6(inp), outgoing);
7458 }
7459 }
7460
7461 if (cfil_info->cfi_so_attach_laddr.sa.sa_len != 0) {
7462 stats->cfs_laddr.sin6 = cfil_info->cfi_so_attach_laddr.sin6;
7463 entry->cfe_laddr_sent = true;
7464 }
7465 }
7466
7467 global_cfil_stats_counts[kcunit - 1]++;
7468
7469 entry->cfe_stats_report_ts = current_tv;
7470 entry->cfe_byte_inbound_count_reported = cfil_info->cfi_byte_inbound_count;
7471 entry->cfe_byte_outbound_count_reported = cfil_info->cfi_byte_outbound_count;
7472 #if STATS_DEBUG
7473 cfil_info_log(LOG_ERR, cfil_info, "CFIL: LIFECYCLE: STATS COLLECTED");
7474 #endif
7475 CFI_ADD_TIME_LOG(cfil_info, &current_tv, &cfil_info->cfi_first_event, CFM_OP_STATS);
7476 return true;
7477 }
7478 }
7479 return false;
7480 }
7481
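/*
 * Stats report continuation. Each pass walks cfil_sock_head_stats under the
 * shared cfil lock, examining at most CFIL_STATS_REPORT_MAX_COUNT flows;
 * saved_next_sock_id remembers where to resume when the list is longer than
 * one batch. Populated per-filter buffers are then dispatched to the
 * corresponding kernel control sockets before the thread goes back to sleep.
 */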
7482 static void
7483 cfil_stats_report(void *v, wait_result_t w)
7484 {
7485 #pragma unused(v, w)
7486
7487 struct cfil_info *cfil_info = NULL;
7488 struct cfil_entry *entry = NULL;
7489 struct timeval current_tv;
7490 uint32_t flow_count = 0;
7491 	uint64_t saved_next_sock_id = 0; // Sock id to resume from on the next pass
7492 bool flow_reported = false;
7493
7494 #if STATS_DEBUG
7495 CFIL_LOG(LOG_ERR, "CFIL: STATS COLLECTION RUNNING");
7496 #endif
7497
7498 do {
7499 		// Collect the sock ids of all flows that have new stats
7500 cfil_rw_lock_shared(&cfil_lck_rw);
7501
7502 if (cfil_sock_attached_stats_count == 0) {
7503 #if STATS_DEBUG
7504 CFIL_LOG(LOG_ERR, "CFIL: STATS: no flow");
7505 #endif
7506 cfil_rw_unlock_shared(&cfil_lck_rw);
7507 goto go_sleep;
7508 }
7509
7510 for (int kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
7511 if (global_cfil_stats_report_buffers[kcunit - 1] != NULL) {
7512 memset(global_cfil_stats_report_buffers[kcunit - 1], 0, sizeof(struct cfil_stats_report_buffer));
7513 }
7514 global_cfil_stats_counts[kcunit - 1] = 0;
7515 }
7516
7517 microuptime(&current_tv);
7518 flow_count = 0;
7519
7520 TAILQ_FOREACH(cfil_info, &cfil_sock_head_stats, cfi_link_stats) {
7521 if (saved_next_sock_id != 0 &&
7522 saved_next_sock_id == cfil_info->cfi_sock_id) {
7523 // Here is where we left off previously, start accumulating
7524 saved_next_sock_id = 0;
7525 }
7526
7527 if (saved_next_sock_id == 0) {
7528 if (flow_count >= CFIL_STATS_REPORT_MAX_COUNT) {
7529 					// Examine a fixed number of flows each round. Remember the current flow
7530 					// so we can resume from here on the next loop
7531 saved_next_sock_id = cfil_info->cfi_sock_id;
7532 break;
7533 }
7534
7535 flow_reported = false;
7536 for (int kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
7537 entry = &cfil_info->cfi_entries[kcunit - 1];
7538 if (entry->cfe_filter == NULL) {
7539 #if STATS_DEBUG
7540 CFIL_LOG(LOG_NOTICE, "CFIL: STATS REPORT - so %llx no filter",
7541 cfil_info->cfi_so ? (uint64_t)VM_KERNEL_ADDRPERM(cfil_info->cfi_so) : 0);
7542 #endif
7543 continue;
7544 }
7545
7546 if ((entry->cfe_stats_report_frequency > 0) &&
7547 cfil_stats_collect_flow_stats_for_filter(kcunit, cfil_info, entry, current_tv) == true) {
7548 flow_reported = true;
7549 }
7550 }
7551 if (flow_reported == true) {
7552 flow_count++;
7553 }
7554 }
7555 }
7556
7557 if (flow_count > 0) {
7558 #if STATS_DEBUG
7559 CFIL_LOG(LOG_ERR, "CFIL: STATS reporting for %d flows", flow_count);
7560 #endif
7561 for (int kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
7562 if (global_cfil_stats_report_buffers[kcunit - 1] != NULL &&
7563 global_cfil_stats_counts[kcunit - 1] > 0) {
7564 cfil_dispatch_stats_event_locked(kcunit,
7565 global_cfil_stats_report_buffers[kcunit - 1],
7566 global_cfil_stats_counts[kcunit - 1]);
7567 }
7568 }
7569 } else {
7570 cfil_rw_unlock_shared(&cfil_lck_rw);
7571 goto go_sleep;
7572 }
7573
7574 cfil_rw_unlock_shared(&cfil_lck_rw);
7575
7576 // Loop again if we haven't finished the whole cfil_info list
7577 } while (saved_next_sock_id != 0);
7578
7579 go_sleep:
7580
7581 	// Sleep forever (until woken up) if there are no more flows to report
7582 	cfil_rw_lock_shared(&cfil_lck_rw);
7583 	cfil_stats_report_thread_sleep(cfil_sock_attached_stats_count == 0);
7584 cfil_rw_unlock_shared(&cfil_lck_rw);
7585 thread_block_parameter((thread_continue_t) cfil_stats_report, NULL);
7586 /* NOTREACHED */
7587 }