]> git.saurik.com Git - apple/xnu.git/blob - bsd/net/content_filter.c
xnu-7195.60.75.tar.gz
[apple/xnu.git] / bsd / net / content_filter.c
1 /*
2 * Copyright (c) 2013-2020 Apple Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. Please obtain a copy of the License at
10 * http://www.opensource.apple.com/apsl/ and read it before using this
11 * file.
12 *
13 * The Original Code and all software distributed under the License are
14 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
18 * Please see the License for the specific language governing rights and
19 * limitations under the License.
20 *
21 * @APPLE_LICENSE_HEADER_END@
22 */
23
24 /*
25 * THEORY OF OPERATION
26 *
27 * The socket content filter subsystem provides a way for user space agents to
28 * make filtering decisions based on the content of the data being sent and
29 * received by INET/INET6 sockets.
30 *
31 * A content filter user space agents gets a copy of the data and the data is
32 * also kept in kernel buffer until the user space agents makes a pass or drop
33 * decision. This unidirectional flow of content avoids unnecessary data copies
34 * back to the kernel.
35 *
36 * A user space filter agent opens a kernel control socket with the name
37 * CONTENT_FILTER_CONTROL_NAME to attach to the socket content filter subsystem.
38 * When connected, a "struct content_filter" is created and set as the
39 * "unitinfo" of the corresponding kernel control socket instance.
40 *
41 * The socket content filter subsystem exchanges messages with the user space
42 * filter agent until an ultimate pass or drop decision is made by the
43 * user space filter agent.
44 *
45 * It should be noted that messages about many INET/INET6 sockets can be multiplexed
46 * over a single kernel control socket.
47 *
48 * Notes:
49 * - The current implementation supports all INET/INET6 sockets (i.e. TCP,
50 * UDP, ICMP, etc).
51 * - The current implementation supports up to two simultaneous content filters
52 * for iOS devices and eight simultaneous content filters for OSX.
53 *
54 *
55 * NECP FILTER CONTROL UNIT
56 *
57 * A user space filter agent uses the Network Extension Control Policy (NECP)
58 * database to specify which INET/INET6 sockets need to be filtered. The NECP
59 * criteria may be based on a variety of properties like user ID or proc UUID.
60 *
61 * The NECP "filter control unit" is used by the socket content filter subsystem
62 * to deliver the relevant INET/INET6 content information to the appropriate
63 * user space filter agent via its kernel control socket instance.
64 * This works as follows:
65 *
66 * 1) The user space filter agent specifies an NECP filter control unit when
67 * in adds its filtering rules to the NECP database.
68 *
69 * 2) The user space filter agent also sets its NECP filter control unit on the
70 * content filter kernel control socket via the socket option
71 * CFIL_OPT_NECP_CONTROL_UNIT.
72 *
73 * 3) The NECP database is consulted to find out if a given INET/INET6 socket
74 * needs to be subjected to content filtering and returns the corresponding
75 * NECP filter control unit -- the NECP filter control unit is actually
76 * stored in the INET/INET6 socket structure so the NECP lookup is really simple.
77 *
78 * 4) The NECP filter control unit is then used to find the corresponding
79 * kernel control socket instance.
80 *
81 * Note: NECP currently supports a single filter control unit per INET/INET6 socket
82 * but this restriction may be soon lifted.
83 *
84 *
85 * THE MESSAGING PROTOCOL
86 *
87 * The socket content filter subsystem and a user space filter agent
88 * communicate over the kernel control socket via an asynchronous
89 * messaging protocol (this is not a request-response protocol).
90 * The socket content filter subsystem sends event messages to the user
91 * space filter agent about the INET/INET6 sockets it is interested to filter.
92 * The user space filter agent sends action messages to either allow
93 * data to pass or to disallow the data flow (and drop the connection).
94 *
95 * All messages over a content filter kernel control socket share the same
96 * common header of type "struct cfil_msg_hdr". The message type tells if
97 * it's a event message "CFM_TYPE_EVENT" or a action message "CFM_TYPE_ACTION".
98 * The message header field "cfm_sock_id" identifies a given INET/INET6 flow.
99 * For TCP, flows are per-socket. For UDP and other datagrame protocols, there
100 * could be multiple flows per socket.
101 *
102 * Note the message header length field may be padded for alignment and can
103 * be larger than the actual content of the message.
104 * The field "cfm_op" describe the kind of event or action.
105 *
106 * Here are the kinds of content filter events:
107 * - CFM_OP_SOCKET_ATTACHED: a new INET/INET6 socket is being filtered
108 * - CFM_OP_SOCKET_CLOSED: A INET/INET6 socket is closed
109 * - CFM_OP_DATA_OUT: A span of data is being sent on a INET/INET6 socket
110 * - CFM_OP_DATA_IN: A span of data is being or received on a INET/INET6 socket
111 *
112 *
113 * EVENT MESSAGES
114 *
115 * The CFM_OP_DATA_OUT and CFM_OP_DATA_IN event messages contains a span of
116 * data that is being sent or received. The position of this span of data
117 * in the data flow is described by a set of start and end offsets. These
118 * are absolute 64 bits offsets. The first byte sent (or received) starts
119 * at offset 0 and ends at offset 1. The length of the content data
120 * is given by the difference between the end offset and the start offset.
121 *
122 * After a CFM_OP_SOCKET_ATTACHED is delivered, CFM_OP_DATA_OUT and
123 * CFM_OP_DATA_OUT events are not delivered until a CFM_OP_DATA_UPDATE
124 * action message is sent by the user space filter agent.
125 *
126 * Note: absolute 64 bits offsets should be large enough for the foreseeable
127 * future. A 64-bits counter will wrap after 468 years at 10 Gbit/sec:
128 * 2E64 / ((10E9 / 8) * 60 * 60 * 24 * 365.25) = 467.63
129 *
130 * They are two kinds of primary content filter actions:
131 * - CFM_OP_DATA_UPDATE: to update pass or peek offsets for each direction.
132 * - CFM_OP_DROP: to shutdown socket and disallow further data flow
133 *
134 * There is also an action to mark a given client flow as already filtered
135 * at a higher level, CFM_OP_BLESS_CLIENT.
136 *
137 *
138 * ACTION MESSAGES
139 *
140 * The CFM_OP_DATA_UPDATE action messages let the user space filter
141 * agent allow data to flow up to the specified pass offset -- there
142 * is a pass offset for outgoing data and a pass offset for incoming data.
143 * When a new INET/INET6 socket is attached to the content filter and a flow is
144 * created, each pass offset is initially set to 0 so no data is allowed to pass by
145 * default. When the pass offset is set to CFM_MAX_OFFSET via a CFM_OP_DATA_UPDATE
146 * then the data flow becomes unrestricted.
147 *
148 * Note that pass offsets can only be incremented. A CFM_OP_DATA_UPDATE message
149 * with a pass offset smaller than the pass offset of a previous
150 * CFM_OP_DATA_UPDATE message is silently ignored.
151 *
152 * A user space filter agent also uses CFM_OP_DATA_UPDATE action messages
153 * to tell the kernel how much data it wants to see by using the peek offsets.
154 * Just like pass offsets, there is a peek offset for each direction.
155 * When a new INET/INET6 flow is created, each peek offset is initially set to 0
156 * so no CFM_OP_DATA_OUT and CFM_OP_DATA_IN event messages are dispatched by default
157 * until a CFM_OP_DATA_UPDATE action message with a greater than 0 peek offset is sent
158 * by the user space filter agent. When the peek offset is set to CFM_MAX_OFFSET via
159 * a CFM_OP_DATA_UPDATE then the flow of update data events becomes unrestricted.
160 *
161 * Note that peek offsets cannot be smaller than the corresponding pass offset.
162 * Also a peek offsets cannot be smaller than the corresponding end offset
163 * of the last CFM_OP_DATA_OUT/CFM_OP_DATA_IN message dispatched. Trying
164 * to set a too small peek value is silently ignored.
165 *
166 *
167 * PER FLOW "struct cfil_info"
168 *
169 * As soon as a INET/INET6 socket gets attached to a content filter, a
170 * "struct cfil_info" is created to hold the content filtering state for this
171 * socket. For UDP and other datagram protocols, as soon as traffic is seen for
172 * each new flow identified by its 4-tuple of source address/port and destination
173 * address/port, a "struct cfil_info" is created. Each datagram socket may
174 * have multiple flows maintained in a hash table of "struct cfil_info" entries.
175 *
176 * The content filtering state is made of the following information
177 * for each direction:
178 * - The current pass offset;
179 * - The first and last offsets of the data pending, waiting for a filtering
180 * decision;
181 * - The inject queue for data that passed the filters and that needs
182 * to be re-injected;
183 * - A content filter specific state in a set of "struct cfil_entry"
184 *
185 *
186 * CONTENT FILTER STATE "struct cfil_entry"
187 *
188 * The "struct cfil_entry" maintains the information most relevant to the
189 * message handling over a kernel control socket with a user space filter agent.
190 *
191 * The "struct cfil_entry" holds the NECP filter control unit that corresponds
192 * to the kernel control socket unit it corresponds to and also has a pointer
193 * to the corresponding "struct content_filter".
194 *
195 * For each direction, "struct cfil_entry" maintains the following information:
196 * - The pass offset
197 * - The peek offset
198 * - The offset of the last data peeked at by the filter
199 * - A queue of data that's waiting to be delivered to the user space filter
200 * agent on the kernel control socket
201 * - A queue of data for which event messages have been sent on the kernel
202 * control socket and are pending for a filtering decision.
203 *
204 *
205 * CONTENT FILTER QUEUES
206 *
207 * Data that is being filtered is steered away from the INET/INET6 socket buffer
208 * and instead will sit in one of three content filter queues until the data
209 * can be re-injected into the INET/INET6 socket buffer.
210 *
211 * A content filter queue is represented by "struct cfil_queue" that contains
212 * a list of mbufs and the start and end offset of the data span of
213 * the list of mbufs.
214 *
215 * The data moves into the three content filter queues according to this
216 * sequence:
217 * a) The "cfe_ctl_q" of "struct cfil_entry"
218 * b) The "cfe_pending_q" of "struct cfil_entry"
219 * c) The "cfi_inject_q" of "struct cfil_info"
220 *
221 * Note: The sequence (a),(b) may be repeated several times if there is more
222 * than one content filter attached to the INET/INET6 socket.
223 *
224 * The "cfe_ctl_q" queue holds data than cannot be delivered to the
225 * kernel conntrol socket for two reasons:
226 * - The peek offset is less that the end offset of the mbuf data
227 * - The kernel control socket is flow controlled
228 *
229 * The "cfe_pending_q" queue holds data for which CFM_OP_DATA_OUT or
230 * CFM_OP_DATA_IN have been successfully dispatched to the kernel control
231 * socket and are waiting for a pass action message fromn the user space
232 * filter agent. An mbuf length must be fully allowed to pass to be removed
233 * from the cfe_pending_q.
234 *
235 * The "cfi_inject_q" queue holds data that has been fully allowed to pass
236 * by the user space filter agent and that needs to be re-injected into the
237 * INET/INET6 socket.
238 *
239 *
240 * IMPACT ON FLOW CONTROL
241 *
242 * An essential aspect of the content filer subsystem is to minimize the
243 * impact on flow control of the INET/INET6 sockets being filtered.
244 *
245 * The processing overhead of the content filtering may have an effect on
246 * flow control by adding noticeable delays and cannot be eliminated --
247 * care must be taken by the user space filter agent to minimize the
248 * processing delays.
249 *
250 * The amount of data being filtered is kept in buffers while waiting for
251 * a decision by the user space filter agent. This amount of data pending
252 * needs to be subtracted from the amount of data available in the
253 * corresponding INET/INET6 socket buffer. This is done by modifying
254 * sbspace() and tcp_sbspace() to account for amount of data pending
255 * in the content filter.
256 *
257 *
258 * LOCKING STRATEGY
259 *
260 * The global state of content filter subsystem is protected by a single
261 * read-write lock "cfil_lck_rw". The data flow can be done with the
262 * cfil read-write lock held as shared so it can be re-entered from multiple
263 * threads.
264 *
265 * The per INET/INET6 socket content filterstate -- "struct cfil_info" -- is
266 * protected by the socket lock.
267 *
268 * A INET/INET6 socket lock cannot be taken while the cfil read-write lock
269 * is held. That's why we have some sequences where we drop the cfil read-write
270 * lock before taking the INET/INET6 lock.
271 *
272 * It is also important to lock the INET/INET6 socket buffer while the content
273 * filter is modifying the amount of pending data. Otherwise the calculations
274 * in sbspace() and tcp_sbspace() could be wrong.
275 *
276 * The "cfil_lck_rw" protects "struct content_filter" and also the fields
277 * "cfe_link" and "cfe_filter" of "struct cfil_entry".
278 *
279 * Actually "cfe_link" and "cfe_filter" are protected by both by
280 * "cfil_lck_rw" and the socket lock: they may be modified only when
281 * "cfil_lck_rw" is exclusive and the socket is locked.
282 *
283 * To read the other fields of "struct content_filter" we have to take
284 * "cfil_lck_rw" in shared mode.
285 *
286 * DATAGRAM SPECIFICS:
287 *
288 * The socket content filter supports all INET/INET6 protocols. However
289 * the treatments for TCP sockets and for datagram (UDP, ICMP, etc) sockets
290 * are slightly different.
291 *
292 * Each datagram socket may have multiple flows. Each flow is identified
293 * by the flow's source address/port and destination address/port tuple
294 * and is represented as a "struct cfil_info" entry. For each socket,
295 * a hash table is used to maintain the collection of flows under that socket.
296 *
297 * Each datagram flow is uniquely identified by it's "struct cfil_info" cfi_sock_id.
298 * The highest 32-bits of the cfi_sock_id contains the socket's so_gencnt. This portion
299 * of the cfi_sock_id is used locate the socket during socket lookup. The lowest 32-bits
300 * of the cfi_sock_id contains a hash of the flow's 4-tuple. This portion of the cfi_sock_id
301 * is used as the hash value for the flow hash table lookup within the parent socket.
302 *
303 * Since datagram sockets may not be connected, flow states may not be maintained in the
304 * socket structures and thus have to be saved for each packet. These saved states will be
305 * used for both outgoing and incoming reinjections. For outgoing packets, destination
306 * address/port as well as the current socket states will be saved. During reinjection,
307 * these saved states will be used instead. For incoming packets, control and address
308 * mbufs will be chained to the data. During reinjection, the whole chain will be queued
309 * onto the incoming socket buffer.
310 *
311 * LIMITATIONS
312 *
313 * - Support all INET/INET6 sockets, such as TCP, UDP, ICMP, etc
314 *
315 * - Does not support TCP unordered messages
316 */
317
318 /*
319 * TO DO LIST
320 *
321 * Deal with OOB
322 *
323 */
324
325 #include <sys/types.h>
326 #include <sys/kern_control.h>
327 #include <sys/queue.h>
328 #include <sys/domain.h>
329 #include <sys/protosw.h>
330 #include <sys/syslog.h>
331 #include <sys/systm.h>
332 #include <sys/param.h>
333 #include <sys/mbuf.h>
334
335 #include <kern/locks.h>
336 #include <kern/zalloc.h>
337 #include <kern/debug.h>
338
339 #include <net/content_filter.h>
340 #include <net/content_filter_crypto.h>
341
342 #define _IP_VHL
343 #include <netinet/ip.h>
344 #include <netinet/in_pcb.h>
345 #include <netinet/tcp.h>
346 #include <netinet/tcp_var.h>
347 #include <netinet/udp.h>
348 #include <netinet/udp_var.h>
349
350 #include <string.h>
351 #include <libkern/libkern.h>
352 #include <kern/sched_prim.h>
353 #include <kern/task.h>
354 #include <mach/task_info.h>
355
356 #if !TARGET_OS_OSX && !defined(XNU_TARGET_OS_OSX)
357 #define MAX_CONTENT_FILTER 2
358 #else
359 #define MAX_CONTENT_FILTER 8
360 #endif
361
362 extern struct inpcbinfo ripcbinfo;
363 struct cfil_entry;
364
365 /*
366 * The structure content_filter represents a user space content filter
367 * It's created and associated with a kernel control socket instance
368 */
369 struct content_filter {
370 kern_ctl_ref cf_kcref;
371 u_int32_t cf_kcunit;
372 u_int32_t cf_flags;
373
374 uint32_t cf_necp_control_unit;
375
376 uint32_t cf_sock_count;
377 TAILQ_HEAD(, cfil_entry) cf_sock_entries;
378
379 cfil_crypto_state_t cf_crypto_state;
380 };
381
382 #define CFF_ACTIVE 0x01
383 #define CFF_DETACHING 0x02
384 #define CFF_FLOW_CONTROLLED 0x04
385
386 struct content_filter **content_filters = NULL;
387 uint32_t cfil_active_count = 0; /* Number of active content filters */
388 uint32_t cfil_sock_attached_count = 0; /* Number of sockets attachements */
389 uint32_t cfil_sock_udp_attached_count = 0; /* Number of UDP sockets attachements */
390 uint32_t cfil_sock_attached_stats_count = 0; /* Number of sockets requested periodic stats report */
391 uint32_t cfil_close_wait_timeout = 1000; /* in milliseconds */
392
393 static kern_ctl_ref cfil_kctlref = NULL;
394
395 static lck_grp_attr_t *cfil_lck_grp_attr = NULL;
396 static lck_attr_t *cfil_lck_attr = NULL;
397 static lck_grp_t *cfil_lck_grp = NULL;
398 decl_lck_rw_data(static, cfil_lck_rw);
399
400 #define CFIL_RW_LCK_MAX 8
401
402 int cfil_rw_nxt_lck = 0;
403 void* cfil_rw_lock_history[CFIL_RW_LCK_MAX];
404
405 int cfil_rw_nxt_unlck = 0;
406 void* cfil_rw_unlock_history[CFIL_RW_LCK_MAX];
407
408 static ZONE_DECLARE(content_filter_zone, "content_filter",
409 sizeof(struct content_filter), ZC_NONE);
410
411 MBUFQ_HEAD(cfil_mqhead);
412
413 struct cfil_queue {
414 uint64_t q_start; /* offset of first byte in queue */
415 uint64_t q_end; /* offset of last byte in queue */
416 struct cfil_mqhead q_mq;
417 };
418
419 /*
420 * struct cfil_entry
421 *
422 * The is one entry per content filter
423 */
424 struct cfil_entry {
425 TAILQ_ENTRY(cfil_entry) cfe_link;
426 SLIST_ENTRY(cfil_entry) cfe_order_link;
427 struct content_filter *cfe_filter;
428
429 struct cfil_info *cfe_cfil_info;
430 uint32_t cfe_flags;
431 uint32_t cfe_necp_control_unit;
432 struct timeval cfe_last_event; /* To user space */
433 struct timeval cfe_last_action; /* From user space */
434 uint64_t cfe_byte_inbound_count_reported; /* stats already been reported */
435 uint64_t cfe_byte_outbound_count_reported; /* stats already been reported */
436 struct timeval cfe_stats_report_ts; /* Timestamp for last stats report */
437 uint32_t cfe_stats_report_frequency; /* Interval for stats report in msecs */
438 boolean_t cfe_laddr_sent;
439
440 struct cfe_buf {
441 /*
442 * cfe_pending_q holds data that has been delivered to
443 * the filter and for which we are waiting for an action
444 */
445 struct cfil_queue cfe_pending_q;
446 /*
447 * This queue is for data that has not be delivered to
448 * the content filter (new data, pass peek or flow control)
449 */
450 struct cfil_queue cfe_ctl_q;
451
452 uint64_t cfe_pass_offset;
453 uint64_t cfe_peek_offset;
454 uint64_t cfe_peeked;
455 } cfe_snd, cfe_rcv;
456 };
457
458 #define CFEF_CFIL_ATTACHED 0x0001 /* was attached to filter */
459 #define CFEF_SENT_SOCK_ATTACHED 0x0002 /* sock attach event was sent */
460 #define CFEF_DATA_START 0x0004 /* can send data event */
461 #define CFEF_FLOW_CONTROLLED 0x0008 /* wait for flow control lift */
462 #define CFEF_SENT_DISCONNECT_IN 0x0010 /* event was sent */
463 #define CFEF_SENT_DISCONNECT_OUT 0x0020 /* event was sent */
464 #define CFEF_SENT_SOCK_CLOSED 0x0040 /* closed event was sent */
465 #define CFEF_CFIL_DETACHED 0x0080 /* filter was detached */
466
467
468 #define CFI_ADD_TIME_LOG(cfil, t1, t0, op) \
469 struct timeval64 _tdiff; \
470 if ((cfil)->cfi_op_list_ctr < CFI_MAX_TIME_LOG_ENTRY) { \
471 timersub(t1, t0, &_tdiff); \
472 (cfil)->cfi_op_time[(cfil)->cfi_op_list_ctr] = (uint32_t)(_tdiff.tv_sec * 1000 + _tdiff.tv_usec / 1000);\
473 (cfil)->cfi_op_list[(cfil)->cfi_op_list_ctr] = (unsigned char)op; \
474 (cfil)->cfi_op_list_ctr ++; \
475 }
476
477 struct cfil_hash_entry;
478
479 /*
480 * struct cfil_info
481 *
482 * There is a struct cfil_info per socket
483 */
484 struct cfil_info {
485 TAILQ_ENTRY(cfil_info) cfi_link;
486 TAILQ_ENTRY(cfil_info) cfi_link_stats;
487 struct socket *cfi_so;
488 uint64_t cfi_flags;
489 uint64_t cfi_sock_id;
490 struct timeval64 cfi_first_event;
491 uint32_t cfi_op_list_ctr;
492 uint32_t cfi_op_time[CFI_MAX_TIME_LOG_ENTRY]; /* time interval in microseconds since first event */
493 unsigned char cfi_op_list[CFI_MAX_TIME_LOG_ENTRY];
494 union sockaddr_in_4_6 cfi_so_attach_faddr; /* faddr at the time of attach */
495 union sockaddr_in_4_6 cfi_so_attach_laddr; /* laddr at the time of attach */
496
497 int cfi_dir;
498 uint64_t cfi_byte_inbound_count;
499 uint64_t cfi_byte_outbound_count;
500
501 boolean_t cfi_isSignatureLatest; /* Indicates if signature covers latest flow attributes */
502 u_int32_t cfi_filter_control_unit;
503 u_int32_t cfi_debug;
504 struct cfi_buf {
505 /*
506 * cfi_pending_first and cfi_pending_last describe the total
507 * amount of data outstanding for all the filters on
508 * this socket and data in the flow queue
509 * cfi_pending_mbcnt counts in sballoc() "chars of mbufs used"
510 */
511 uint64_t cfi_pending_first;
512 uint64_t cfi_pending_last;
513 uint32_t cfi_pending_mbcnt;
514 uint32_t cfi_pending_mbnum;
515 uint32_t cfi_tail_drop_cnt;
516 /*
517 * cfi_pass_offset is the minimum of all the filters
518 */
519 uint64_t cfi_pass_offset;
520 /*
521 * cfi_inject_q holds data that needs to be re-injected
522 * into the socket after filtering and that can
523 * be queued because of flow control
524 */
525 struct cfil_queue cfi_inject_q;
526 } cfi_snd, cfi_rcv;
527
528 struct cfil_entry cfi_entries[MAX_CONTENT_FILTER];
529 struct cfil_hash_entry *cfi_hash_entry;
530 SLIST_HEAD(, cfil_entry) cfi_ordered_entries;
531 os_refcnt_t cfi_ref_count;
532 } __attribute__((aligned(8)));
533
534 #define CFIF_DROP 0x0001 /* drop action applied */
535 #define CFIF_CLOSE_WAIT 0x0002 /* waiting for filter to close */
536 #define CFIF_SOCK_CLOSED 0x0004 /* socket is closed */
537 #define CFIF_RETRY_INJECT_IN 0x0010 /* inject in failed */
538 #define CFIF_RETRY_INJECT_OUT 0x0020 /* inject out failed */
539 #define CFIF_SHUT_WR 0x0040 /* shutdown write */
540 #define CFIF_SHUT_RD 0x0080 /* shutdown read */
541 #define CFIF_SOCKET_CONNECTED 0x0100 /* socket is connected */
542 #define CFIF_INITIAL_VERDICT 0x0200 /* received initial verdict */
543
544 #define CFI_MASK_GENCNT 0xFFFFFFFF00000000 /* upper 32 bits */
545 #define CFI_SHIFT_GENCNT 32
546 #define CFI_MASK_FLOWHASH 0x00000000FFFFFFFF /* lower 32 bits */
547 #define CFI_SHIFT_FLOWHASH 0
548
549 #define CFI_ENTRY_KCUNIT(i, e) ((uint32_t)(((e) - &((i)->cfi_entries[0])) + 1))
550
551 static ZONE_DECLARE(cfil_info_zone, "cfil_info",
552 sizeof(struct cfil_info), ZC_NONE);
553
554 TAILQ_HEAD(cfil_sock_head, cfil_info) cfil_sock_head;
555 TAILQ_HEAD(cfil_sock_head_stats, cfil_info) cfil_sock_head_stats;
556
557 #define CFIL_QUEUE_VERIFY(x) if (cfil_debug) cfil_queue_verify(x)
558 #define CFIL_INFO_VERIFY(x) if (cfil_debug) cfil_info_verify(x)
559
560 /*
561 * UDP Socket Support
562 */
563 LIST_HEAD(cfilhashhead, cfil_hash_entry);
564 #define CFILHASHSIZE 16
565 #define CFIL_HASH(laddr, faddr, lport, fport) ((faddr) ^ ((laddr) >> 16) ^ (fport) ^ (lport))
566
567 #define IS_INET(so) (so && so->so_proto && so->so_proto->pr_domain && (so->so_proto->pr_domain->dom_family == AF_INET || so->so_proto->pr_domain->dom_family == AF_INET6))
568 #define IS_TCP(so) (so && so->so_proto && so->so_proto->pr_type == SOCK_STREAM && so->so_proto->pr_protocol == IPPROTO_TCP)
569 #define IS_UDP(so) (so && so->so_proto && so->so_proto->pr_type == SOCK_DGRAM && so->so_proto->pr_protocol == IPPROTO_UDP)
570 #define IS_ICMP(so) (so && so->so_proto && (so->so_proto->pr_type == SOCK_RAW || so->so_proto->pr_type == SOCK_DGRAM) && \
571 (so->so_proto->pr_protocol == IPPROTO_ICMP || so->so_proto->pr_protocol == IPPROTO_ICMPV6))
572 #define IS_RAW(so) (so && so->so_proto && so->so_proto->pr_type == SOCK_RAW && so->so_proto->pr_protocol == IPPROTO_RAW)
573
574 #if !TARGET_OS_OSX && !defined(XNU_TARGET_OS_OSX)
575 #define IS_IP_DGRAM(so) (IS_INET(so) && IS_UDP(so))
576 #else
577 #define IS_IP_DGRAM(so) (IS_INET(so) && !IS_TCP(so))
578 #endif
579
580 #define OPTIONAL_IP_HEADER(so) (!IS_TCP(so) && !IS_UDP(so))
581 #define GET_SO_PROTO(so) ((so && so->so_proto) ? so->so_proto->pr_protocol : IPPROTO_MAX)
582 #define IS_INP_V6(inp) (inp && (inp->inp_vflag & INP_IPV6))
583
584 #define UNCONNECTED(inp) (inp && (((inp->inp_vflag & INP_IPV4) && (inp->inp_faddr.s_addr == INADDR_ANY)) || \
585 ((inp->inp_vflag & INP_IPV6) && IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr))))
586 #define IS_INP_V6(inp) (inp && (inp->inp_vflag & INP_IPV6))
587 #define IS_ENTRY_ATTACHED(cfil_info, kcunit) (cfil_info != NULL && (kcunit <= MAX_CONTENT_FILTER) && \
588 cfil_info->cfi_entries[kcunit - 1].cfe_filter != NULL)
589 #define IS_DNS(local, remote) (check_port(local, 53) || check_port(remote, 53) || check_port(local, 5353) || check_port(remote, 5353))
590 #define IS_INITIAL_TFO_DATA(so) (so && (so->so_flags1 & SOF1_PRECONNECT_DATA) && (so->so_state & SS_ISCONNECTING))
591 #define NULLADDRESS(addr) ((addr.sa.sa_len == 0) || \
592 (addr.sa.sa_family == AF_INET && addr.sin.sin_addr.s_addr == 0) || \
593 (addr.sa.sa_family == AF_INET6 && IN6_IS_ADDR_UNSPECIFIED(&addr.sin6.sin6_addr)))
594 #define LOCAL_ADDRESS_NEEDS_UPDATE(entry) \
595 ((entry->cfentry_family == AF_INET && entry->cfentry_laddr.addr46.ia46_addr4.s_addr == 0) || \
596 entry->cfentry_family == AF_INET6 && IN6_IS_ADDR_UNSPECIFIED(&entry->cfentry_laddr.addr6))
597 #define LOCAL_PORT_NEEDS_UPDATE(entry, so) (entry->cfentry_lport == 0 && IS_UDP(so))
598
599 #define SKIP_FILTER_FOR_TCP_SOCKET(so) \
600 (so == NULL || so->so_proto == NULL || so->so_proto->pr_domain == NULL || \
601 (so->so_proto->pr_domain->dom_family != PF_INET && so->so_proto->pr_domain->dom_family != PF_INET6) || \
602 so->so_proto->pr_type != SOCK_STREAM || \
603 so->so_proto->pr_protocol != IPPROTO_TCP || \
604 (so->so_flags & SOF_MP_SUBFLOW) != 0 || \
605 (so->so_flags1 & SOF1_CONTENT_FILTER_SKIP) != 0)
606
607 os_refgrp_decl(static, cfil_refgrp, "CFILRefGroup", NULL);
608
609 #define CFIL_INFO_FREE(cfil_info) \
610 if (cfil_info && (os_ref_release(&cfil_info->cfi_ref_count) == 0)) { \
611 cfil_info_free(cfil_info); \
612 }
613
614 /*
615 * Periodic Statistics Report:
616 */
617 static struct thread *cfil_stats_report_thread;
618 #define CFIL_STATS_REPORT_INTERVAL_MIN_MSEC 500 // Highest report frequency
619 #define CFIL_STATS_REPORT_RUN_INTERVAL_NSEC (CFIL_STATS_REPORT_INTERVAL_MIN_MSEC * NSEC_PER_MSEC)
620 #define CFIL_STATS_REPORT_MAX_COUNT 50 // Max stats to be reported per run
621
622 /* This buffer must have same layout as struct cfil_msg_stats_report */
623 struct cfil_stats_report_buffer {
624 struct cfil_msg_hdr msghdr;
625 uint32_t count;
626 struct cfil_msg_sock_stats stats[CFIL_STATS_REPORT_MAX_COUNT];
627 };
628 static struct cfil_stats_report_buffer *global_cfil_stats_report_buffers[MAX_CONTENT_FILTER];
629 static uint32_t global_cfil_stats_counts[MAX_CONTENT_FILTER];
630
631 /*
632 * UDP Garbage Collection:
633 */
634 static struct thread *cfil_udp_gc_thread;
635 #define UDP_FLOW_GC_IDLE_TO 30 // Flow Idle Timeout in seconds
636 #define UDP_FLOW_GC_ACTION_TO 10 // Flow Action Timeout (no action from user space) in seconds
637 #define UDP_FLOW_GC_MAX_COUNT 100 // Max UDP flows to be handled per run
638 #define UDP_FLOW_GC_RUN_INTERVAL_NSEC (10 * NSEC_PER_SEC) // GC wakes up every 10 seconds
639
640 /*
641 * UDP flow queue thresholds
642 */
643 #define UDP_FLOW_GC_MBUF_CNT_MAX (2 << MBSHIFT) // Max mbuf byte count in flow queue (2MB)
644 #define UDP_FLOW_GC_MBUF_NUM_MAX (UDP_FLOW_GC_MBUF_CNT_MAX >> MCLSHIFT) // Max mbuf count in flow queue (1K)
645 #define UDP_FLOW_GC_MBUF_SHIFT 5 // Shift to get 1/32 of platform limits
646 /*
647 * UDP flow queue threshold globals:
648 */
649 static unsigned int cfil_udp_gc_mbuf_num_max = UDP_FLOW_GC_MBUF_NUM_MAX;
650 static unsigned int cfil_udp_gc_mbuf_cnt_max = UDP_FLOW_GC_MBUF_CNT_MAX;
651
652 /*
653 * struct cfil_hash_entry
654 *
655 * Hash entry for cfil_info
656 */
657 struct cfil_hash_entry {
658 LIST_ENTRY(cfil_hash_entry) cfentry_link;
659 struct cfil_info *cfentry_cfil;
660 u_short cfentry_fport;
661 u_short cfentry_lport;
662 sa_family_t cfentry_family;
663 u_int32_t cfentry_flowhash;
664 u_int64_t cfentry_lastused;
665 union {
666 /* foreign host table entry */
667 struct in_addr_4in6 addr46;
668 struct in6_addr addr6;
669 } cfentry_faddr;
670 union {
671 /* local host table entry */
672 struct in_addr_4in6 addr46;
673 struct in6_addr addr6;
674 } cfentry_laddr;
675 uint8_t cfentry_laddr_updated: 1;
676 uint8_t cfentry_lport_updated: 1;
677 uint8_t cfentry_reserved: 6;
678 };
679
680 /*
681 * struct cfil_db
682 *
683 * For each UDP socket, this is a hash table maintaining all cfil_info structs
684 * keyed by the flow 4-tuples <lport,fport,laddr,faddr>.
685 */
686 struct cfil_db {
687 struct socket *cfdb_so;
688 uint32_t cfdb_count; /* Number of total content filters */
689 struct cfilhashhead *cfdb_hashbase;
690 u_long cfdb_hashmask;
691 struct cfil_hash_entry *cfdb_only_entry; /* Optimization for connected UDP */
692 };
693
694 /*
695 * CFIL specific mbuf tag:
696 * Save state of socket at the point of data entry into cfil.
697 * Use saved state for reinjection at protocol layer.
698 */
699 struct cfil_tag {
700 union sockaddr_in_4_6 cfil_faddr;
701 uint32_t cfil_so_state_change_cnt;
702 uint32_t cfil_so_options;
703 int cfil_inp_flags;
704 };
705
706 static ZONE_DECLARE(cfil_hash_entry_zone, "cfil_entry_hash",
707 sizeof(struct cfil_hash_entry), ZC_NONE);
708
709 static ZONE_DECLARE(cfil_db_zone, "cfil_db",
710 sizeof(struct cfil_db), ZC_NONE);
711
712 /*
713 * Statistics
714 */
715
716 struct cfil_stats cfil_stats;
717
718 /*
719 * For troubleshooting
720 */
721 int cfil_log_level = LOG_ERR;
722 int cfil_debug = 1;
723
724 // Debug controls added for selective debugging.
725 // Disabled for production. If enabled,
726 // these will have performance impact
727 #define LIFECYCLE_DEBUG 0
728 #define VERDICT_DEBUG 0
729 #define DATA_DEBUG 0
730 #define SHOW_DEBUG 0
731 #define GC_DEBUG 0
732 #define STATS_DEBUG 0
733
734 /*
735 * Sysctls for logs and statistics
736 */
737 static int sysctl_cfil_filter_list(struct sysctl_oid *, void *, int,
738 struct sysctl_req *);
739 static int sysctl_cfil_sock_list(struct sysctl_oid *, void *, int,
740 struct sysctl_req *);
741
742 SYSCTL_NODE(_net, OID_AUTO, cfil, CTLFLAG_RW | CTLFLAG_LOCKED, 0, "cfil");
743
744 SYSCTL_INT(_net_cfil, OID_AUTO, log, CTLFLAG_RW | CTLFLAG_LOCKED,
745 &cfil_log_level, 0, "");
746
747 SYSCTL_INT(_net_cfil, OID_AUTO, debug, CTLFLAG_RW | CTLFLAG_LOCKED,
748 &cfil_debug, 0, "");
749
750 SYSCTL_UINT(_net_cfil, OID_AUTO, sock_attached_count, CTLFLAG_RD | CTLFLAG_LOCKED,
751 &cfil_sock_attached_count, 0, "");
752
753 SYSCTL_UINT(_net_cfil, OID_AUTO, active_count, CTLFLAG_RD | CTLFLAG_LOCKED,
754 &cfil_active_count, 0, "");
755
756 SYSCTL_UINT(_net_cfil, OID_AUTO, close_wait_timeout, CTLFLAG_RW | CTLFLAG_LOCKED,
757 &cfil_close_wait_timeout, 0, "");
758
759 static int cfil_sbtrim = 1;
760 SYSCTL_UINT(_net_cfil, OID_AUTO, sbtrim, CTLFLAG_RW | CTLFLAG_LOCKED,
761 &cfil_sbtrim, 0, "");
762
763 SYSCTL_PROC(_net_cfil, OID_AUTO, filter_list, CTLFLAG_RD | CTLFLAG_LOCKED,
764 0, 0, sysctl_cfil_filter_list, "S,cfil_filter_stat", "");
765
766 SYSCTL_PROC(_net_cfil, OID_AUTO, sock_list, CTLFLAG_RD | CTLFLAG_LOCKED,
767 0, 0, sysctl_cfil_sock_list, "S,cfil_sock_stat", "");
768
769 SYSCTL_STRUCT(_net_cfil, OID_AUTO, stats, CTLFLAG_RD | CTLFLAG_LOCKED,
770 &cfil_stats, cfil_stats, "");
771
772 /*
773 * Forward declaration to appease the compiler
774 */
775 static int cfil_action_data_pass(struct socket *, struct cfil_info *, uint32_t, int,
776 uint64_t, uint64_t);
777 static int cfil_action_drop(struct socket *, struct cfil_info *, uint32_t);
778 static int cfil_action_bless_client(uint32_t, struct cfil_msg_hdr *);
779 static int cfil_action_set_crypto_key(uint32_t, struct cfil_msg_hdr *);
780 static int cfil_dispatch_closed_event(struct socket *, struct cfil_info *, int);
781 static int cfil_data_common(struct socket *, struct cfil_info *, int, struct sockaddr *,
782 struct mbuf *, struct mbuf *, uint32_t);
783 static int cfil_data_filter(struct socket *, struct cfil_info *, uint32_t, int,
784 struct mbuf *, uint32_t);
785 static void fill_ip_sockaddr_4_6(union sockaddr_in_4_6 *,
786 struct in_addr, u_int16_t);
787 static void fill_ip6_sockaddr_4_6(union sockaddr_in_4_6 *,
788 struct in6_addr *, u_int16_t);
789
790 static int cfil_dispatch_attach_event(struct socket *, struct cfil_info *, uint32_t, int);
791 static void cfil_info_free(struct cfil_info *);
792 static struct cfil_info * cfil_info_alloc(struct socket *, struct cfil_hash_entry *);
793 static int cfil_info_attach_unit(struct socket *, uint32_t, struct cfil_info *);
794 static struct socket * cfil_socket_from_sock_id(cfil_sock_id_t, bool);
795 static struct socket * cfil_socket_from_client_uuid(uuid_t, bool *);
796 static int cfil_service_pending_queue(struct socket *, struct cfil_info *, uint32_t, int);
797 static int cfil_data_service_ctl_q(struct socket *, struct cfil_info *, uint32_t, int);
798 static void cfil_info_verify(struct cfil_info *);
799 static int cfil_update_data_offsets(struct socket *, struct cfil_info *, uint32_t, int,
800 uint64_t, uint64_t);
801 static int cfil_acquire_sockbuf(struct socket *, struct cfil_info *, int);
802 static void cfil_release_sockbuf(struct socket *, int);
803 static int cfil_filters_attached(struct socket *);
804
805 static void cfil_rw_lock_exclusive(lck_rw_t *);
806 static void cfil_rw_unlock_exclusive(lck_rw_t *);
807 static void cfil_rw_lock_shared(lck_rw_t *);
808 static void cfil_rw_unlock_shared(lck_rw_t *);
809 static boolean_t cfil_rw_lock_shared_to_exclusive(lck_rw_t *);
810 static void cfil_rw_lock_exclusive_to_shared(lck_rw_t *);
811
812 static unsigned int cfil_data_length(struct mbuf *, int *, int *);
813 static errno_t cfil_db_init(struct socket *);
814 static void cfil_db_free(struct socket *so);
815 struct cfil_hash_entry *cfil_db_lookup_entry(struct cfil_db *, struct sockaddr *, struct sockaddr *, boolean_t);
816 struct cfil_hash_entry *cfil_db_lookup_entry_internal(struct cfil_db *, struct sockaddr *, struct sockaddr *, boolean_t, boolean_t);
817 struct cfil_hash_entry *cfil_db_lookup_entry_with_sockid(struct cfil_db *, u_int64_t);
818 struct cfil_hash_entry *cfil_db_add_entry(struct cfil_db *, struct sockaddr *, struct sockaddr *);
819 void cfil_db_update_entry_local(struct cfil_db *, struct cfil_hash_entry *, struct sockaddr *, struct mbuf *);
820 void cfil_db_delete_entry(struct cfil_db *, struct cfil_hash_entry *);
821 struct cfil_hash_entry *cfil_sock_udp_get_flow(struct socket *, uint32_t, bool, struct sockaddr *, struct sockaddr *, struct mbuf *, int);
822 struct cfil_info *cfil_db_get_cfil_info(struct cfil_db *, cfil_sock_id_t);
823 static errno_t cfil_sock_udp_handle_data(bool, struct socket *, struct sockaddr *, struct sockaddr *,
824 struct mbuf *, struct mbuf *, uint32_t);
825 static int cfil_sock_udp_get_address_from_control(sa_family_t, struct mbuf *, uint8_t **);
826 static int32_t cfil_sock_udp_data_pending(struct sockbuf *, bool);
827 static void cfil_sock_udp_is_closed(struct socket *);
828 static int cfil_sock_udp_notify_shutdown(struct socket *, int, int, int);
829 static int cfil_sock_udp_shutdown(struct socket *, int *);
830 static void cfil_sock_udp_close_wait(struct socket *);
831 static void cfil_sock_udp_buf_update(struct sockbuf *);
832 static int cfil_filters_udp_attached(struct socket *, bool);
833 static void cfil_get_flow_address_v6(struct cfil_hash_entry *, struct inpcb *,
834 struct in6_addr **, struct in6_addr **,
835 u_int16_t *, u_int16_t *);
836 static void cfil_get_flow_address(struct cfil_hash_entry *, struct inpcb *,
837 struct in_addr *, struct in_addr *,
838 u_int16_t *, u_int16_t *);
839 static void cfil_info_log(int, struct cfil_info *, const char *);
840 void cfil_filter_show(u_int32_t);
841 void cfil_info_show(void);
842 bool cfil_info_idle_timed_out(struct cfil_info *, int, u_int64_t);
843 bool cfil_info_action_timed_out(struct cfil_info *, int);
844 bool cfil_info_buffer_threshold_exceeded(struct cfil_info *);
845 struct m_tag *cfil_dgram_save_socket_state(struct cfil_info *, struct mbuf *);
846 boolean_t cfil_dgram_peek_socket_state(struct mbuf *m, int *inp_flags);
847 static void cfil_udp_gc_thread_func(void *, wait_result_t);
848 static void cfil_info_udp_expire(void *, wait_result_t);
849 static bool fill_cfil_hash_entry_from_address(struct cfil_hash_entry *, bool, struct sockaddr *, bool);
850 static void cfil_sock_received_verdict(struct socket *so);
851 static void cfil_fill_event_msg_addresses(struct cfil_hash_entry *, struct inpcb *,
852 union sockaddr_in_4_6 *, union sockaddr_in_4_6 *,
853 boolean_t, boolean_t);
854 static void cfil_stats_report_thread_func(void *, wait_result_t);
855 static void cfil_stats_report(void *v, wait_result_t w);
856
857 bool check_port(struct sockaddr *, u_short);
858
859 /*
860 * Content filter global read write lock
861 */
862
863 static void
864 cfil_rw_lock_exclusive(lck_rw_t *lck)
865 {
866 void *lr_saved;
867
868 lr_saved = __builtin_return_address(0);
869
870 lck_rw_lock_exclusive(lck);
871
872 cfil_rw_lock_history[cfil_rw_nxt_lck] = lr_saved;
873 cfil_rw_nxt_lck = (cfil_rw_nxt_lck + 1) % CFIL_RW_LCK_MAX;
874 }
875
876 static void
877 cfil_rw_unlock_exclusive(lck_rw_t *lck)
878 {
879 void *lr_saved;
880
881 lr_saved = __builtin_return_address(0);
882
883 lck_rw_unlock_exclusive(lck);
884
885 cfil_rw_unlock_history[cfil_rw_nxt_unlck] = lr_saved;
886 cfil_rw_nxt_unlck = (cfil_rw_nxt_unlck + 1) % CFIL_RW_LCK_MAX;
887 }
888
889 static void
890 cfil_rw_lock_shared(lck_rw_t *lck)
891 {
892 void *lr_saved;
893
894 lr_saved = __builtin_return_address(0);
895
896 lck_rw_lock_shared(lck);
897
898 cfil_rw_lock_history[cfil_rw_nxt_lck] = lr_saved;
899 cfil_rw_nxt_lck = (cfil_rw_nxt_lck + 1) % CFIL_RW_LCK_MAX;
900 }
901
902 static void
903 cfil_rw_unlock_shared(lck_rw_t *lck)
904 {
905 void *lr_saved;
906
907 lr_saved = __builtin_return_address(0);
908
909 lck_rw_unlock_shared(lck);
910
911 cfil_rw_unlock_history[cfil_rw_nxt_unlck] = lr_saved;
912 cfil_rw_nxt_unlck = (cfil_rw_nxt_unlck + 1) % CFIL_RW_LCK_MAX;
913 }
914
915 static boolean_t
916 cfil_rw_lock_shared_to_exclusive(lck_rw_t *lck)
917 {
918 void *lr_saved;
919 boolean_t upgraded;
920
921 lr_saved = __builtin_return_address(0);
922
923 upgraded = lck_rw_lock_shared_to_exclusive(lck);
924 if (upgraded) {
925 cfil_rw_unlock_history[cfil_rw_nxt_unlck] = lr_saved;
926 cfil_rw_nxt_unlck = (cfil_rw_nxt_unlck + 1) % CFIL_RW_LCK_MAX;
927 }
928 return upgraded;
929 }
930
931 static void
932 cfil_rw_lock_exclusive_to_shared(lck_rw_t *lck)
933 {
934 void *lr_saved;
935
936 lr_saved = __builtin_return_address(0);
937
938 lck_rw_lock_exclusive_to_shared(lck);
939
940 cfil_rw_lock_history[cfil_rw_nxt_lck] = lr_saved;
941 cfil_rw_nxt_lck = (cfil_rw_nxt_lck + 1) % CFIL_RW_LCK_MAX;
942 }
943
944 static void
945 cfil_rw_lock_assert_held(lck_rw_t *lck, int exclusive)
946 {
947 #if !MACH_ASSERT
948 #pragma unused(lck, exclusive)
949 #endif
950 LCK_RW_ASSERT(lck,
951 exclusive ? LCK_RW_ASSERT_EXCLUSIVE : LCK_RW_ASSERT_HELD);
952 }
953
954 /*
955 * Return the number of bytes in the mbuf chain using the same
956 * method as m_length() or sballoc()
957 *
958 * Returns data len - starting from PKT start
959 * - retmbcnt - optional param to get total mbuf bytes in chain
960 * - retmbnum - optional param to get number of mbufs in chain
961 */
962 static unsigned int
963 cfil_data_length(struct mbuf *m, int *retmbcnt, int *retmbnum)
964 {
965 struct mbuf *m0;
966 unsigned int pktlen = 0;
967 int mbcnt;
968 int mbnum;
969
970 // Locate the start of data
971 for (m0 = m; m0 != NULL; m0 = m0->m_next) {
972 if (m0->m_flags & M_PKTHDR) {
973 break;
974 }
975 }
976 if (m0 == NULL) {
977 CFIL_LOG(LOG_ERR, "cfil_data_length: no M_PKTHDR");
978 return 0;
979 }
980 m = m0;
981
982 if (retmbcnt == NULL && retmbnum == NULL) {
983 return m_length(m);
984 }
985
986 pktlen = 0;
987 mbcnt = 0;
988 mbnum = 0;
989 for (m0 = m; m0 != NULL; m0 = m0->m_next) {
990 pktlen += m0->m_len;
991 mbnum++;
992 mbcnt += MSIZE;
993 if (m0->m_flags & M_EXT) {
994 mbcnt += m0->m_ext.ext_size;
995 }
996 }
997 if (retmbcnt) {
998 *retmbcnt = mbcnt;
999 }
1000 if (retmbnum) {
1001 *retmbnum = mbnum;
1002 }
1003 return pktlen;
1004 }
1005
1006 static struct mbuf *
1007 cfil_data_start(struct mbuf *m)
1008 {
1009 struct mbuf *m0;
1010
1011 // Locate the start of data
1012 for (m0 = m; m0 != NULL; m0 = m0->m_next) {
1013 if (m0->m_flags & M_PKTHDR) {
1014 break;
1015 }
1016 }
1017 return m0;
1018 }
1019
1020 /*
1021 * Common mbuf queue utilities
1022 */
1023
1024 static inline void
1025 cfil_queue_init(struct cfil_queue *cfq)
1026 {
1027 cfq->q_start = 0;
1028 cfq->q_end = 0;
1029 MBUFQ_INIT(&cfq->q_mq);
1030 }
1031
1032 static inline uint64_t
1033 cfil_queue_drain(struct cfil_queue *cfq)
1034 {
1035 uint64_t drained = cfq->q_start - cfq->q_end;
1036 cfq->q_start = 0;
1037 cfq->q_end = 0;
1038 MBUFQ_DRAIN(&cfq->q_mq);
1039
1040 return drained;
1041 }
1042
1043 /* Return 1 when empty, 0 otherwise */
1044 static inline int
1045 cfil_queue_empty(struct cfil_queue *cfq)
1046 {
1047 return MBUFQ_EMPTY(&cfq->q_mq);
1048 }
1049
1050 static inline uint64_t
1051 cfil_queue_offset_first(struct cfil_queue *cfq)
1052 {
1053 return cfq->q_start;
1054 }
1055
1056 static inline uint64_t
1057 cfil_queue_offset_last(struct cfil_queue *cfq)
1058 {
1059 return cfq->q_end;
1060 }
1061
1062 static inline uint64_t
1063 cfil_queue_len(struct cfil_queue *cfq)
1064 {
1065 return cfq->q_end - cfq->q_start;
1066 }
1067
1068 /*
1069 * Routines to verify some fundamental assumptions
1070 */
1071
1072 static void
1073 cfil_queue_verify(struct cfil_queue *cfq)
1074 {
1075 mbuf_t chain;
1076 mbuf_t m;
1077 mbuf_t n;
1078 uint64_t queuesize = 0;
1079
1080 /* Verify offset are ordered */
1081 VERIFY(cfq->q_start <= cfq->q_end);
1082
1083 /*
1084 * When queue is empty, the offsets are equal otherwise the offsets
1085 * are different
1086 */
1087 VERIFY((MBUFQ_EMPTY(&cfq->q_mq) && cfq->q_start == cfq->q_end) ||
1088 (!MBUFQ_EMPTY(&cfq->q_mq) &&
1089 cfq->q_start != cfq->q_end));
1090
1091 MBUFQ_FOREACH(chain, &cfq->q_mq) {
1092 size_t chainsize = 0;
1093 m = chain;
1094 unsigned int mlen = cfil_data_length(m, NULL, NULL);
1095 // skip the addr and control stuff if present
1096 m = cfil_data_start(m);
1097
1098 if (m == NULL ||
1099 m == (void *)M_TAG_FREE_PATTERN ||
1100 m->m_next == (void *)M_TAG_FREE_PATTERN ||
1101 m->m_nextpkt == (void *)M_TAG_FREE_PATTERN) {
1102 panic("%s - mq %p is free at %p", __func__,
1103 &cfq->q_mq, m);
1104 }
1105 for (n = m; n != NULL; n = n->m_next) {
1106 if (n->m_type != MT_DATA &&
1107 n->m_type != MT_HEADER &&
1108 n->m_type != MT_OOBDATA) {
1109 panic("%s - %p unsupported type %u", __func__,
1110 n, n->m_type);
1111 }
1112 chainsize += n->m_len;
1113 }
1114 if (mlen != chainsize) {
1115 panic("%s - %p m_length() %u != chainsize %lu",
1116 __func__, m, mlen, chainsize);
1117 }
1118 queuesize += chainsize;
1119 }
1120 if (queuesize != cfq->q_end - cfq->q_start) {
1121 panic("%s - %p queuesize %llu != offsetdiffs %llu", __func__,
1122 m, queuesize, cfq->q_end - cfq->q_start);
1123 }
1124 }
1125
1126 static void
1127 cfil_queue_enqueue(struct cfil_queue *cfq, mbuf_t m, size_t len)
1128 {
1129 CFIL_QUEUE_VERIFY(cfq);
1130
1131 MBUFQ_ENQUEUE(&cfq->q_mq, m);
1132 cfq->q_end += len;
1133
1134 CFIL_QUEUE_VERIFY(cfq);
1135 }
1136
1137 static void
1138 cfil_queue_remove(struct cfil_queue *cfq, mbuf_t m, size_t len)
1139 {
1140 CFIL_QUEUE_VERIFY(cfq);
1141
1142 VERIFY(cfil_data_length(m, NULL, NULL) == len);
1143
1144 MBUFQ_REMOVE(&cfq->q_mq, m);
1145 MBUFQ_NEXT(m) = NULL;
1146 cfq->q_start += len;
1147
1148 CFIL_QUEUE_VERIFY(cfq);
1149 }
1150
1151 static mbuf_t
1152 cfil_queue_first(struct cfil_queue *cfq)
1153 {
1154 return MBUFQ_FIRST(&cfq->q_mq);
1155 }
1156
1157 static mbuf_t
1158 cfil_queue_next(struct cfil_queue *cfq, mbuf_t m)
1159 {
1160 #pragma unused(cfq)
1161 return MBUFQ_NEXT(m);
1162 }
1163
1164 static void
1165 cfil_entry_buf_verify(struct cfe_buf *cfe_buf)
1166 {
1167 CFIL_QUEUE_VERIFY(&cfe_buf->cfe_ctl_q);
1168 CFIL_QUEUE_VERIFY(&cfe_buf->cfe_pending_q);
1169
1170 /* Verify the queues are ordered so that pending is before ctl */
1171 VERIFY(cfe_buf->cfe_ctl_q.q_start >= cfe_buf->cfe_pending_q.q_end);
1172
1173 /* The peek offset cannot be less than the pass offset */
1174 VERIFY(cfe_buf->cfe_peek_offset >= cfe_buf->cfe_pass_offset);
1175
1176 /* Make sure we've updated the offset we peeked at */
1177 VERIFY(cfe_buf->cfe_ctl_q.q_start <= cfe_buf->cfe_peeked);
1178 }
1179
1180 static void
1181 cfil_entry_verify(struct cfil_entry *entry)
1182 {
1183 cfil_entry_buf_verify(&entry->cfe_snd);
1184 cfil_entry_buf_verify(&entry->cfe_rcv);
1185 }
1186
1187 static void
1188 cfil_info_buf_verify(struct cfi_buf *cfi_buf)
1189 {
1190 CFIL_QUEUE_VERIFY(&cfi_buf->cfi_inject_q);
1191
1192 VERIFY(cfi_buf->cfi_pending_first <= cfi_buf->cfi_pending_last);
1193 }
1194
1195 static void
1196 cfil_info_verify(struct cfil_info *cfil_info)
1197 {
1198 int i;
1199
1200 if (cfil_info == NULL) {
1201 return;
1202 }
1203
1204 cfil_info_buf_verify(&cfil_info->cfi_snd);
1205 cfil_info_buf_verify(&cfil_info->cfi_rcv);
1206
1207 for (i = 0; i < MAX_CONTENT_FILTER; i++) {
1208 cfil_entry_verify(&cfil_info->cfi_entries[i]);
1209 }
1210 }
1211
1212 static void
1213 verify_content_filter(struct content_filter *cfc)
1214 {
1215 struct cfil_entry *entry;
1216 uint32_t count = 0;
1217
1218 VERIFY(cfc->cf_sock_count >= 0);
1219
1220 TAILQ_FOREACH(entry, &cfc->cf_sock_entries, cfe_link) {
1221 count++;
1222 VERIFY(cfc == entry->cfe_filter);
1223 }
1224 VERIFY(count == cfc->cf_sock_count);
1225 }
1226
1227 /*
1228 * Kernel control socket callbacks
1229 */
1230 static errno_t
1231 cfil_ctl_connect(kern_ctl_ref kctlref, struct sockaddr_ctl *sac,
1232 void **unitinfo)
1233 {
1234 errno_t error = 0;
1235 struct content_filter *cfc = NULL;
1236
1237 CFIL_LOG(LOG_NOTICE, "");
1238
1239 cfc = zalloc(content_filter_zone);
1240 if (cfc == NULL) {
1241 CFIL_LOG(LOG_ERR, "zalloc failed");
1242 error = ENOMEM;
1243 goto done;
1244 }
1245 bzero(cfc, sizeof(struct content_filter));
1246
1247 cfil_rw_lock_exclusive(&cfil_lck_rw);
1248 if (content_filters == NULL) {
1249 struct content_filter **tmp;
1250
1251 cfil_rw_unlock_exclusive(&cfil_lck_rw);
1252
1253 MALLOC(tmp,
1254 struct content_filter **,
1255 MAX_CONTENT_FILTER * sizeof(struct content_filter *),
1256 M_TEMP,
1257 M_WAITOK | M_ZERO);
1258
1259 cfil_rw_lock_exclusive(&cfil_lck_rw);
1260
1261 if (tmp == NULL && content_filters == NULL) {
1262 error = ENOMEM;
1263 cfil_rw_unlock_exclusive(&cfil_lck_rw);
1264 goto done;
1265 }
1266 /* Another thread may have won the race */
1267 if (content_filters != NULL) {
1268 FREE(tmp, M_TEMP);
1269 } else {
1270 content_filters = tmp;
1271 }
1272 }
1273
1274 if (sac->sc_unit == 0 || sac->sc_unit > MAX_CONTENT_FILTER) {
1275 CFIL_LOG(LOG_ERR, "bad sc_unit %u", sac->sc_unit);
1276 error = EINVAL;
1277 } else if (content_filters[sac->sc_unit - 1] != NULL) {
1278 CFIL_LOG(LOG_ERR, "sc_unit %u in use", sac->sc_unit);
1279 error = EADDRINUSE;
1280 } else {
1281 /*
1282 * kernel control socket kcunit numbers start at 1
1283 */
1284 content_filters[sac->sc_unit - 1] = cfc;
1285
1286 cfc->cf_kcref = kctlref;
1287 cfc->cf_kcunit = sac->sc_unit;
1288 TAILQ_INIT(&cfc->cf_sock_entries);
1289
1290 *unitinfo = cfc;
1291 cfil_active_count++;
1292
1293 // Allocate periodic stats buffer for this filter
1294 if (global_cfil_stats_report_buffers[cfc->cf_kcunit - 1] == NULL) {
1295 cfil_rw_unlock_exclusive(&cfil_lck_rw);
1296
1297 struct cfil_stats_report_buffer *buf;
1298
1299 MALLOC(buf,
1300 struct cfil_stats_report_buffer *,
1301 sizeof(struct cfil_stats_report_buffer),
1302 M_TEMP,
1303 M_WAITOK | M_ZERO);
1304
1305 cfil_rw_lock_exclusive(&cfil_lck_rw);
1306
1307 if (buf == NULL) {
1308 error = ENOMEM;
1309 cfil_rw_unlock_exclusive(&cfil_lck_rw);
1310 goto done;
1311 }
1312
1313 /* Another thread may have won the race */
1314 if (global_cfil_stats_report_buffers[cfc->cf_kcunit - 1] != NULL) {
1315 FREE(buf, M_TEMP);
1316 } else {
1317 global_cfil_stats_report_buffers[cfc->cf_kcunit - 1] = buf;
1318 }
1319 }
1320 }
1321 cfil_rw_unlock_exclusive(&cfil_lck_rw);
1322 done:
1323 if (error != 0 && cfc != NULL) {
1324 zfree(content_filter_zone, cfc);
1325 }
1326
1327 if (error == 0) {
1328 OSIncrementAtomic(&cfil_stats.cfs_ctl_connect_ok);
1329 } else {
1330 OSIncrementAtomic(&cfil_stats.cfs_ctl_connect_fail);
1331 }
1332
1333 CFIL_LOG(LOG_INFO, "return %d cfil_active_count %u kcunit %u",
1334 error, cfil_active_count, sac->sc_unit);
1335
1336 return error;
1337 }
1338
1339 static errno_t
1340 cfil_ctl_disconnect(kern_ctl_ref kctlref, u_int32_t kcunit, void *unitinfo)
1341 {
1342 #pragma unused(kctlref)
1343 errno_t error = 0;
1344 struct content_filter *cfc;
1345 struct cfil_entry *entry;
1346 uint64_t sock_flow_id = 0;
1347
1348 CFIL_LOG(LOG_NOTICE, "");
1349
1350 if (content_filters == NULL) {
1351 CFIL_LOG(LOG_ERR, "no content filter");
1352 error = EINVAL;
1353 goto done;
1354 }
1355 if (kcunit > MAX_CONTENT_FILTER) {
1356 CFIL_LOG(LOG_ERR, "kcunit %u > MAX_CONTENT_FILTER (%d)",
1357 kcunit, MAX_CONTENT_FILTER);
1358 error = EINVAL;
1359 goto done;
1360 }
1361
1362 cfc = (struct content_filter *)unitinfo;
1363 if (cfc == NULL) {
1364 goto done;
1365 }
1366
1367 cfil_rw_lock_exclusive(&cfil_lck_rw);
1368 if (content_filters[kcunit - 1] != cfc || cfc->cf_kcunit != kcunit) {
1369 CFIL_LOG(LOG_ERR, "bad unit info %u)",
1370 kcunit);
1371 cfil_rw_unlock_exclusive(&cfil_lck_rw);
1372 goto done;
1373 }
1374 cfc->cf_flags |= CFF_DETACHING;
1375 /*
1376 * Remove all sockets from the filter
1377 */
1378 while ((entry = TAILQ_FIRST(&cfc->cf_sock_entries)) != NULL) {
1379 cfil_rw_lock_assert_held(&cfil_lck_rw, 1);
1380
1381 verify_content_filter(cfc);
1382 /*
1383 * Accept all outstanding data by pushing to next filter
1384 * or back to socket
1385 *
1386 * TBD: Actually we should make sure all data has been pushed
1387 * back to socket
1388 */
1389 if (entry->cfe_cfil_info && entry->cfe_cfil_info->cfi_so) {
1390 struct cfil_info *cfil_info = entry->cfe_cfil_info;
1391 struct socket *so = cfil_info->cfi_so;
1392 sock_flow_id = cfil_info->cfi_sock_id;
1393
1394 /* Need to let data flow immediately */
1395 entry->cfe_flags |= CFEF_SENT_SOCK_ATTACHED |
1396 CFEF_DATA_START;
1397
1398 /*
1399 * Respect locking hierarchy
1400 */
1401 cfil_rw_unlock_exclusive(&cfil_lck_rw);
1402
1403 socket_lock(so, 1);
1404
1405 /*
1406 * When cfe_filter is NULL the filter is detached
1407 * and the entry has been removed from cf_sock_entries
1408 */
1409 if ((so->so_cfil == NULL && so->so_cfil_db == NULL) || entry->cfe_filter == NULL) {
1410 cfil_rw_lock_exclusive(&cfil_lck_rw);
1411 goto release;
1412 }
1413
1414 (void) cfil_action_data_pass(so, cfil_info, kcunit, 1,
1415 CFM_MAX_OFFSET,
1416 CFM_MAX_OFFSET);
1417
1418 (void) cfil_action_data_pass(so, cfil_info, kcunit, 0,
1419 CFM_MAX_OFFSET,
1420 CFM_MAX_OFFSET);
1421
1422 cfil_rw_lock_exclusive(&cfil_lck_rw);
1423
1424 /*
1425 * Check again to make sure if the cfil_info is still valid
1426 * as the socket may have been unlocked when when calling
1427 * cfil_acquire_sockbuf()
1428 */
1429 if (entry->cfe_filter == NULL ||
1430 (so->so_cfil == NULL && cfil_db_get_cfil_info(so->so_cfil_db, sock_flow_id) == NULL)) {
1431 goto release;
1432 }
1433
1434 /* The filter is now detached */
1435 entry->cfe_flags |= CFEF_CFIL_DETACHED;
1436 #if LIFECYCLE_DEBUG
1437 cfil_info_log(LOG_DEBUG, cfil_info, "CFIL: LIFECYCLE: - FILTER DISCONNECTED");
1438 #endif
1439 CFIL_LOG(LOG_NOTICE, "so %llx detached %u",
1440 (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit);
1441 if ((cfil_info->cfi_flags & CFIF_CLOSE_WAIT) &&
1442 cfil_filters_attached(so) == 0) {
1443 CFIL_LOG(LOG_NOTICE, "so %llx waking",
1444 (uint64_t)VM_KERNEL_ADDRPERM(so));
1445 wakeup((caddr_t)cfil_info);
1446 }
1447
1448 /*
1449 * Remove the filter entry from the content filter
1450 * but leave the rest of the state intact as the queues
1451 * may not be empty yet
1452 */
1453 entry->cfe_filter = NULL;
1454 entry->cfe_necp_control_unit = 0;
1455
1456 TAILQ_REMOVE(&cfc->cf_sock_entries, entry, cfe_link);
1457 cfc->cf_sock_count--;
1458 release:
1459 socket_unlock(so, 1);
1460 }
1461 }
1462 verify_content_filter(cfc);
1463
1464 /* Free the stats buffer for this filter */
1465 if (global_cfil_stats_report_buffers[cfc->cf_kcunit - 1] != NULL) {
1466 FREE(global_cfil_stats_report_buffers[cfc->cf_kcunit - 1], M_TEMP);
1467 global_cfil_stats_report_buffers[cfc->cf_kcunit - 1] = NULL;
1468 }
1469 VERIFY(cfc->cf_sock_count == 0);
1470
1471 /*
1472 * Make filter inactive
1473 */
1474 content_filters[kcunit - 1] = NULL;
1475 cfil_active_count--;
1476 cfil_rw_unlock_exclusive(&cfil_lck_rw);
1477
1478 if (cfc->cf_crypto_state != NULL) {
1479 cfil_crypto_cleanup_state(cfc->cf_crypto_state);
1480 cfc->cf_crypto_state = NULL;
1481 }
1482
1483 zfree(content_filter_zone, cfc);
1484 done:
1485 if (error == 0) {
1486 OSIncrementAtomic(&cfil_stats.cfs_ctl_disconnect_ok);
1487 } else {
1488 OSIncrementAtomic(&cfil_stats.cfs_ctl_disconnect_fail);
1489 }
1490
1491 CFIL_LOG(LOG_INFO, "return %d cfil_active_count %u kcunit %u",
1492 error, cfil_active_count, kcunit);
1493
1494 return error;
1495 }
1496
1497 /*
1498 * cfil_acquire_sockbuf()
1499 *
1500 * Prevent any other thread from acquiring the sockbuf
1501 * We use sb_cfil_thread as a semaphore to prevent other threads from
1502 * messing with the sockbuf -- see sblock()
1503 * Note: We do not set SB_LOCK here because the thread may check or modify
1504 * SB_LOCK several times until it calls cfil_release_sockbuf() -- currently
1505 * sblock(), sbunlock() or sodefunct()
1506 */
1507 static int
1508 cfil_acquire_sockbuf(struct socket *so, struct cfil_info *cfil_info, int outgoing)
1509 {
1510 thread_t tp = current_thread();
1511 struct sockbuf *sb = outgoing ? &so->so_snd : &so->so_rcv;
1512 lck_mtx_t *mutex_held;
1513 int error = 0;
1514
1515 /*
1516 * Wait until no thread is holding the sockbuf and other content
1517 * filter threads have released the sockbuf
1518 */
1519 while ((sb->sb_flags & SB_LOCK) ||
1520 (sb->sb_cfil_thread != NULL && sb->sb_cfil_thread != tp)) {
1521 if (so->so_proto->pr_getlock != NULL) {
1522 mutex_held = (*so->so_proto->pr_getlock)(so, PR_F_WILLUNLOCK);
1523 } else {
1524 mutex_held = so->so_proto->pr_domain->dom_mtx;
1525 }
1526
1527 LCK_MTX_ASSERT(mutex_held, LCK_MTX_ASSERT_OWNED);
1528
1529 sb->sb_wantlock++;
1530 VERIFY(sb->sb_wantlock != 0);
1531
1532 msleep(&sb->sb_flags, mutex_held, PSOCK, "cfil_acquire_sockbuf",
1533 NULL);
1534
1535 VERIFY(sb->sb_wantlock != 0);
1536 sb->sb_wantlock--;
1537 }
1538 /*
1539 * Use reference count for repetitive calls on same thread
1540 */
1541 if (sb->sb_cfil_refs == 0) {
1542 VERIFY(sb->sb_cfil_thread == NULL);
1543 VERIFY((sb->sb_flags & SB_LOCK) == 0);
1544
1545 sb->sb_cfil_thread = tp;
1546 sb->sb_flags |= SB_LOCK;
1547 }
1548 sb->sb_cfil_refs++;
1549
1550 /* We acquire the socket buffer when we need to cleanup */
1551 if (cfil_info == NULL) {
1552 CFIL_LOG(LOG_ERR, "so %llx cfil detached",
1553 (uint64_t)VM_KERNEL_ADDRPERM(so));
1554 error = 0;
1555 } else if (cfil_info->cfi_flags & CFIF_DROP) {
1556 CFIL_LOG(LOG_ERR, "so %llx drop set",
1557 (uint64_t)VM_KERNEL_ADDRPERM(so));
1558 error = EPIPE;
1559 }
1560
1561 return error;
1562 }
1563
1564 static void
1565 cfil_release_sockbuf(struct socket *so, int outgoing)
1566 {
1567 struct sockbuf *sb = outgoing ? &so->so_snd : &so->so_rcv;
1568 thread_t tp = current_thread();
1569
1570 socket_lock_assert_owned(so);
1571
1572 if (sb->sb_cfil_thread != NULL && sb->sb_cfil_thread != tp) {
1573 panic("%s sb_cfil_thread %p not current %p", __func__,
1574 sb->sb_cfil_thread, tp);
1575 }
1576 /*
1577 * Don't panic if we are defunct because SB_LOCK has
1578 * been cleared by sodefunct()
1579 */
1580 if (!(so->so_flags & SOF_DEFUNCT) && !(sb->sb_flags & SB_LOCK)) {
1581 panic("%s SB_LOCK not set on %p", __func__,
1582 sb);
1583 }
1584 /*
1585 * We can unlock when the thread unwinds to the last reference
1586 */
1587 sb->sb_cfil_refs--;
1588 if (sb->sb_cfil_refs == 0) {
1589 sb->sb_cfil_thread = NULL;
1590 sb->sb_flags &= ~SB_LOCK;
1591
1592 if (sb->sb_wantlock > 0) {
1593 wakeup(&sb->sb_flags);
1594 }
1595 }
1596 }
1597
1598 cfil_sock_id_t
1599 cfil_sock_id_from_socket(struct socket *so)
1600 {
1601 if ((so->so_flags & SOF_CONTENT_FILTER) && so->so_cfil) {
1602 return so->so_cfil->cfi_sock_id;
1603 } else {
1604 return CFIL_SOCK_ID_NONE;
1605 }
1606 }
1607
1608 static bool
1609 cfil_socket_safe_lock(struct inpcb *inp)
1610 {
1611 if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) != WNT_STOPUSING) {
1612 socket_lock(inp->inp_socket, 1);
1613 if (in_pcb_checkstate(inp, WNT_RELEASE, 1) != WNT_STOPUSING) {
1614 return true;
1615 }
1616 socket_unlock(inp->inp_socket, 1);
1617 }
1618 return false;
1619 }
1620
1621 /*
1622 * cfil_socket_safe_lock_rip -
1623 * This routine attempts to lock the rip socket safely.
1624 * The passed in ripcbinfo is assumed to be locked and must be unlocked (regardless
1625 * of success/failure) before calling socket_unlock(). This is to avoid double
1626 * locking since rip_unlock() will lock ripcbinfo if it needs to dispose inpcb when
1627 * so_usecount is 0.
1628 */
1629 static bool
1630 cfil_socket_safe_lock_rip(struct inpcb *inp, struct inpcbinfo *pcbinfo)
1631 {
1632 struct socket *so = NULL;
1633
1634 VERIFY(pcbinfo != NULL);
1635
1636 if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) != WNT_STOPUSING) {
1637 so = inp->inp_socket;
1638 socket_lock(so, 1);
1639 if (in_pcb_checkstate(inp, WNT_RELEASE, 1) != WNT_STOPUSING) {
1640 lck_rw_done(pcbinfo->ipi_lock);
1641 return true;
1642 }
1643 }
1644
1645 lck_rw_done(pcbinfo->ipi_lock);
1646
1647 if (so) {
1648 socket_unlock(so, 1);
1649 }
1650 return false;
1651 }
1652
1653 static struct socket *
1654 cfil_socket_from_sock_id(cfil_sock_id_t cfil_sock_id, bool udp_only)
1655 {
1656 struct socket *so = NULL;
1657 u_int64_t gencnt = cfil_sock_id >> 32;
1658 u_int32_t flowhash = (u_int32_t)(cfil_sock_id & 0x0ffffffff);
1659 struct inpcb *inp = NULL;
1660 struct inpcbinfo *pcbinfo = NULL;
1661
1662 #if VERDICT_DEBUG
1663 CFIL_LOG(LOG_ERR, "CFIL: VERDICT: search for socket: id %llu gencnt %llx flowhash %x", cfil_sock_id, gencnt, flowhash);
1664 #endif
1665
1666 if (udp_only) {
1667 goto find_udp;
1668 }
1669
1670 pcbinfo = &tcbinfo;
1671 lck_rw_lock_shared(pcbinfo->ipi_lock);
1672 LIST_FOREACH(inp, pcbinfo->ipi_listhead, inp_list) {
1673 if (inp->inp_state != INPCB_STATE_DEAD &&
1674 inp->inp_socket != NULL &&
1675 inp->inp_flowhash == flowhash &&
1676 (inp->inp_socket->so_gencnt & 0x0ffffffff) == gencnt &&
1677 inp->inp_socket->so_cfil != NULL) {
1678 if (cfil_socket_safe_lock(inp)) {
1679 so = inp->inp_socket;
1680 }
1681 break;
1682 }
1683 }
1684 lck_rw_done(pcbinfo->ipi_lock);
1685 if (so != NULL) {
1686 goto done;
1687 }
1688
1689 find_udp:
1690
1691 pcbinfo = &udbinfo;
1692 lck_rw_lock_shared(pcbinfo->ipi_lock);
1693 LIST_FOREACH(inp, pcbinfo->ipi_listhead, inp_list) {
1694 if (inp->inp_state != INPCB_STATE_DEAD &&
1695 inp->inp_socket != NULL &&
1696 inp->inp_socket->so_cfil_db != NULL &&
1697 (inp->inp_socket->so_gencnt & 0x0ffffffff) == gencnt) {
1698 if (cfil_socket_safe_lock(inp)) {
1699 so = inp->inp_socket;
1700 }
1701 break;
1702 }
1703 }
1704 lck_rw_done(pcbinfo->ipi_lock);
1705 if (so != NULL) {
1706 goto done;
1707 }
1708
1709 pcbinfo = &ripcbinfo;
1710 lck_rw_lock_shared(pcbinfo->ipi_lock);
1711 LIST_FOREACH(inp, pcbinfo->ipi_listhead, inp_list) {
1712 if (inp->inp_state != INPCB_STATE_DEAD &&
1713 inp->inp_socket != NULL &&
1714 inp->inp_socket->so_cfil_db != NULL &&
1715 (inp->inp_socket->so_gencnt & 0x0ffffffff) == gencnt) {
1716 if (cfil_socket_safe_lock_rip(inp, pcbinfo)) {
1717 so = inp->inp_socket;
1718 }
1719 /* pcbinfo is already unlocked, we are done. */
1720 goto done;
1721 }
1722 }
1723 lck_rw_done(pcbinfo->ipi_lock);
1724
1725 done:
1726 if (so == NULL) {
1727 OSIncrementAtomic(&cfil_stats.cfs_sock_id_not_found);
1728 CFIL_LOG(LOG_DEBUG,
1729 "no socket for sock_id %llx gencnt %llx flowhash %x",
1730 cfil_sock_id, gencnt, flowhash);
1731 }
1732
1733 return so;
1734 }
1735
1736 static struct socket *
1737 cfil_socket_from_client_uuid(uuid_t necp_client_uuid, bool *cfil_attached)
1738 {
1739 struct socket *so = NULL;
1740 struct inpcb *inp = NULL;
1741 struct inpcbinfo *pcbinfo = &tcbinfo;
1742
1743 lck_rw_lock_shared(pcbinfo->ipi_lock);
1744 LIST_FOREACH(inp, pcbinfo->ipi_listhead, inp_list) {
1745 if (inp->inp_state != INPCB_STATE_DEAD &&
1746 inp->inp_socket != NULL &&
1747 uuid_compare(inp->necp_client_uuid, necp_client_uuid) == 0) {
1748 *cfil_attached = (inp->inp_socket->so_cfil != NULL);
1749 if (cfil_socket_safe_lock(inp)) {
1750 so = inp->inp_socket;
1751 }
1752 break;
1753 }
1754 }
1755 lck_rw_done(pcbinfo->ipi_lock);
1756 if (so != NULL) {
1757 goto done;
1758 }
1759
1760 pcbinfo = &udbinfo;
1761 lck_rw_lock_shared(pcbinfo->ipi_lock);
1762 LIST_FOREACH(inp, pcbinfo->ipi_listhead, inp_list) {
1763 if (inp->inp_state != INPCB_STATE_DEAD &&
1764 inp->inp_socket != NULL &&
1765 uuid_compare(inp->necp_client_uuid, necp_client_uuid) == 0) {
1766 *cfil_attached = (inp->inp_socket->so_cfil_db != NULL);
1767 if (cfil_socket_safe_lock(inp)) {
1768 so = inp->inp_socket;
1769 }
1770 break;
1771 }
1772 }
1773 lck_rw_done(pcbinfo->ipi_lock);
1774
1775 done:
1776 return so;
1777 }
1778
1779 static void
1780 cfil_info_stats_toggle(struct cfil_info *cfil_info, struct cfil_entry *entry, uint32_t report_frequency)
1781 {
1782 struct cfil_info *cfil = NULL;
1783 Boolean found = FALSE;
1784 int kcunit;
1785
1786 if (cfil_info == NULL) {
1787 return;
1788 }
1789
1790 if (report_frequency) {
1791 if (entry == NULL) {
1792 return;
1793 }
1794
1795 // Update stats reporting frequency.
1796 if (entry->cfe_stats_report_frequency != report_frequency) {
1797 entry->cfe_stats_report_frequency = report_frequency;
1798 if (entry->cfe_stats_report_frequency < CFIL_STATS_REPORT_INTERVAL_MIN_MSEC) {
1799 entry->cfe_stats_report_frequency = CFIL_STATS_REPORT_INTERVAL_MIN_MSEC;
1800 }
1801 microuptime(&entry->cfe_stats_report_ts);
1802
1803 // Insert cfil_info into list only if it is not in yet.
1804 TAILQ_FOREACH(cfil, &cfil_sock_head_stats, cfi_link_stats) {
1805 if (cfil == cfil_info) {
1806 return;
1807 }
1808 }
1809
1810 TAILQ_INSERT_TAIL(&cfil_sock_head_stats, cfil_info, cfi_link_stats);
1811
1812 // Wake up stats thread if this is first flow added
1813 if (cfil_sock_attached_stats_count == 0) {
1814 thread_wakeup((caddr_t)&cfil_sock_attached_stats_count);
1815 }
1816 cfil_sock_attached_stats_count++;
1817 #if STATS_DEBUG
1818 CFIL_LOG(LOG_ERR, "CFIL: VERDICT RECEIVED - STATS FLOW INSERTED: <so %llx sockID %llu> stats frequency %d msecs",
1819 cfil_info->cfi_so ? (uint64_t)VM_KERNEL_ADDRPERM(cfil_info->cfi_so) : 0,
1820 cfil_info->cfi_sock_id,
1821 entry->cfe_stats_report_frequency);
1822 #endif
1823 }
1824 } else {
1825 // Turn off stats reporting for this filter.
1826 if (entry != NULL) {
1827 // Already off, no change.
1828 if (entry->cfe_stats_report_frequency == 0) {
1829 return;
1830 }
1831
1832 entry->cfe_stats_report_frequency = 0;
1833 // If cfil_info still has filter(s) asking for stats, no need to remove from list.
1834 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
1835 if (cfil_info->cfi_entries[kcunit - 1].cfe_stats_report_frequency > 0) {
1836 return;
1837 }
1838 }
1839 }
1840
1841 // No more filter asking for stats for this cfil_info, remove from list.
1842 if (!TAILQ_EMPTY(&cfil_sock_head_stats)) {
1843 found = FALSE;
1844 TAILQ_FOREACH(cfil, &cfil_sock_head_stats, cfi_link_stats) {
1845 if (cfil == cfil_info) {
1846 found = TRUE;
1847 break;
1848 }
1849 }
1850 if (found) {
1851 cfil_sock_attached_stats_count--;
1852 TAILQ_REMOVE(&cfil_sock_head_stats, cfil_info, cfi_link_stats);
1853 #if STATS_DEBUG
1854 CFIL_LOG(LOG_ERR, "CFIL: VERDICT RECEIVED - STATS FLOW DELETED: <so %llx sockID %llu> stats frequency reset",
1855 cfil_info->cfi_so ? (uint64_t)VM_KERNEL_ADDRPERM(cfil_info->cfi_so) : 0,
1856 cfil_info->cfi_sock_id);
1857 #endif
1858 }
1859 }
1860 }
1861 }
1862
1863 static errno_t
1864 cfil_ctl_send(kern_ctl_ref kctlref, u_int32_t kcunit, void *unitinfo, mbuf_t m,
1865 int flags)
1866 {
1867 #pragma unused(kctlref, flags)
1868 errno_t error = 0;
1869 struct cfil_msg_hdr *msghdr;
1870 struct content_filter *cfc = (struct content_filter *)unitinfo;
1871 struct socket *so;
1872 struct cfil_msg_action *action_msg;
1873 struct cfil_entry *entry;
1874 struct cfil_info *cfil_info = NULL;
1875 unsigned int data_len = 0;
1876
1877 CFIL_LOG(LOG_INFO, "");
1878
1879 if (content_filters == NULL) {
1880 CFIL_LOG(LOG_ERR, "no content filter");
1881 error = EINVAL;
1882 goto done;
1883 }
1884 if (kcunit > MAX_CONTENT_FILTER) {
1885 CFIL_LOG(LOG_ERR, "kcunit %u > MAX_CONTENT_FILTER (%d)",
1886 kcunit, MAX_CONTENT_FILTER);
1887 error = EINVAL;
1888 goto done;
1889 }
1890 if (m == NULL) {
1891 CFIL_LOG(LOG_ERR, "null mbuf");
1892 error = EINVAL;
1893 goto done;
1894 }
1895 data_len = m_length(m);
1896
1897 if (data_len < sizeof(struct cfil_msg_hdr)) {
1898 CFIL_LOG(LOG_ERR, "too short %u", data_len);
1899 error = EINVAL;
1900 goto done;
1901 }
1902 msghdr = (struct cfil_msg_hdr *)mbuf_data(m);
1903 if (msghdr->cfm_version != CFM_VERSION_CURRENT) {
1904 CFIL_LOG(LOG_ERR, "bad version %u", msghdr->cfm_version);
1905 error = EINVAL;
1906 goto done;
1907 }
1908 if (msghdr->cfm_type != CFM_TYPE_ACTION) {
1909 CFIL_LOG(LOG_ERR, "bad type %u", msghdr->cfm_type);
1910 error = EINVAL;
1911 goto done;
1912 }
1913 if (msghdr->cfm_len > data_len) {
1914 CFIL_LOG(LOG_ERR, "bad length %u", msghdr->cfm_len);
1915 error = EINVAL;
1916 goto done;
1917 }
1918
1919 /* Validate action operation */
1920 switch (msghdr->cfm_op) {
1921 case CFM_OP_DATA_UPDATE:
1922 OSIncrementAtomic(
1923 &cfil_stats.cfs_ctl_action_data_update);
1924 break;
1925 case CFM_OP_DROP:
1926 OSIncrementAtomic(&cfil_stats.cfs_ctl_action_drop);
1927 break;
1928 case CFM_OP_BLESS_CLIENT:
1929 if (msghdr->cfm_len != sizeof(struct cfil_msg_bless_client)) {
1930 OSIncrementAtomic(&cfil_stats.cfs_ctl_action_bad_len);
1931 error = EINVAL;
1932 CFIL_LOG(LOG_ERR, "bad len: %u for op %u",
1933 msghdr->cfm_len,
1934 msghdr->cfm_op);
1935 goto done;
1936 }
1937 error = cfil_action_bless_client(kcunit, msghdr);
1938 goto done;
1939 case CFM_OP_SET_CRYPTO_KEY:
1940 if (msghdr->cfm_len != sizeof(struct cfil_msg_set_crypto_key)) {
1941 OSIncrementAtomic(&cfil_stats.cfs_ctl_action_bad_len);
1942 error = EINVAL;
1943 CFIL_LOG(LOG_ERR, "bad len: %u for op %u",
1944 msghdr->cfm_len,
1945 msghdr->cfm_op);
1946 goto done;
1947 }
1948 error = cfil_action_set_crypto_key(kcunit, msghdr);
1949 goto done;
1950 default:
1951 OSIncrementAtomic(&cfil_stats.cfs_ctl_action_bad_op);
1952 CFIL_LOG(LOG_ERR, "bad op %u", msghdr->cfm_op);
1953 error = EINVAL;
1954 goto done;
1955 }
1956 if (msghdr->cfm_len != sizeof(struct cfil_msg_action)) {
1957 OSIncrementAtomic(&cfil_stats.cfs_ctl_action_bad_len);
1958 error = EINVAL;
1959 CFIL_LOG(LOG_ERR, "bad len: %u for op %u",
1960 msghdr->cfm_len,
1961 msghdr->cfm_op);
1962 goto done;
1963 }
1964 cfil_rw_lock_shared(&cfil_lck_rw);
1965 if (cfc != (void *)content_filters[kcunit - 1]) {
1966 CFIL_LOG(LOG_ERR, "unitinfo does not match for kcunit %u",
1967 kcunit);
1968 error = EINVAL;
1969 cfil_rw_unlock_shared(&cfil_lck_rw);
1970 goto done;
1971 }
1972 cfil_rw_unlock_shared(&cfil_lck_rw);
1973
1974 // Search for socket (TCP+UDP and lock so)
1975 so = cfil_socket_from_sock_id(msghdr->cfm_sock_id, false);
1976 if (so == NULL) {
1977 CFIL_LOG(LOG_NOTICE, "bad sock_id %llx",
1978 msghdr->cfm_sock_id);
1979 error = EINVAL;
1980 goto done;
1981 }
1982
1983 cfil_info = so->so_cfil_db != NULL ?
1984 cfil_db_get_cfil_info(so->so_cfil_db, msghdr->cfm_sock_id) : so->so_cfil;
1985
1986 // We should not obtain global lock here in order to avoid deadlock down the path.
1987 // But we attempt to retain a valid cfil_info to prevent any deallocation until
1988 // we are done. Abort retain if cfil_info has already entered the free code path.
1989 if (cfil_info && os_ref_retain_try(&cfil_info->cfi_ref_count) == false) {
1990 socket_unlock(so, 1);
1991 goto done;
1992 }
1993
1994 if (cfil_info == NULL) {
1995 CFIL_LOG(LOG_NOTICE, "so %llx <id %llu> not attached",
1996 (uint64_t)VM_KERNEL_ADDRPERM(so), msghdr->cfm_sock_id);
1997 error = EINVAL;
1998 goto unlock;
1999 } else if (cfil_info->cfi_flags & CFIF_DROP) {
2000 CFIL_LOG(LOG_NOTICE, "so %llx drop set",
2001 (uint64_t)VM_KERNEL_ADDRPERM(so));
2002 error = EINVAL;
2003 goto unlock;
2004 }
2005
2006 if (cfil_info->cfi_debug) {
2007 cfil_info_log(LOG_ERR, cfil_info, "CFIL: RECEIVED MSG FROM FILTER");
2008 }
2009
2010 entry = &cfil_info->cfi_entries[kcunit - 1];
2011 if (entry->cfe_filter == NULL) {
2012 CFIL_LOG(LOG_NOTICE, "so %llx no filter",
2013 (uint64_t)VM_KERNEL_ADDRPERM(so));
2014 error = EINVAL;
2015 goto unlock;
2016 }
2017
2018 if (entry->cfe_flags & CFEF_SENT_SOCK_ATTACHED) {
2019 entry->cfe_flags |= CFEF_DATA_START;
2020 } else {
2021 CFIL_LOG(LOG_ERR,
2022 "so %llx attached not sent for %u",
2023 (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit);
2024 error = EINVAL;
2025 goto unlock;
2026 }
2027
2028 microuptime(&entry->cfe_last_action);
2029 CFI_ADD_TIME_LOG(cfil_info, &entry->cfe_last_action, &cfil_info->cfi_first_event, msghdr->cfm_op);
2030
2031 action_msg = (struct cfil_msg_action *)msghdr;
2032
2033 switch (msghdr->cfm_op) {
2034 case CFM_OP_DATA_UPDATE:
2035
2036 if (cfil_info->cfi_debug) {
2037 cfil_info_log(LOG_ERR, cfil_info, "CFIL: RECEIVED CFM_OP_DATA_UPDATE");
2038 CFIL_LOG(LOG_ERR, "CFIL: VERDICT RECEIVED: <so %llx sockID %llu> <IN peek:%llu pass:%llu, OUT peek:%llu pass:%llu>",
2039 (uint64_t)VM_KERNEL_ADDRPERM(so),
2040 cfil_info->cfi_sock_id,
2041 action_msg->cfa_in_peek_offset, action_msg->cfa_in_pass_offset,
2042 action_msg->cfa_out_peek_offset, action_msg->cfa_out_pass_offset);
2043 }
2044
2045 #if VERDICT_DEBUG
2046 CFIL_LOG(LOG_ERR, "CFIL: VERDICT RECEIVED: <so %llx sockID %llu> <IN peek:%llu pass:%llu, OUT peek:%llu pass:%llu>",
2047 (uint64_t)VM_KERNEL_ADDRPERM(so),
2048 cfil_info->cfi_sock_id,
2049 action_msg->cfa_in_peek_offset, action_msg->cfa_in_pass_offset,
2050 action_msg->cfa_out_peek_offset, action_msg->cfa_out_pass_offset);
2051 #endif
2052 /*
2053 * Received verdict, at this point we know this
2054 * socket connection is allowed. Unblock thread
2055 * immediately before proceeding to process the verdict.
2056 */
2057 cfil_sock_received_verdict(so);
2058
2059 if (action_msg->cfa_out_peek_offset != 0 ||
2060 action_msg->cfa_out_pass_offset != 0) {
2061 error = cfil_action_data_pass(so, cfil_info, kcunit, 1,
2062 action_msg->cfa_out_pass_offset,
2063 action_msg->cfa_out_peek_offset);
2064 }
2065 if (error == EJUSTRETURN) {
2066 error = 0;
2067 }
2068 if (error != 0) {
2069 break;
2070 }
2071 if (action_msg->cfa_in_peek_offset != 0 ||
2072 action_msg->cfa_in_pass_offset != 0) {
2073 error = cfil_action_data_pass(so, cfil_info, kcunit, 0,
2074 action_msg->cfa_in_pass_offset,
2075 action_msg->cfa_in_peek_offset);
2076 }
2077 if (error == EJUSTRETURN) {
2078 error = 0;
2079 }
2080
2081 // Toggle stats reporting according to received verdict.
2082 cfil_rw_lock_exclusive(&cfil_lck_rw);
2083 cfil_info_stats_toggle(cfil_info, entry, action_msg->cfa_stats_frequency);
2084 cfil_rw_unlock_exclusive(&cfil_lck_rw);
2085
2086 break;
2087
2088 case CFM_OP_DROP:
2089 if (cfil_info->cfi_debug) {
2090 cfil_info_log(LOG_ERR, cfil_info, "CFIL: RECEIVED CFM_OP_DROP");
2091 CFIL_LOG(LOG_ERR, "CFIL: VERDICT DROP RECEIVED: <so %llx sockID %llu> <IN peek:%llu pass:%llu, OUT peek:%llu pass:%llu>",
2092 (uint64_t)VM_KERNEL_ADDRPERM(so),
2093 cfil_info->cfi_sock_id,
2094 action_msg->cfa_in_peek_offset, action_msg->cfa_in_pass_offset,
2095 action_msg->cfa_out_peek_offset, action_msg->cfa_out_pass_offset);
2096 }
2097
2098 #if VERDICT_DEBUG
2099 CFIL_LOG(LOG_ERR, "CFIL: VERDICT DROP RECEIVED: <so %llx sockID %llu> <IN peek:%llu pass:%llu, OUT peek:%llu pass:%llu>",
2100 (uint64_t)VM_KERNEL_ADDRPERM(so),
2101 cfil_info->cfi_sock_id,
2102 action_msg->cfa_in_peek_offset, action_msg->cfa_in_pass_offset,
2103 action_msg->cfa_out_peek_offset, action_msg->cfa_out_pass_offset);
2104 #endif
2105 error = cfil_action_drop(so, cfil_info, kcunit);
2106 cfil_sock_received_verdict(so);
2107 break;
2108
2109 default:
2110 error = EINVAL;
2111 break;
2112 }
2113 unlock:
2114 CFIL_INFO_FREE(cfil_info)
2115 socket_unlock(so, 1);
2116 done:
2117 mbuf_freem(m);
2118
2119 if (error == 0) {
2120 OSIncrementAtomic(&cfil_stats.cfs_ctl_send_ok);
2121 } else {
2122 OSIncrementAtomic(&cfil_stats.cfs_ctl_send_bad);
2123 }
2124
2125 return error;
2126 }
2127
2128 static errno_t
2129 cfil_ctl_getopt(kern_ctl_ref kctlref, u_int32_t kcunit, void *unitinfo,
2130 int opt, void *data, size_t *len)
2131 {
2132 #pragma unused(kctlref, opt)
2133 struct cfil_info *cfil_info = NULL;
2134 errno_t error = 0;
2135 struct content_filter *cfc = (struct content_filter *)unitinfo;
2136
2137 CFIL_LOG(LOG_NOTICE, "");
2138
2139 cfil_rw_lock_shared(&cfil_lck_rw);
2140
2141 if (content_filters == NULL) {
2142 CFIL_LOG(LOG_ERR, "no content filter");
2143 error = EINVAL;
2144 goto done;
2145 }
2146 if (kcunit > MAX_CONTENT_FILTER) {
2147 CFIL_LOG(LOG_ERR, "kcunit %u > MAX_CONTENT_FILTER (%d)",
2148 kcunit, MAX_CONTENT_FILTER);
2149 error = EINVAL;
2150 goto done;
2151 }
2152 if (cfc != (void *)content_filters[kcunit - 1]) {
2153 CFIL_LOG(LOG_ERR, "unitinfo does not match for kcunit %u",
2154 kcunit);
2155 error = EINVAL;
2156 goto done;
2157 }
2158 switch (opt) {
2159 case CFIL_OPT_NECP_CONTROL_UNIT:
2160 if (*len < sizeof(uint32_t)) {
2161 CFIL_LOG(LOG_ERR, "len too small %lu", *len);
2162 error = EINVAL;
2163 goto done;
2164 }
2165 if (data != NULL) {
2166 *(uint32_t *)data = cfc->cf_necp_control_unit;
2167 }
2168 break;
2169 case CFIL_OPT_GET_SOCKET_INFO:
2170 if (*len != sizeof(struct cfil_opt_sock_info)) {
2171 CFIL_LOG(LOG_ERR, "len does not match %lu", *len);
2172 error = EINVAL;
2173 goto done;
2174 }
2175 if (data == NULL) {
2176 CFIL_LOG(LOG_ERR, "data not passed");
2177 error = EINVAL;
2178 goto done;
2179 }
2180
2181 struct cfil_opt_sock_info *sock_info =
2182 (struct cfil_opt_sock_info *) data;
2183
2184 // Unlock here so that we never hold both cfil_lck_rw and the
2185 // socket_lock at the same time. Otherwise, this can deadlock
2186 // because soclose() takes the socket_lock and then exclusive
2187 // cfil_lck_rw and we require the opposite order.
2188
2189 // WARNING: Be sure to never use anything protected
2190 // by cfil_lck_rw beyond this point.
2191 // WARNING: Be sure to avoid fallthrough and
2192 // goto return_already_unlocked from this branch.
2193 cfil_rw_unlock_shared(&cfil_lck_rw);
2194
2195 // Search (TCP+UDP) and lock socket
2196 struct socket *sock =
2197 cfil_socket_from_sock_id(sock_info->cfs_sock_id, false);
2198 if (sock == NULL) {
2199 #if LIFECYCLE_DEBUG
2200 CFIL_LOG(LOG_ERR, "CFIL: GET_SOCKET_INFO failed: bad sock_id %llu",
2201 sock_info->cfs_sock_id);
2202 #endif
2203 error = ENOENT;
2204 goto return_already_unlocked;
2205 }
2206
2207 cfil_info = (sock->so_cfil_db != NULL) ?
2208 cfil_db_get_cfil_info(sock->so_cfil_db, sock_info->cfs_sock_id) : sock->so_cfil;
2209
2210 if (cfil_info == NULL) {
2211 #if LIFECYCLE_DEBUG
2212 CFIL_LOG(LOG_ERR, "CFIL: GET_SOCKET_INFO failed: so %llx not attached, cannot fetch info",
2213 (uint64_t)VM_KERNEL_ADDRPERM(sock));
2214 #endif
2215 error = EINVAL;
2216 socket_unlock(sock, 1);
2217 goto return_already_unlocked;
2218 }
2219
2220 // Fill out family, type, and protocol
2221 sock_info->cfs_sock_family = sock->so_proto->pr_domain->dom_family;
2222 sock_info->cfs_sock_type = sock->so_proto->pr_type;
2223 sock_info->cfs_sock_protocol = sock->so_proto->pr_protocol;
2224
2225 // Source and destination addresses
2226 struct inpcb *inp = sotoinpcb(sock);
2227 if (inp->inp_vflag & INP_IPV6) {
2228 struct in6_addr *laddr = NULL, *faddr = NULL;
2229 u_int16_t lport = 0, fport = 0;
2230
2231 cfil_get_flow_address_v6(cfil_info->cfi_hash_entry, inp,
2232 &laddr, &faddr, &lport, &fport);
2233 fill_ip6_sockaddr_4_6(&sock_info->cfs_local, laddr, lport);
2234 fill_ip6_sockaddr_4_6(&sock_info->cfs_remote, faddr, fport);
2235 } else if (inp->inp_vflag & INP_IPV4) {
2236 struct in_addr laddr = {.s_addr = 0}, faddr = {.s_addr = 0};
2237 u_int16_t lport = 0, fport = 0;
2238
2239 cfil_get_flow_address(cfil_info->cfi_hash_entry, inp,
2240 &laddr, &faddr, &lport, &fport);
2241 fill_ip_sockaddr_4_6(&sock_info->cfs_local, laddr, lport);
2242 fill_ip_sockaddr_4_6(&sock_info->cfs_remote, faddr, fport);
2243 }
2244
2245 // Set the pid info
2246 sock_info->cfs_pid = sock->last_pid;
2247 memcpy(sock_info->cfs_uuid, sock->last_uuid, sizeof(uuid_t));
2248
2249 if (sock->so_flags & SOF_DELEGATED) {
2250 sock_info->cfs_e_pid = sock->e_pid;
2251 memcpy(sock_info->cfs_e_uuid, sock->e_uuid, sizeof(uuid_t));
2252 } else {
2253 sock_info->cfs_e_pid = sock->last_pid;
2254 memcpy(sock_info->cfs_e_uuid, sock->last_uuid, sizeof(uuid_t));
2255 }
2256
2257 socket_unlock(sock, 1);
2258
2259 goto return_already_unlocked;
2260 default:
2261 error = ENOPROTOOPT;
2262 break;
2263 }
2264 done:
2265 cfil_rw_unlock_shared(&cfil_lck_rw);
2266
2267 return error;
2268
2269 return_already_unlocked:
2270
2271 return error;
2272 }
2273
2274 static errno_t
2275 cfil_ctl_setopt(kern_ctl_ref kctlref, u_int32_t kcunit, void *unitinfo,
2276 int opt, void *data, size_t len)
2277 {
2278 #pragma unused(kctlref, opt)
2279 errno_t error = 0;
2280 struct content_filter *cfc = (struct content_filter *)unitinfo;
2281
2282 CFIL_LOG(LOG_NOTICE, "");
2283
2284 cfil_rw_lock_exclusive(&cfil_lck_rw);
2285
2286 if (content_filters == NULL) {
2287 CFIL_LOG(LOG_ERR, "no content filter");
2288 error = EINVAL;
2289 goto done;
2290 }
2291 if (kcunit > MAX_CONTENT_FILTER) {
2292 CFIL_LOG(LOG_ERR, "kcunit %u > MAX_CONTENT_FILTER (%d)",
2293 kcunit, MAX_CONTENT_FILTER);
2294 error = EINVAL;
2295 goto done;
2296 }
2297 if (cfc != (void *)content_filters[kcunit - 1]) {
2298 CFIL_LOG(LOG_ERR, "unitinfo does not match for kcunit %u",
2299 kcunit);
2300 error = EINVAL;
2301 goto done;
2302 }
2303 switch (opt) {
2304 case CFIL_OPT_NECP_CONTROL_UNIT:
2305 if (len < sizeof(uint32_t)) {
2306 CFIL_LOG(LOG_ERR, "CFIL_OPT_NECP_CONTROL_UNIT "
2307 "len too small %lu", len);
2308 error = EINVAL;
2309 goto done;
2310 }
2311 if (cfc->cf_necp_control_unit != 0) {
2312 CFIL_LOG(LOG_ERR, "CFIL_OPT_NECP_CONTROL_UNIT "
2313 "already set %u",
2314 cfc->cf_necp_control_unit);
2315 error = EINVAL;
2316 goto done;
2317 }
2318 cfc->cf_necp_control_unit = *(uint32_t *)data;
2319 break;
2320 default:
2321 error = ENOPROTOOPT;
2322 break;
2323 }
2324 done:
2325 cfil_rw_unlock_exclusive(&cfil_lck_rw);
2326
2327 return error;
2328 }
2329
2330
2331 static void
2332 cfil_ctl_rcvd(kern_ctl_ref kctlref, u_int32_t kcunit, void *unitinfo, int flags)
2333 {
2334 #pragma unused(kctlref, flags)
2335 struct content_filter *cfc = (struct content_filter *)unitinfo;
2336 struct socket *so = NULL;
2337 int error;
2338 struct cfil_entry *entry;
2339 struct cfil_info *cfil_info = NULL;
2340
2341 CFIL_LOG(LOG_INFO, "");
2342
2343 if (content_filters == NULL) {
2344 CFIL_LOG(LOG_ERR, "no content filter");
2345 OSIncrementAtomic(&cfil_stats.cfs_ctl_rcvd_bad);
2346 return;
2347 }
2348 if (kcunit > MAX_CONTENT_FILTER) {
2349 CFIL_LOG(LOG_ERR, "kcunit %u > MAX_CONTENT_FILTER (%d)",
2350 kcunit, MAX_CONTENT_FILTER);
2351 OSIncrementAtomic(&cfil_stats.cfs_ctl_rcvd_bad);
2352 return;
2353 }
2354 cfil_rw_lock_shared(&cfil_lck_rw);
2355 if (cfc != (void *)content_filters[kcunit - 1]) {
2356 CFIL_LOG(LOG_ERR, "unitinfo does not match for kcunit %u",
2357 kcunit);
2358 OSIncrementAtomic(&cfil_stats.cfs_ctl_rcvd_bad);
2359 goto done;
2360 }
2361 /* Let's assume the flow control is lifted */
2362 if (cfc->cf_flags & CFF_FLOW_CONTROLLED) {
2363 if (!cfil_rw_lock_shared_to_exclusive(&cfil_lck_rw)) {
2364 cfil_rw_lock_exclusive(&cfil_lck_rw);
2365 }
2366
2367 cfc->cf_flags &= ~CFF_FLOW_CONTROLLED;
2368
2369 cfil_rw_lock_exclusive_to_shared(&cfil_lck_rw);
2370 LCK_RW_ASSERT(&cfil_lck_rw, LCK_RW_ASSERT_SHARED);
2371 }
2372 /*
2373 * Flow control will be raised again as soon as an entry cannot enqueue
2374 * to the kernel control socket
2375 */
2376 while ((cfc->cf_flags & CFF_FLOW_CONTROLLED) == 0) {
2377 verify_content_filter(cfc);
2378
2379 cfil_rw_lock_assert_held(&cfil_lck_rw, 0);
2380
2381 /* Find an entry that is flow controlled */
2382 TAILQ_FOREACH(entry, &cfc->cf_sock_entries, cfe_link) {
2383 if (entry->cfe_cfil_info == NULL ||
2384 entry->cfe_cfil_info->cfi_so == NULL) {
2385 continue;
2386 }
2387 if ((entry->cfe_flags & CFEF_FLOW_CONTROLLED) == 0) {
2388 continue;
2389 }
2390 }
2391 if (entry == NULL) {
2392 break;
2393 }
2394
2395 OSIncrementAtomic(&cfil_stats.cfs_ctl_rcvd_flow_lift);
2396
2397 cfil_info = entry->cfe_cfil_info;
2398 so = cfil_info->cfi_so;
2399
2400 cfil_rw_unlock_shared(&cfil_lck_rw);
2401 socket_lock(so, 1);
2402
2403 do {
2404 error = cfil_acquire_sockbuf(so, cfil_info, 1);
2405 if (error == 0) {
2406 error = cfil_data_service_ctl_q(so, cfil_info, kcunit, 1);
2407 }
2408 cfil_release_sockbuf(so, 1);
2409 if (error != 0) {
2410 break;
2411 }
2412
2413 error = cfil_acquire_sockbuf(so, cfil_info, 0);
2414 if (error == 0) {
2415 error = cfil_data_service_ctl_q(so, cfil_info, kcunit, 0);
2416 }
2417 cfil_release_sockbuf(so, 0);
2418 } while (0);
2419
2420 socket_lock_assert_owned(so);
2421 socket_unlock(so, 1);
2422
2423 cfil_rw_lock_shared(&cfil_lck_rw);
2424 }
2425 done:
2426 cfil_rw_unlock_shared(&cfil_lck_rw);
2427 }
2428
2429 void
2430 cfil_init(void)
2431 {
2432 struct kern_ctl_reg kern_ctl;
2433 errno_t error = 0;
2434 unsigned int mbuf_limit = 0;
2435
2436 CFIL_LOG(LOG_NOTICE, "");
2437
2438 /*
2439 * Compile time verifications
2440 */
2441 _CASSERT(CFIL_MAX_FILTER_COUNT == MAX_CONTENT_FILTER);
2442 _CASSERT(sizeof(struct cfil_filter_stat) % sizeof(uint32_t) == 0);
2443 _CASSERT(sizeof(struct cfil_entry_stat) % sizeof(uint32_t) == 0);
2444 _CASSERT(sizeof(struct cfil_sock_stat) % sizeof(uint32_t) == 0);
2445
2446 /*
2447 * Runtime time verifications
2448 */
2449 VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_ctl_q_in_enqueued,
2450 sizeof(uint32_t)));
2451 VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_ctl_q_out_enqueued,
2452 sizeof(uint32_t)));
2453 VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_ctl_q_in_peeked,
2454 sizeof(uint32_t)));
2455 VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_ctl_q_out_peeked,
2456 sizeof(uint32_t)));
2457
2458 VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_pending_q_in_enqueued,
2459 sizeof(uint32_t)));
2460 VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_pending_q_out_enqueued,
2461 sizeof(uint32_t)));
2462
2463 VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_inject_q_in_enqueued,
2464 sizeof(uint32_t)));
2465 VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_inject_q_out_enqueued,
2466 sizeof(uint32_t)));
2467 VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_inject_q_in_passed,
2468 sizeof(uint32_t)));
2469 VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_inject_q_out_passed,
2470 sizeof(uint32_t)));
2471
2472 /*
2473 * Allocate locks
2474 */
2475 cfil_lck_grp_attr = lck_grp_attr_alloc_init();
2476 if (cfil_lck_grp_attr == NULL) {
2477 panic("%s: lck_grp_attr_alloc_init failed", __func__);
2478 /* NOTREACHED */
2479 }
2480 cfil_lck_grp = lck_grp_alloc_init("content filter",
2481 cfil_lck_grp_attr);
2482 if (cfil_lck_grp == NULL) {
2483 panic("%s: lck_grp_alloc_init failed", __func__);
2484 /* NOTREACHED */
2485 }
2486 cfil_lck_attr = lck_attr_alloc_init();
2487 if (cfil_lck_attr == NULL) {
2488 panic("%s: lck_attr_alloc_init failed", __func__);
2489 /* NOTREACHED */
2490 }
2491 lck_rw_init(&cfil_lck_rw, cfil_lck_grp, cfil_lck_attr);
2492
2493 TAILQ_INIT(&cfil_sock_head);
2494 TAILQ_INIT(&cfil_sock_head_stats);
2495
2496 /*
2497 * Register kernel control
2498 */
2499 bzero(&kern_ctl, sizeof(kern_ctl));
2500 strlcpy(kern_ctl.ctl_name, CONTENT_FILTER_CONTROL_NAME,
2501 sizeof(kern_ctl.ctl_name));
2502 kern_ctl.ctl_flags = CTL_FLAG_PRIVILEGED | CTL_FLAG_REG_EXTENDED;
2503 kern_ctl.ctl_sendsize = 512 * 1024; /* enough? */
2504 kern_ctl.ctl_recvsize = 512 * 1024; /* enough? */
2505 kern_ctl.ctl_connect = cfil_ctl_connect;
2506 kern_ctl.ctl_disconnect = cfil_ctl_disconnect;
2507 kern_ctl.ctl_send = cfil_ctl_send;
2508 kern_ctl.ctl_getopt = cfil_ctl_getopt;
2509 kern_ctl.ctl_setopt = cfil_ctl_setopt;
2510 kern_ctl.ctl_rcvd = cfil_ctl_rcvd;
2511 error = ctl_register(&kern_ctl, &cfil_kctlref);
2512 if (error != 0) {
2513 CFIL_LOG(LOG_ERR, "ctl_register failed: %d", error);
2514 return;
2515 }
2516
2517 // Spawn thread for gargage collection
2518 if (kernel_thread_start(cfil_udp_gc_thread_func, NULL,
2519 &cfil_udp_gc_thread) != KERN_SUCCESS) {
2520 panic_plain("%s: Can't create UDP GC thread", __func__);
2521 /* NOTREACHED */
2522 }
2523 /* this must not fail */
2524 VERIFY(cfil_udp_gc_thread != NULL);
2525
2526 // Spawn thread for statistics reporting
2527 if (kernel_thread_start(cfil_stats_report_thread_func, NULL,
2528 &cfil_stats_report_thread) != KERN_SUCCESS) {
2529 panic_plain("%s: Can't create statistics report thread", __func__);
2530 /* NOTREACHED */
2531 }
2532 /* this must not fail */
2533 VERIFY(cfil_stats_report_thread != NULL);
2534
2535 // Set UDP per-flow mbuf thresholds to 1/32 of platform max
2536 mbuf_limit = MAX(UDP_FLOW_GC_MBUF_CNT_MAX, (nmbclusters << MCLSHIFT) >> UDP_FLOW_GC_MBUF_SHIFT);
2537 cfil_udp_gc_mbuf_num_max = (mbuf_limit >> MCLSHIFT);
2538 cfil_udp_gc_mbuf_cnt_max = mbuf_limit;
2539
2540 memset(&global_cfil_stats_report_buffers, 0, sizeof(global_cfil_stats_report_buffers));
2541 }
2542
2543 struct cfil_info *
2544 cfil_info_alloc(struct socket *so, struct cfil_hash_entry *hash_entry)
2545 {
2546 int kcunit;
2547 struct cfil_info *cfil_info = NULL;
2548 struct inpcb *inp = sotoinpcb(so);
2549
2550 CFIL_LOG(LOG_INFO, "");
2551
2552 socket_lock_assert_owned(so);
2553
2554 cfil_info = zalloc(cfil_info_zone);
2555 if (cfil_info == NULL) {
2556 goto done;
2557 }
2558 bzero(cfil_info, sizeof(struct cfil_info));
2559 os_ref_init(&cfil_info->cfi_ref_count, &cfil_refgrp);
2560
2561 cfil_queue_init(&cfil_info->cfi_snd.cfi_inject_q);
2562 cfil_queue_init(&cfil_info->cfi_rcv.cfi_inject_q);
2563
2564 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
2565 struct cfil_entry *entry;
2566
2567 entry = &cfil_info->cfi_entries[kcunit - 1];
2568 entry->cfe_cfil_info = cfil_info;
2569
2570 /* Initialize the filter entry */
2571 entry->cfe_filter = NULL;
2572 entry->cfe_flags = 0;
2573 entry->cfe_necp_control_unit = 0;
2574 entry->cfe_snd.cfe_pass_offset = 0;
2575 entry->cfe_snd.cfe_peek_offset = 0;
2576 entry->cfe_snd.cfe_peeked = 0;
2577 entry->cfe_rcv.cfe_pass_offset = 0;
2578 entry->cfe_rcv.cfe_peek_offset = 0;
2579 entry->cfe_rcv.cfe_peeked = 0;
2580 /*
2581 * Timestamp the last action to avoid pre-maturely
2582 * triggering garbage collection
2583 */
2584 microuptime(&entry->cfe_last_action);
2585
2586 cfil_queue_init(&entry->cfe_snd.cfe_pending_q);
2587 cfil_queue_init(&entry->cfe_rcv.cfe_pending_q);
2588 cfil_queue_init(&entry->cfe_snd.cfe_ctl_q);
2589 cfil_queue_init(&entry->cfe_rcv.cfe_ctl_q);
2590 }
2591
2592 cfil_rw_lock_exclusive(&cfil_lck_rw);
2593
2594 /*
2595 * Create a cfi_sock_id that's not the socket pointer!
2596 */
2597
2598 if (hash_entry == NULL) {
2599 // This is the TCP case, cfil_info is tracked per socket
2600 if (inp->inp_flowhash == 0) {
2601 inp->inp_flowhash = inp_calc_flowhash(inp);
2602 }
2603
2604 so->so_cfil = cfil_info;
2605 cfil_info->cfi_so = so;
2606 cfil_info->cfi_sock_id =
2607 ((so->so_gencnt << 32) | inp->inp_flowhash);
2608 } else {
2609 // This is the UDP case, cfil_info is tracked in per-socket hash
2610 cfil_info->cfi_so = so;
2611 hash_entry->cfentry_cfil = cfil_info;
2612 cfil_info->cfi_hash_entry = hash_entry;
2613 cfil_info->cfi_sock_id = ((so->so_gencnt << 32) | (hash_entry->cfentry_flowhash & 0xffffffff));
2614 CFIL_LOG(LOG_DEBUG, "CFIL: UDP inp_flowhash %x so_gencnt %llx entry flowhash %x sockID %llx",
2615 inp->inp_flowhash, so->so_gencnt, hash_entry->cfentry_flowhash, cfil_info->cfi_sock_id);
2616
2617 // Wake up gc thread if this is first flow added
2618 if (cfil_sock_udp_attached_count == 0) {
2619 thread_wakeup((caddr_t)&cfil_sock_udp_attached_count);
2620 }
2621
2622 cfil_sock_udp_attached_count++;
2623 }
2624
2625 TAILQ_INSERT_TAIL(&cfil_sock_head, cfil_info, cfi_link);
2626 SLIST_INIT(&cfil_info->cfi_ordered_entries);
2627
2628 cfil_sock_attached_count++;
2629
2630 cfil_rw_unlock_exclusive(&cfil_lck_rw);
2631
2632 done:
2633 if (cfil_info != NULL) {
2634 OSIncrementAtomic(&cfil_stats.cfs_cfi_alloc_ok);
2635 } else {
2636 OSIncrementAtomic(&cfil_stats.cfs_cfi_alloc_fail);
2637 }
2638
2639 return cfil_info;
2640 }
2641
2642 int
2643 cfil_info_attach_unit(struct socket *so, uint32_t filter_control_unit, struct cfil_info *cfil_info)
2644 {
2645 int kcunit;
2646 int attached = 0;
2647
2648 CFIL_LOG(LOG_INFO, "");
2649
2650 socket_lock_assert_owned(so);
2651
2652 cfil_rw_lock_exclusive(&cfil_lck_rw);
2653
2654 for (kcunit = 1;
2655 content_filters != NULL && kcunit <= MAX_CONTENT_FILTER;
2656 kcunit++) {
2657 struct content_filter *cfc = content_filters[kcunit - 1];
2658 struct cfil_entry *entry;
2659 struct cfil_entry *iter_entry;
2660 struct cfil_entry *iter_prev;
2661
2662 if (cfc == NULL) {
2663 continue;
2664 }
2665 if (!(cfc->cf_necp_control_unit & filter_control_unit)) {
2666 continue;
2667 }
2668
2669 entry = &cfil_info->cfi_entries[kcunit - 1];
2670
2671 entry->cfe_filter = cfc;
2672 entry->cfe_necp_control_unit = cfc->cf_necp_control_unit;
2673 TAILQ_INSERT_TAIL(&cfc->cf_sock_entries, entry, cfe_link);
2674 cfc->cf_sock_count++;
2675
2676 /* Insert the entry into the list ordered by control unit */
2677 iter_prev = NULL;
2678 SLIST_FOREACH(iter_entry, &cfil_info->cfi_ordered_entries, cfe_order_link) {
2679 if (entry->cfe_necp_control_unit < iter_entry->cfe_necp_control_unit) {
2680 break;
2681 }
2682 iter_prev = iter_entry;
2683 }
2684
2685 if (iter_prev == NULL) {
2686 SLIST_INSERT_HEAD(&cfil_info->cfi_ordered_entries, entry, cfe_order_link);
2687 } else {
2688 SLIST_INSERT_AFTER(iter_prev, entry, cfe_order_link);
2689 }
2690
2691 verify_content_filter(cfc);
2692 attached = 1;
2693 entry->cfe_flags |= CFEF_CFIL_ATTACHED;
2694 }
2695
2696 cfil_rw_unlock_exclusive(&cfil_lck_rw);
2697
2698 return attached;
2699 }
2700
2701 static void
2702 cfil_info_free(struct cfil_info *cfil_info)
2703 {
2704 int kcunit;
2705 uint64_t in_drain = 0;
2706 uint64_t out_drained = 0;
2707
2708 if (cfil_info == NULL) {
2709 return;
2710 }
2711
2712 CFIL_LOG(LOG_INFO, "");
2713
2714 cfil_rw_lock_exclusive(&cfil_lck_rw);
2715
2716 for (kcunit = 1;
2717 content_filters != NULL && kcunit <= MAX_CONTENT_FILTER;
2718 kcunit++) {
2719 struct cfil_entry *entry;
2720 struct content_filter *cfc;
2721
2722 entry = &cfil_info->cfi_entries[kcunit - 1];
2723
2724 /* Don't be silly and try to detach twice */
2725 if (entry->cfe_filter == NULL) {
2726 continue;
2727 }
2728
2729 cfc = content_filters[kcunit - 1];
2730
2731 VERIFY(cfc == entry->cfe_filter);
2732
2733 entry->cfe_filter = NULL;
2734 entry->cfe_necp_control_unit = 0;
2735 TAILQ_REMOVE(&cfc->cf_sock_entries, entry, cfe_link);
2736 cfc->cf_sock_count--;
2737
2738 verify_content_filter(cfc);
2739 }
2740 if (cfil_info->cfi_hash_entry != NULL) {
2741 cfil_sock_udp_attached_count--;
2742 }
2743 cfil_sock_attached_count--;
2744 TAILQ_REMOVE(&cfil_sock_head, cfil_info, cfi_link);
2745
2746 // Turn off stats reporting for cfil_info.
2747 cfil_info_stats_toggle(cfil_info, NULL, 0);
2748
2749 out_drained += cfil_queue_drain(&cfil_info->cfi_snd.cfi_inject_q);
2750 in_drain += cfil_queue_drain(&cfil_info->cfi_rcv.cfi_inject_q);
2751
2752 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
2753 struct cfil_entry *entry;
2754
2755 entry = &cfil_info->cfi_entries[kcunit - 1];
2756 out_drained += cfil_queue_drain(&entry->cfe_snd.cfe_pending_q);
2757 in_drain += cfil_queue_drain(&entry->cfe_rcv.cfe_pending_q);
2758 out_drained += cfil_queue_drain(&entry->cfe_snd.cfe_ctl_q);
2759 in_drain += cfil_queue_drain(&entry->cfe_rcv.cfe_ctl_q);
2760 }
2761 cfil_rw_unlock_exclusive(&cfil_lck_rw);
2762
2763 if (out_drained) {
2764 OSIncrementAtomic(&cfil_stats.cfs_flush_out_free);
2765 }
2766 if (in_drain) {
2767 OSIncrementAtomic(&cfil_stats.cfs_flush_in_free);
2768 }
2769
2770 zfree(cfil_info_zone, cfil_info);
2771 }
2772
2773 /*
2774 * Received a verdict from userspace for a socket.
2775 * Perform any delayed operation if needed.
2776 */
2777 static void
2778 cfil_sock_received_verdict(struct socket *so)
2779 {
2780 if (so == NULL || so->so_cfil == NULL) {
2781 return;
2782 }
2783
2784 so->so_cfil->cfi_flags |= CFIF_INITIAL_VERDICT;
2785
2786 /*
2787 * If socket has already been connected, trigger
2788 * soisconnected now.
2789 */
2790 if (so->so_cfil->cfi_flags & CFIF_SOCKET_CONNECTED) {
2791 so->so_cfil->cfi_flags &= ~CFIF_SOCKET_CONNECTED;
2792 soisconnected(so);
2793 return;
2794 }
2795 }
2796
2797 /*
2798 * Entry point from Sockets layer
2799 * The socket is locked.
2800 *
2801 * Checks if a connected socket is subject to filter and
2802 * pending the initial verdict.
2803 */
2804 boolean_t
2805 cfil_sock_connected_pending_verdict(struct socket *so)
2806 {
2807 if (so == NULL || so->so_cfil == NULL) {
2808 return false;
2809 }
2810
2811 if (so->so_cfil->cfi_flags & CFIF_INITIAL_VERDICT) {
2812 return false;
2813 } else {
2814 /*
2815 * Remember that this protocol is already connected, so
2816 * we will trigger soisconnected() upon receipt of
2817 * initial verdict later.
2818 */
2819 so->so_cfil->cfi_flags |= CFIF_SOCKET_CONNECTED;
2820 return true;
2821 }
2822 }
2823
2824 boolean_t
2825 cfil_filter_present(void)
2826 {
2827 return cfil_active_count > 0;
2828 }
2829
2830 /*
2831 * Entry point from Sockets layer
2832 * The socket is locked.
2833 */
2834 errno_t
2835 cfil_sock_attach(struct socket *so, struct sockaddr *local, struct sockaddr *remote, int dir)
2836 {
2837 errno_t error = 0;
2838 uint32_t filter_control_unit;
2839
2840 socket_lock_assert_owned(so);
2841
2842 if (so->so_flags1 & SOF1_FLOW_DIVERT_SKIP) {
2843 /*
2844 * This socket has already been evaluated (and ultimately skipped) by
2845 * flow divert, so it has also already been through content filter if there
2846 * is one.
2847 */
2848 goto done;
2849 }
2850
2851 /* Limit ourselves to TCP that are not MPTCP subflows */
2852 if (SKIP_FILTER_FOR_TCP_SOCKET(so)) {
2853 goto done;
2854 }
2855
2856 filter_control_unit = necp_socket_get_content_filter_control_unit(so);
2857 if (filter_control_unit == 0) {
2858 goto done;
2859 }
2860
2861 if (filter_control_unit == NECP_FILTER_UNIT_NO_FILTER) {
2862 goto done;
2863 }
2864 if ((filter_control_unit & NECP_MASK_USERSPACE_ONLY) != 0) {
2865 OSIncrementAtomic(&cfil_stats.cfs_sock_userspace_only);
2866 goto done;
2867 }
2868 if (cfil_active_count == 0) {
2869 OSIncrementAtomic(&cfil_stats.cfs_sock_attach_in_vain);
2870 goto done;
2871 }
2872 if (so->so_cfil != NULL) {
2873 OSIncrementAtomic(&cfil_stats.cfs_sock_attach_already);
2874 CFIL_LOG(LOG_ERR, "already attached");
2875 goto done;
2876 } else {
2877 cfil_info_alloc(so, NULL);
2878 if (so->so_cfil == NULL) {
2879 error = ENOMEM;
2880 OSIncrementAtomic(&cfil_stats.cfs_sock_attach_no_mem);
2881 goto done;
2882 }
2883 so->so_cfil->cfi_dir = dir;
2884 so->so_cfil->cfi_filter_control_unit = filter_control_unit;
2885 }
2886 if (cfil_info_attach_unit(so, filter_control_unit, so->so_cfil) == 0) {
2887 CFIL_LOG(LOG_ERR, "cfil_info_attach_unit(%u) failed",
2888 filter_control_unit);
2889 OSIncrementAtomic(&cfil_stats.cfs_sock_attach_failed);
2890 goto done;
2891 }
2892 CFIL_LOG(LOG_INFO, "so %llx filter_control_unit %u sockID %llx",
2893 (uint64_t)VM_KERNEL_ADDRPERM(so),
2894 filter_control_unit, so->so_cfil->cfi_sock_id);
2895
2896 so->so_flags |= SOF_CONTENT_FILTER;
2897 OSIncrementAtomic(&cfil_stats.cfs_sock_attached);
2898
2899 /* Hold a reference on the socket */
2900 so->so_usecount++;
2901
2902 /*
2903 * Save passed addresses for attach event msg (in case resend
2904 * is needed.
2905 */
2906 if (remote != NULL && (remote->sa_len <= sizeof(union sockaddr_in_4_6))) {
2907 memcpy(&so->so_cfil->cfi_so_attach_faddr, remote, remote->sa_len);
2908 }
2909 if (local != NULL && (local->sa_len <= sizeof(union sockaddr_in_4_6))) {
2910 memcpy(&so->so_cfil->cfi_so_attach_laddr, local, local->sa_len);
2911 }
2912
2913 error = cfil_dispatch_attach_event(so, so->so_cfil, 0, dir);
2914 /* We can recover from flow control or out of memory errors */
2915 if (error == ENOBUFS || error == ENOMEM) {
2916 error = 0;
2917 } else if (error != 0) {
2918 goto done;
2919 }
2920
2921 CFIL_INFO_VERIFY(so->so_cfil);
2922 done:
2923 return error;
2924 }
2925
2926 /*
2927 * Entry point from Sockets layer
2928 * The socket is locked.
2929 */
2930 errno_t
2931 cfil_sock_detach(struct socket *so)
2932 {
2933 if (IS_IP_DGRAM(so)) {
2934 cfil_db_free(so);
2935 return 0;
2936 }
2937
2938 if (so->so_cfil) {
2939 if (so->so_flags & SOF_CONTENT_FILTER) {
2940 so->so_flags &= ~SOF_CONTENT_FILTER;
2941 VERIFY(so->so_usecount > 0);
2942 so->so_usecount--;
2943 }
2944 CFIL_INFO_FREE(so->so_cfil);
2945 so->so_cfil = NULL;
2946 OSIncrementAtomic(&cfil_stats.cfs_sock_detached);
2947 }
2948 return 0;
2949 }
2950
2951 /*
2952 * Fill in the address info of an event message from either
2953 * the socket or passed in address info.
2954 */
2955 static void
2956 cfil_fill_event_msg_addresses(struct cfil_hash_entry *entry, struct inpcb *inp,
2957 union sockaddr_in_4_6 *sin_src, union sockaddr_in_4_6 *sin_dst,
2958 boolean_t isIPv4, boolean_t outgoing)
2959 {
2960 if (isIPv4) {
2961 struct in_addr laddr = {0}, faddr = {0};
2962 u_int16_t lport = 0, fport = 0;
2963
2964 cfil_get_flow_address(entry, inp, &laddr, &faddr, &lport, &fport);
2965
2966 if (outgoing) {
2967 fill_ip_sockaddr_4_6(sin_src, laddr, lport);
2968 fill_ip_sockaddr_4_6(sin_dst, faddr, fport);
2969 } else {
2970 fill_ip_sockaddr_4_6(sin_src, faddr, fport);
2971 fill_ip_sockaddr_4_6(sin_dst, laddr, lport);
2972 }
2973 } else {
2974 struct in6_addr *laddr = NULL, *faddr = NULL;
2975 u_int16_t lport = 0, fport = 0;
2976
2977 cfil_get_flow_address_v6(entry, inp, &laddr, &faddr, &lport, &fport);
2978 if (outgoing) {
2979 fill_ip6_sockaddr_4_6(sin_src, laddr, lport);
2980 fill_ip6_sockaddr_4_6(sin_dst, faddr, fport);
2981 } else {
2982 fill_ip6_sockaddr_4_6(sin_src, faddr, fport);
2983 fill_ip6_sockaddr_4_6(sin_dst, laddr, lport);
2984 }
2985 }
2986 }
2987
2988 static boolean_t
2989 cfil_dispatch_attach_event_sign(cfil_crypto_state_t crypto_state,
2990 struct cfil_info *cfil_info,
2991 struct cfil_msg_sock_attached *msg)
2992 {
2993 struct cfil_crypto_data data = {};
2994
2995 if (crypto_state == NULL || msg == NULL || cfil_info == NULL) {
2996 return false;
2997 }
2998
2999 data.sock_id = msg->cfs_msghdr.cfm_sock_id;
3000 data.direction = msg->cfs_conn_dir;
3001
3002 data.pid = msg->cfs_pid;
3003 data.effective_pid = msg->cfs_e_pid;
3004 uuid_copy(data.uuid, msg->cfs_uuid);
3005 uuid_copy(data.effective_uuid, msg->cfs_e_uuid);
3006 data.socketProtocol = msg->cfs_sock_protocol;
3007 if (data.direction == CFS_CONNECTION_DIR_OUT) {
3008 data.remote.sin6 = msg->cfs_dst.sin6;
3009 data.local.sin6 = msg->cfs_src.sin6;
3010 } else {
3011 data.remote.sin6 = msg->cfs_src.sin6;
3012 data.local.sin6 = msg->cfs_dst.sin6;
3013 }
3014
3015 // At attach, if local address is already present, no need to re-sign subsequent data messages.
3016 if (!NULLADDRESS(data.local)) {
3017 cfil_info->cfi_isSignatureLatest = true;
3018 }
3019
3020 msg->cfs_signature_length = sizeof(cfil_crypto_signature);
3021 if (cfil_crypto_sign_data(crypto_state, &data, msg->cfs_signature, &msg->cfs_signature_length) != 0) {
3022 msg->cfs_signature_length = 0;
3023 CFIL_LOG(LOG_ERR, "CFIL: Failed to sign attached msg <sockID %llu>",
3024 msg->cfs_msghdr.cfm_sock_id);
3025 return false;
3026 }
3027
3028 return true;
3029 }
3030
3031 static boolean_t
3032 cfil_dispatch_data_event_sign(cfil_crypto_state_t crypto_state,
3033 struct socket *so, struct cfil_info *cfil_info,
3034 struct cfil_msg_data_event *msg)
3035 {
3036 struct cfil_crypto_data data = {};
3037
3038 if (crypto_state == NULL || msg == NULL ||
3039 so == NULL || cfil_info == NULL) {
3040 return false;
3041 }
3042
3043 data.sock_id = cfil_info->cfi_sock_id;
3044 data.direction = cfil_info->cfi_dir;
3045 data.pid = so->last_pid;
3046 memcpy(data.uuid, so->last_uuid, sizeof(uuid_t));
3047 if (so->so_flags & SOF_DELEGATED) {
3048 data.effective_pid = so->e_pid;
3049 memcpy(data.effective_uuid, so->e_uuid, sizeof(uuid_t));
3050 } else {
3051 data.effective_pid = so->last_pid;
3052 memcpy(data.effective_uuid, so->last_uuid, sizeof(uuid_t));
3053 }
3054 data.socketProtocol = so->so_proto->pr_protocol;
3055
3056 if (data.direction == CFS_CONNECTION_DIR_OUT) {
3057 data.remote.sin6 = msg->cfc_dst.sin6;
3058 data.local.sin6 = msg->cfc_src.sin6;
3059 } else {
3060 data.remote.sin6 = msg->cfc_src.sin6;
3061 data.local.sin6 = msg->cfc_dst.sin6;
3062 }
3063
3064 // At first data, local address may show up for the first time, update address cache and
3065 // no need to re-sign subsequent data messages anymore.
3066 if (!NULLADDRESS(data.local)) {
3067 memcpy(&cfil_info->cfi_so_attach_laddr, &data.local, data.local.sa.sa_len);
3068 cfil_info->cfi_isSignatureLatest = true;
3069 }
3070
3071 msg->cfd_signature_length = sizeof(cfil_crypto_signature);
3072 if (cfil_crypto_sign_data(crypto_state, &data, msg->cfd_signature, &msg->cfd_signature_length) != 0) {
3073 msg->cfd_signature_length = 0;
3074 CFIL_LOG(LOG_ERR, "CFIL: Failed to sign data msg <sockID %llu>",
3075 msg->cfd_msghdr.cfm_sock_id);
3076 return false;
3077 }
3078
3079 return true;
3080 }
3081
3082 static boolean_t
3083 cfil_dispatch_closed_event_sign(cfil_crypto_state_t crypto_state,
3084 struct socket *so, struct cfil_info *cfil_info,
3085 struct cfil_msg_sock_closed *msg)
3086 {
3087 struct cfil_crypto_data data = {};
3088 struct cfil_hash_entry hash_entry = {};
3089 struct cfil_hash_entry *hash_entry_ptr = NULL;
3090 struct inpcb *inp = (struct inpcb *)so->so_pcb;
3091
3092 if (crypto_state == NULL || msg == NULL ||
3093 so == NULL || inp == NULL || cfil_info == NULL) {
3094 return false;
3095 }
3096
3097 data.sock_id = cfil_info->cfi_sock_id;
3098 data.direction = cfil_info->cfi_dir;
3099
3100 data.pid = so->last_pid;
3101 memcpy(data.uuid, so->last_uuid, sizeof(uuid_t));
3102 if (so->so_flags & SOF_DELEGATED) {
3103 data.effective_pid = so->e_pid;
3104 memcpy(data.effective_uuid, so->e_uuid, sizeof(uuid_t));
3105 } else {
3106 data.effective_pid = so->last_pid;
3107 memcpy(data.effective_uuid, so->last_uuid, sizeof(uuid_t));
3108 }
3109 data.socketProtocol = so->so_proto->pr_protocol;
3110
3111 /*
3112 * Fill in address info:
3113 * For UDP, use the cfil_info hash entry directly.
3114 * For TCP, compose an hash entry with the saved addresses.
3115 */
3116 if (cfil_info->cfi_hash_entry != NULL) {
3117 hash_entry_ptr = cfil_info->cfi_hash_entry;
3118 } else if (cfil_info->cfi_so_attach_faddr.sa.sa_len > 0 ||
3119 cfil_info->cfi_so_attach_laddr.sa.sa_len > 0) {
3120 fill_cfil_hash_entry_from_address(&hash_entry, TRUE, &cfil_info->cfi_so_attach_laddr.sa, FALSE);
3121 fill_cfil_hash_entry_from_address(&hash_entry, FALSE, &cfil_info->cfi_so_attach_faddr.sa, FALSE);
3122 hash_entry_ptr = &hash_entry;
3123 }
3124 if (hash_entry_ptr != NULL) {
3125 boolean_t outgoing = (cfil_info->cfi_dir == CFS_CONNECTION_DIR_OUT);
3126 union sockaddr_in_4_6 *src = outgoing ? &data.local : &data.remote;
3127 union sockaddr_in_4_6 *dst = outgoing ? &data.remote : &data.local;
3128 cfil_fill_event_msg_addresses(hash_entry_ptr, inp, src, dst, !IS_INP_V6(inp), outgoing);
3129 }
3130
3131 data.byte_count_in = cfil_info->cfi_byte_inbound_count;
3132 data.byte_count_out = cfil_info->cfi_byte_outbound_count;
3133
3134 msg->cfc_signature_length = sizeof(cfil_crypto_signature);
3135 if (cfil_crypto_sign_data(crypto_state, &data, msg->cfc_signature, &msg->cfc_signature_length) != 0) {
3136 msg->cfc_signature_length = 0;
3137 CFIL_LOG(LOG_ERR, "CFIL: Failed to sign closed msg <sockID %llu>",
3138 msg->cfc_msghdr.cfm_sock_id);
3139 return false;
3140 }
3141
3142 return true;
3143 }
3144
3145 static int
3146 cfil_dispatch_attach_event(struct socket *so, struct cfil_info *cfil_info,
3147 uint32_t kcunit, int conn_dir)
3148 {
3149 errno_t error = 0;
3150 struct cfil_entry *entry = NULL;
3151 struct cfil_msg_sock_attached msg_attached;
3152 struct content_filter *cfc = NULL;
3153 struct inpcb *inp = (struct inpcb *)so->so_pcb;
3154 struct cfil_hash_entry *hash_entry_ptr = NULL;
3155 struct cfil_hash_entry hash_entry;
3156
3157 memset(&hash_entry, 0, sizeof(struct cfil_hash_entry));
3158 proc_t p = PROC_NULL;
3159 task_t t = TASK_NULL;
3160
3161 socket_lock_assert_owned(so);
3162
3163 cfil_rw_lock_shared(&cfil_lck_rw);
3164
3165 if (so->so_proto == NULL || so->so_proto->pr_domain == NULL) {
3166 error = EINVAL;
3167 goto done;
3168 }
3169
3170 if (kcunit == 0) {
3171 entry = SLIST_FIRST(&cfil_info->cfi_ordered_entries);
3172 } else {
3173 entry = &cfil_info->cfi_entries[kcunit - 1];
3174 }
3175
3176 if (entry == NULL) {
3177 goto done;
3178 }
3179
3180 cfc = entry->cfe_filter;
3181 if (cfc == NULL) {
3182 goto done;
3183 }
3184
3185 if ((entry->cfe_flags & CFEF_SENT_SOCK_ATTACHED)) {
3186 goto done;
3187 }
3188
3189 if (kcunit == 0) {
3190 kcunit = CFI_ENTRY_KCUNIT(cfil_info, entry);
3191 }
3192
3193 CFIL_LOG(LOG_INFO, "so %llx filter_control_unit %u kcunit %u",
3194 (uint64_t)VM_KERNEL_ADDRPERM(so), entry->cfe_necp_control_unit, kcunit);
3195
3196 /* Would be wasteful to try when flow controlled */
3197 if (cfc->cf_flags & CFF_FLOW_CONTROLLED) {
3198 error = ENOBUFS;
3199 goto done;
3200 }
3201
3202 bzero(&msg_attached, sizeof(struct cfil_msg_sock_attached));
3203 msg_attached.cfs_msghdr.cfm_len = sizeof(struct cfil_msg_sock_attached);
3204 msg_attached.cfs_msghdr.cfm_version = CFM_VERSION_CURRENT;
3205 msg_attached.cfs_msghdr.cfm_type = CFM_TYPE_EVENT;
3206 msg_attached.cfs_msghdr.cfm_op = CFM_OP_SOCKET_ATTACHED;
3207 msg_attached.cfs_msghdr.cfm_sock_id = entry->cfe_cfil_info->cfi_sock_id;
3208
3209 msg_attached.cfs_sock_family = so->so_proto->pr_domain->dom_family;
3210 msg_attached.cfs_sock_type = so->so_proto->pr_type;
3211 msg_attached.cfs_sock_protocol = so->so_proto->pr_protocol;
3212 msg_attached.cfs_pid = so->last_pid;
3213 memcpy(msg_attached.cfs_uuid, so->last_uuid, sizeof(uuid_t));
3214 if (so->so_flags & SOF_DELEGATED) {
3215 msg_attached.cfs_e_pid = so->e_pid;
3216 memcpy(msg_attached.cfs_e_uuid, so->e_uuid, sizeof(uuid_t));
3217 } else {
3218 msg_attached.cfs_e_pid = so->last_pid;
3219 memcpy(msg_attached.cfs_e_uuid, so->last_uuid, sizeof(uuid_t));
3220 }
3221
3222 /*
3223 * Fill in address info:
3224 * For UDP, use the cfil_info hash entry directly.
3225 * For TCP, compose an hash entry with the saved addresses.
3226 */
3227 if (cfil_info->cfi_hash_entry != NULL) {
3228 hash_entry_ptr = cfil_info->cfi_hash_entry;
3229 } else if (cfil_info->cfi_so_attach_faddr.sa.sa_len > 0 ||
3230 cfil_info->cfi_so_attach_laddr.sa.sa_len > 0) {
3231 fill_cfil_hash_entry_from_address(&hash_entry, TRUE, &cfil_info->cfi_so_attach_laddr.sa, FALSE);
3232 fill_cfil_hash_entry_from_address(&hash_entry, FALSE, &cfil_info->cfi_so_attach_faddr.sa, FALSE);
3233 hash_entry_ptr = &hash_entry;
3234 }
3235 if (hash_entry_ptr != NULL) {
3236 cfil_fill_event_msg_addresses(hash_entry_ptr, inp,
3237 &msg_attached.cfs_src, &msg_attached.cfs_dst,
3238 !IS_INP_V6(inp), conn_dir == CFS_CONNECTION_DIR_OUT);
3239 }
3240 msg_attached.cfs_conn_dir = conn_dir;
3241
3242 if (msg_attached.cfs_e_pid != 0) {
3243 p = proc_find(msg_attached.cfs_e_pid);
3244 if (p != PROC_NULL) {
3245 t = proc_task(p);
3246 if (t != TASK_NULL) {
3247 audit_token_t audit_token;
3248 mach_msg_type_number_t count = TASK_AUDIT_TOKEN_COUNT;
3249 if (task_info(t, TASK_AUDIT_TOKEN, (task_info_t)&audit_token, &count) == KERN_SUCCESS) {
3250 memcpy(&msg_attached.cfs_audit_token, &audit_token, sizeof(msg_attached.cfs_audit_token));
3251 } else {
3252 CFIL_LOG(LOG_ERR, "CFIL: Failed to get process audit token <sockID %llu> ",
3253 entry->cfe_cfil_info->cfi_sock_id);
3254 }
3255 }
3256 proc_rele(p);
3257 }
3258 }
3259
3260 if (cfil_info->cfi_debug) {
3261 cfil_info_log(LOG_ERR, cfil_info, "CFIL: SENDING ATTACH UP");
3262 }
3263
3264 cfil_dispatch_attach_event_sign(entry->cfe_filter->cf_crypto_state, cfil_info, &msg_attached);
3265
3266 #if LIFECYCLE_DEBUG
3267 CFIL_LOG(LOG_DEBUG, "CFIL: LIFECYCLE: SENDING ATTACH UP <sockID %llu> ",
3268 entry->cfe_cfil_info->cfi_sock_id);
3269 #endif
3270
3271 error = ctl_enqueuedata(entry->cfe_filter->cf_kcref,
3272 entry->cfe_filter->cf_kcunit,
3273 &msg_attached,
3274 sizeof(struct cfil_msg_sock_attached),
3275 CTL_DATA_EOR);
3276 if (error != 0) {
3277 CFIL_LOG(LOG_ERR, "ctl_enqueuedata() failed: %d", error);
3278 goto done;
3279 }
3280 microuptime(&entry->cfe_last_event);
3281 cfil_info->cfi_first_event.tv_sec = entry->cfe_last_event.tv_sec;
3282 cfil_info->cfi_first_event.tv_usec = entry->cfe_last_event.tv_usec;
3283
3284 entry->cfe_flags |= CFEF_SENT_SOCK_ATTACHED;
3285 OSIncrementAtomic(&cfil_stats.cfs_attach_event_ok);
3286 done:
3287
3288 /* We can recover from flow control */
3289 if (error == ENOBUFS) {
3290 entry->cfe_flags |= CFEF_FLOW_CONTROLLED;
3291 OSIncrementAtomic(&cfil_stats.cfs_attach_event_flow_control);
3292
3293 if (!cfil_rw_lock_shared_to_exclusive(&cfil_lck_rw)) {
3294 cfil_rw_lock_exclusive(&cfil_lck_rw);
3295 }
3296
3297 cfc->cf_flags |= CFF_FLOW_CONTROLLED;
3298
3299 cfil_rw_unlock_exclusive(&cfil_lck_rw);
3300 } else {
3301 if (error != 0) {
3302 OSIncrementAtomic(&cfil_stats.cfs_attach_event_fail);
3303 }
3304
3305 cfil_rw_unlock_shared(&cfil_lck_rw);
3306 }
3307 return error;
3308 }
3309
3310 static int
3311 cfil_dispatch_disconnect_event(struct socket *so, struct cfil_info *cfil_info, uint32_t kcunit, int outgoing)
3312 {
3313 errno_t error = 0;
3314 struct mbuf *msg = NULL;
3315 struct cfil_entry *entry;
3316 struct cfe_buf *entrybuf;
3317 struct cfil_msg_hdr msg_disconnected;
3318 struct content_filter *cfc;
3319
3320 socket_lock_assert_owned(so);
3321
3322 cfil_rw_lock_shared(&cfil_lck_rw);
3323
3324 entry = &cfil_info->cfi_entries[kcunit - 1];
3325 if (outgoing) {
3326 entrybuf = &entry->cfe_snd;
3327 } else {
3328 entrybuf = &entry->cfe_rcv;
3329 }
3330
3331 cfc = entry->cfe_filter;
3332 if (cfc == NULL) {
3333 goto done;
3334 }
3335
3336 CFIL_LOG(LOG_INFO, "so %llx kcunit %u outgoing %d",
3337 (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit, outgoing);
3338
3339 /*
3340 * Send the disconnection event once
3341 */
3342 if ((outgoing && (entry->cfe_flags & CFEF_SENT_DISCONNECT_OUT)) ||
3343 (!outgoing && (entry->cfe_flags & CFEF_SENT_DISCONNECT_IN))) {
3344 CFIL_LOG(LOG_INFO, "so %llx disconnect already sent",
3345 (uint64_t)VM_KERNEL_ADDRPERM(so));
3346 goto done;
3347 }
3348
3349 /*
3350 * We're not disconnected as long as some data is waiting
3351 * to be delivered to the filter
3352 */
3353 if (outgoing && cfil_queue_empty(&entrybuf->cfe_ctl_q) == 0) {
3354 CFIL_LOG(LOG_INFO, "so %llx control queue not empty",
3355 (uint64_t)VM_KERNEL_ADDRPERM(so));
3356 error = EBUSY;
3357 goto done;
3358 }
3359 /* Would be wasteful to try when flow controlled */
3360 if (cfc->cf_flags & CFF_FLOW_CONTROLLED) {
3361 error = ENOBUFS;
3362 goto done;
3363 }
3364
3365 if (cfil_info->cfi_debug) {
3366 cfil_info_log(LOG_ERR, cfil_info, "CFIL: SENDING DISCONNECT UP");
3367 }
3368
3369 #if LIFECYCLE_DEBUG
3370 cfil_info_log(LOG_ERR, cfil_info, outgoing ?
3371 "CFIL: LIFECYCLE: OUT - SENDING DISCONNECT UP":
3372 "CFIL: LIFECYCLE: IN - SENDING DISCONNECT UP");
3373 #endif
3374
3375 bzero(&msg_disconnected, sizeof(struct cfil_msg_hdr));
3376 msg_disconnected.cfm_len = sizeof(struct cfil_msg_hdr);
3377 msg_disconnected.cfm_version = CFM_VERSION_CURRENT;
3378 msg_disconnected.cfm_type = CFM_TYPE_EVENT;
3379 msg_disconnected.cfm_op = outgoing ? CFM_OP_DISCONNECT_OUT :
3380 CFM_OP_DISCONNECT_IN;
3381 msg_disconnected.cfm_sock_id = entry->cfe_cfil_info->cfi_sock_id;
3382 error = ctl_enqueuedata(entry->cfe_filter->cf_kcref,
3383 entry->cfe_filter->cf_kcunit,
3384 &msg_disconnected,
3385 sizeof(struct cfil_msg_hdr),
3386 CTL_DATA_EOR);
3387 if (error != 0) {
3388 CFIL_LOG(LOG_ERR, "ctl_enqueuembuf() failed: %d", error);
3389 mbuf_freem(msg);
3390 goto done;
3391 }
3392 microuptime(&entry->cfe_last_event);
3393 CFI_ADD_TIME_LOG(cfil_info, &entry->cfe_last_event, &cfil_info->cfi_first_event, msg_disconnected.cfm_op);
3394
3395 /* Remember we have sent the disconnection message */
3396 if (outgoing) {
3397 entry->cfe_flags |= CFEF_SENT_DISCONNECT_OUT;
3398 OSIncrementAtomic(&cfil_stats.cfs_disconnect_out_event_ok);
3399 } else {
3400 entry->cfe_flags |= CFEF_SENT_DISCONNECT_IN;
3401 OSIncrementAtomic(&cfil_stats.cfs_disconnect_in_event_ok);
3402 }
3403 done:
3404 if (error == ENOBUFS) {
3405 entry->cfe_flags |= CFEF_FLOW_CONTROLLED;
3406 OSIncrementAtomic(
3407 &cfil_stats.cfs_disconnect_event_flow_control);
3408
3409 if (!cfil_rw_lock_shared_to_exclusive(&cfil_lck_rw)) {
3410 cfil_rw_lock_exclusive(&cfil_lck_rw);
3411 }
3412
3413 cfc->cf_flags |= CFF_FLOW_CONTROLLED;
3414
3415 cfil_rw_unlock_exclusive(&cfil_lck_rw);
3416 } else {
3417 if (error != 0) {
3418 OSIncrementAtomic(
3419 &cfil_stats.cfs_disconnect_event_fail);
3420 }
3421
3422 cfil_rw_unlock_shared(&cfil_lck_rw);
3423 }
3424 return error;
3425 }
3426
3427 int
3428 cfil_dispatch_closed_event(struct socket *so, struct cfil_info *cfil_info, int kcunit)
3429 {
3430 struct cfil_entry *entry;
3431 struct cfil_msg_sock_closed msg_closed;
3432 errno_t error = 0;
3433 struct content_filter *cfc;
3434
3435 socket_lock_assert_owned(so);
3436
3437 cfil_rw_lock_shared(&cfil_lck_rw);
3438
3439 entry = &cfil_info->cfi_entries[kcunit - 1];
3440 cfc = entry->cfe_filter;
3441 if (cfc == NULL) {
3442 goto done;
3443 }
3444
3445 CFIL_LOG(LOG_INFO, "so %llx kcunit %d",
3446 (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit);
3447
3448 /* Would be wasteful to try when flow controlled */
3449 if (cfc->cf_flags & CFF_FLOW_CONTROLLED) {
3450 error = ENOBUFS;
3451 goto done;
3452 }
3453 /*
3454 * Send a single closed message per filter
3455 */
3456 if ((entry->cfe_flags & CFEF_SENT_SOCK_CLOSED) != 0) {
3457 goto done;
3458 }
3459 if ((entry->cfe_flags & CFEF_SENT_SOCK_ATTACHED) == 0) {
3460 goto done;
3461 }
3462
3463 microuptime(&entry->cfe_last_event);
3464 CFI_ADD_TIME_LOG(cfil_info, &entry->cfe_last_event, &cfil_info->cfi_first_event, CFM_OP_SOCKET_CLOSED);
3465
3466 bzero(&msg_closed, sizeof(struct cfil_msg_sock_closed));
3467 msg_closed.cfc_msghdr.cfm_len = sizeof(struct cfil_msg_sock_closed);
3468 msg_closed.cfc_msghdr.cfm_version = CFM_VERSION_CURRENT;
3469 msg_closed.cfc_msghdr.cfm_type = CFM_TYPE_EVENT;
3470 msg_closed.cfc_msghdr.cfm_op = CFM_OP_SOCKET_CLOSED;
3471 msg_closed.cfc_msghdr.cfm_sock_id = entry->cfe_cfil_info->cfi_sock_id;
3472 msg_closed.cfc_first_event.tv_sec = cfil_info->cfi_first_event.tv_sec;
3473 msg_closed.cfc_first_event.tv_usec = cfil_info->cfi_first_event.tv_usec;
3474 memcpy(msg_closed.cfc_op_time, cfil_info->cfi_op_time, sizeof(uint32_t) * CFI_MAX_TIME_LOG_ENTRY);
3475 memcpy(msg_closed.cfc_op_list, cfil_info->cfi_op_list, sizeof(unsigned char) * CFI_MAX_TIME_LOG_ENTRY);
3476 msg_closed.cfc_op_list_ctr = cfil_info->cfi_op_list_ctr;
3477 msg_closed.cfc_byte_inbound_count = cfil_info->cfi_byte_inbound_count;
3478 msg_closed.cfc_byte_outbound_count = cfil_info->cfi_byte_outbound_count;
3479
3480 cfil_dispatch_closed_event_sign(entry->cfe_filter->cf_crypto_state, so, cfil_info, &msg_closed);
3481
3482 if (cfil_info->cfi_debug) {
3483 cfil_info_log(LOG_ERR, cfil_info, "CFIL: SENDING CLOSED UP");
3484 }
3485
3486 #if LIFECYCLE_DEBUG
3487 CFIL_LOG(LOG_ERR, "CFIL: LIFECYCLE: SENDING CLOSED UP: <sock id %llu> op ctr %d, start time %llu.%llu", msg_closed.cfc_msghdr.cfm_sock_id, cfil_info->cfi_op_list_ctr, cfil_info->cfi_first_event.tv_sec, cfil_info->cfi_first_event.tv_usec);
3488 #endif
3489 /* for debugging
3490 * if (msg_closed.cfc_op_list_ctr > CFI_MAX_TIME_LOG_ENTRY) {
3491 * msg_closed.cfc_op_list_ctr = CFI_MAX_TIME_LOG_ENTRY; // just in case
3492 * }
3493 * for (unsigned int i = 0; i < msg_closed.cfc_op_list_ctr ; i++) {
3494 * CFIL_LOG(LOG_ERR, "MD: socket %llu event %2u, time + %u msec", msg_closed.cfc_msghdr.cfm_sock_id, (unsigned short)msg_closed.cfc_op_list[i], msg_closed.cfc_op_time[i]);
3495 * }
3496 */
3497
3498 error = ctl_enqueuedata(entry->cfe_filter->cf_kcref,
3499 entry->cfe_filter->cf_kcunit,
3500 &msg_closed,
3501 sizeof(struct cfil_msg_sock_closed),
3502 CTL_DATA_EOR);
3503 if (error != 0) {
3504 CFIL_LOG(LOG_ERR, "ctl_enqueuedata() failed: %d",
3505 error);
3506 goto done;
3507 }
3508
3509 entry->cfe_flags |= CFEF_SENT_SOCK_CLOSED;
3510 OSIncrementAtomic(&cfil_stats.cfs_closed_event_ok);
3511 done:
3512 /* We can recover from flow control */
3513 if (error == ENOBUFS) {
3514 entry->cfe_flags |= CFEF_FLOW_CONTROLLED;
3515 OSIncrementAtomic(&cfil_stats.cfs_closed_event_flow_control);
3516
3517 if (!cfil_rw_lock_shared_to_exclusive(&cfil_lck_rw)) {
3518 cfil_rw_lock_exclusive(&cfil_lck_rw);
3519 }
3520
3521 cfc->cf_flags |= CFF_FLOW_CONTROLLED;
3522
3523 cfil_rw_unlock_exclusive(&cfil_lck_rw);
3524 } else {
3525 if (error != 0) {
3526 OSIncrementAtomic(&cfil_stats.cfs_closed_event_fail);
3527 }
3528
3529 cfil_rw_unlock_shared(&cfil_lck_rw);
3530 }
3531
3532 return error;
3533 }
3534
3535 static void
3536 fill_ip6_sockaddr_4_6(union sockaddr_in_4_6 *sin46,
3537 struct in6_addr *ip6, u_int16_t port)
3538 {
3539 if (sin46 == NULL) {
3540 return;
3541 }
3542
3543 struct sockaddr_in6 *sin6 = &sin46->sin6;
3544
3545 sin6->sin6_family = AF_INET6;
3546 sin6->sin6_len = sizeof(*sin6);
3547 sin6->sin6_port = port;
3548 sin6->sin6_addr = *ip6;
3549 if (IN6_IS_SCOPE_EMBED(&sin6->sin6_addr)) {
3550 sin6->sin6_scope_id = ntohs(sin6->sin6_addr.s6_addr16[1]);
3551 sin6->sin6_addr.s6_addr16[1] = 0;
3552 }
3553 }
3554
3555 static void
3556 fill_ip_sockaddr_4_6(union sockaddr_in_4_6 *sin46,
3557 struct in_addr ip, u_int16_t port)
3558 {
3559 if (sin46 == NULL) {
3560 return;
3561 }
3562
3563 struct sockaddr_in *sin = &sin46->sin;
3564
3565 sin->sin_family = AF_INET;
3566 sin->sin_len = sizeof(*sin);
3567 sin->sin_port = port;
3568 sin->sin_addr.s_addr = ip.s_addr;
3569 }
3570
3571 static void
3572 cfil_get_flow_address_v6(struct cfil_hash_entry *entry, struct inpcb *inp,
3573 struct in6_addr **laddr, struct in6_addr **faddr,
3574 u_int16_t *lport, u_int16_t *fport)
3575 {
3576 if (entry != NULL) {
3577 *laddr = &entry->cfentry_laddr.addr6;
3578 *faddr = &entry->cfentry_faddr.addr6;
3579 *lport = entry->cfentry_lport;
3580 *fport = entry->cfentry_fport;
3581 } else {
3582 *laddr = &inp->in6p_laddr;
3583 *faddr = &inp->in6p_faddr;
3584 *lport = inp->inp_lport;
3585 *fport = inp->inp_fport;
3586 }
3587 }
3588
3589 static void
3590 cfil_get_flow_address(struct cfil_hash_entry *entry, struct inpcb *inp,
3591 struct in_addr *laddr, struct in_addr *faddr,
3592 u_int16_t *lport, u_int16_t *fport)
3593 {
3594 if (entry != NULL) {
3595 *laddr = entry->cfentry_laddr.addr46.ia46_addr4;
3596 *faddr = entry->cfentry_faddr.addr46.ia46_addr4;
3597 *lport = entry->cfentry_lport;
3598 *fport = entry->cfentry_fport;
3599 } else {
3600 *laddr = inp->inp_laddr;
3601 *faddr = inp->inp_faddr;
3602 *lport = inp->inp_lport;
3603 *fport = inp->inp_fport;
3604 }
3605 }
3606
3607 static int
3608 cfil_dispatch_data_event(struct socket *so, struct cfil_info *cfil_info, uint32_t kcunit, int outgoing,
3609 struct mbuf *data, unsigned int copyoffset, unsigned int copylen)
3610 {
3611 errno_t error = 0;
3612 struct mbuf *copy = NULL;
3613 struct mbuf *msg = NULL;
3614 unsigned int one = 1;
3615 struct cfil_msg_data_event *data_req;
3616 size_t hdrsize;
3617 struct inpcb *inp = (struct inpcb *)so->so_pcb;
3618 struct cfil_entry *entry;
3619 struct cfe_buf *entrybuf;
3620 struct content_filter *cfc;
3621 struct timeval tv;
3622 int inp_flags = 0;
3623
3624 cfil_rw_lock_shared(&cfil_lck_rw);
3625
3626 entry = &cfil_info->cfi_entries[kcunit - 1];
3627 if (outgoing) {
3628 entrybuf = &entry->cfe_snd;
3629 } else {
3630 entrybuf = &entry->cfe_rcv;
3631 }
3632
3633 cfc = entry->cfe_filter;
3634 if (cfc == NULL) {
3635 goto done;
3636 }
3637
3638 data = cfil_data_start(data);
3639 if (data == NULL || (data->m_flags & M_PKTHDR) == 0) {
3640 CFIL_LOG(LOG_ERR, "NOT PKTHDR");
3641 goto done;
3642 }
3643
3644 CFIL_LOG(LOG_INFO, "so %llx kcunit %u outgoing %d",
3645 (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit, outgoing);
3646
3647 socket_lock_assert_owned(so);
3648
3649 /* Would be wasteful to try */
3650 if (cfc->cf_flags & CFF_FLOW_CONTROLLED) {
3651 error = ENOBUFS;
3652 goto done;
3653 }
3654
3655 /* Make a copy of the data to pass to kernel control socket */
3656 copy = m_copym_mode(data, copyoffset, copylen, M_DONTWAIT,
3657 M_COPYM_NOOP_HDR);
3658 if (copy == NULL) {
3659 CFIL_LOG(LOG_ERR, "m_copym_mode() failed");
3660 error = ENOMEM;
3661 goto done;
3662 }
3663
3664 /* We need an mbuf packet for the message header */
3665 hdrsize = sizeof(struct cfil_msg_data_event);
3666 error = mbuf_allocpacket(MBUF_DONTWAIT, hdrsize, &one, &msg);
3667 if (error != 0) {
3668 CFIL_LOG(LOG_ERR, "mbuf_allocpacket() failed");
3669 m_freem(copy);
3670 /*
3671 * ENOBUFS is to indicate flow control
3672 */
3673 error = ENOMEM;
3674 goto done;
3675 }
3676 mbuf_setlen(msg, hdrsize);
3677 mbuf_pkthdr_setlen(msg, hdrsize + copylen);
3678 msg->m_next = copy;
3679 data_req = (struct cfil_msg_data_event *)mbuf_data(msg);
3680 bzero(data_req, hdrsize);
3681 data_req->cfd_msghdr.cfm_len = (uint32_t)hdrsize + copylen;
3682 data_req->cfd_msghdr.cfm_version = 1;
3683 data_req->cfd_msghdr.cfm_type = CFM_TYPE_EVENT;
3684 data_req->cfd_msghdr.cfm_op =
3685 outgoing ? CFM_OP_DATA_OUT : CFM_OP_DATA_IN;
3686 data_req->cfd_msghdr.cfm_sock_id =
3687 entry->cfe_cfil_info->cfi_sock_id;
3688 data_req->cfd_start_offset = entrybuf->cfe_peeked;
3689 data_req->cfd_end_offset = entrybuf->cfe_peeked + copylen;
3690
3691 data_req->cfd_flags = 0;
3692 if (OPTIONAL_IP_HEADER(so)) {
3693 /*
3694 * For non-UDP/TCP traffic, indicate to filters if optional
3695 * IP header is present:
3696 * outgoing - indicate according to INP_HDRINCL flag
3697 * incoming - For IPv4 only, stripping of IP header is
3698 * optional. But for CFIL, we delay stripping
3699 * at rip_input. So CFIL always expects IP
3700 * frames. IP header will be stripped according
3701 * to INP_STRIPHDR flag later at reinjection.
3702 */
3703 if ((!outgoing && !IS_INP_V6(inp)) ||
3704 (outgoing && cfil_dgram_peek_socket_state(data, &inp_flags) && (inp_flags & INP_HDRINCL))) {
3705 data_req->cfd_flags |= CFD_DATA_FLAG_IP_HEADER;
3706 }
3707 }
3708
3709 /*
3710 * Copy address/port into event msg.
3711 * For non connected sockets need to copy addresses from passed
3712 * parameters
3713 */
3714 cfil_fill_event_msg_addresses(cfil_info->cfi_hash_entry, inp,
3715 &data_req->cfc_src, &data_req->cfc_dst,
3716 !IS_INP_V6(inp), outgoing);
3717
3718 if (cfil_info->cfi_debug) {
3719 cfil_info_log(LOG_ERR, cfil_info, "CFIL: SENDING DATA UP");
3720 }
3721
3722 if (cfil_info->cfi_isSignatureLatest == false) {
3723 cfil_dispatch_data_event_sign(entry->cfe_filter->cf_crypto_state, so, cfil_info, data_req);
3724 }
3725
3726 microuptime(&tv);
3727 CFI_ADD_TIME_LOG(cfil_info, &tv, &cfil_info->cfi_first_event, data_req->cfd_msghdr.cfm_op);
3728
3729 /* Pass the message to the content filter */
3730 error = ctl_enqueuembuf(entry->cfe_filter->cf_kcref,
3731 entry->cfe_filter->cf_kcunit,
3732 msg, CTL_DATA_EOR);
3733 if (error != 0) {
3734 CFIL_LOG(LOG_ERR, "ctl_enqueuembuf() failed: %d", error);
3735 mbuf_freem(msg);
3736 goto done;
3737 }
3738 entry->cfe_flags &= ~CFEF_FLOW_CONTROLLED;
3739 OSIncrementAtomic(&cfil_stats.cfs_data_event_ok);
3740
3741 #if VERDICT_DEBUG
3742 CFIL_LOG(LOG_ERR, "CFIL: VERDICT ACTION: so %llx sockID %llu outgoing %d: mbuf %llx copyoffset %u copylen %u",
3743 (uint64_t)VM_KERNEL_ADDRPERM(so), cfil_info->cfi_sock_id, outgoing, (uint64_t)VM_KERNEL_ADDRPERM(data), copyoffset, copylen);
3744 #endif
3745
3746 if (cfil_info->cfi_debug) {
3747 CFIL_LOG(LOG_ERR, "CFIL: VERDICT ACTION: so %llx sockID %llu outgoing %d: mbuf %llx copyoffset %u copylen %u (%s)",
3748 (uint64_t)VM_KERNEL_ADDRPERM(so), cfil_info->cfi_sock_id, outgoing, (uint64_t)VM_KERNEL_ADDRPERM(data), copyoffset, copylen,
3749 data_req->cfd_flags & CFD_DATA_FLAG_IP_HEADER ? "IP HDR" : "NO IP HDR");
3750 }
3751
3752 done:
3753 if (error == ENOBUFS) {
3754 entry->cfe_flags |= CFEF_FLOW_CONTROLLED;
3755 OSIncrementAtomic(
3756 &cfil_stats.cfs_data_event_flow_control);
3757
3758 if (!cfil_rw_lock_shared_to_exclusive(&cfil_lck_rw)) {
3759 cfil_rw_lock_exclusive(&cfil_lck_rw);
3760 }
3761
3762 cfc->cf_flags |= CFF_FLOW_CONTROLLED;
3763
3764 cfil_rw_unlock_exclusive(&cfil_lck_rw);
3765 } else {
3766 if (error != 0) {
3767 OSIncrementAtomic(&cfil_stats.cfs_data_event_fail);
3768 }
3769
3770 cfil_rw_unlock_shared(&cfil_lck_rw);
3771 }
3772 return error;
3773 }
3774
3775 /*
3776 * Process the queue of data waiting to be delivered to content filter
3777 */
3778 static int
3779 cfil_data_service_ctl_q(struct socket *so, struct cfil_info *cfil_info, uint32_t kcunit, int outgoing)
3780 {
3781 errno_t error = 0;
3782 struct mbuf *data, *tmp = NULL;
3783 unsigned int datalen = 0, copylen = 0, copyoffset = 0;
3784 struct cfil_entry *entry;
3785 struct cfe_buf *entrybuf;
3786 uint64_t currentoffset = 0;
3787
3788 if (cfil_info == NULL) {
3789 return 0;
3790 }
3791
3792 CFIL_LOG(LOG_INFO, "so %llx kcunit %u outgoing %d",
3793 (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit, outgoing);
3794
3795 socket_lock_assert_owned(so);
3796
3797 entry = &cfil_info->cfi_entries[kcunit - 1];
3798 if (outgoing) {
3799 entrybuf = &entry->cfe_snd;
3800 } else {
3801 entrybuf = &entry->cfe_rcv;
3802 }
3803
3804 /* Send attached message if not yet done */
3805 if ((entry->cfe_flags & CFEF_SENT_SOCK_ATTACHED) == 0) {
3806 error = cfil_dispatch_attach_event(so, cfil_info, CFI_ENTRY_KCUNIT(cfil_info, entry),
3807 outgoing ? CFS_CONNECTION_DIR_OUT : CFS_CONNECTION_DIR_IN);
3808 if (error != 0) {
3809 /* We can recover from flow control */
3810 if (error == ENOBUFS || error == ENOMEM) {
3811 error = 0;
3812 }
3813 goto done;
3814 }
3815 } else if ((entry->cfe_flags & CFEF_DATA_START) == 0) {
3816 OSIncrementAtomic(&cfil_stats.cfs_ctl_q_not_started);
3817 goto done;
3818 }
3819
3820 #if DATA_DEBUG
3821 CFIL_LOG(LOG_DEBUG, "CFIL: SERVICE CTL-Q: pass_offset %llu peeked %llu peek_offset %llu",
3822 entrybuf->cfe_pass_offset,
3823 entrybuf->cfe_peeked,
3824 entrybuf->cfe_peek_offset);
3825 #endif
3826
3827 /* Move all data that can pass */
3828 while ((data = cfil_queue_first(&entrybuf->cfe_ctl_q)) != NULL &&
3829 entrybuf->cfe_ctl_q.q_start < entrybuf->cfe_pass_offset) {
3830 datalen = cfil_data_length(data, NULL, NULL);
3831 tmp = data;
3832
3833 if (entrybuf->cfe_ctl_q.q_start + datalen <=
3834 entrybuf->cfe_pass_offset) {
3835 /*
3836 * The first mbuf can fully pass
3837 */
3838 copylen = datalen;
3839 } else {
3840 /*
3841 * The first mbuf can partially pass
3842 */
3843 copylen = (unsigned int)(entrybuf->cfe_pass_offset - entrybuf->cfe_ctl_q.q_start);
3844 }
3845 VERIFY(copylen <= datalen);
3846
3847 #if DATA_DEBUG
3848 CFIL_LOG(LOG_DEBUG,
3849 "CFIL: SERVICE CTL-Q PASSING: %llx first %llu peeked %llu pass %llu peek %llu"
3850 "datalen %u copylen %u",
3851 (uint64_t)VM_KERNEL_ADDRPERM(tmp),
3852 entrybuf->cfe_ctl_q.q_start,
3853 entrybuf->cfe_peeked,
3854 entrybuf->cfe_pass_offset,
3855 entrybuf->cfe_peek_offset,
3856 datalen, copylen);
3857 #endif
3858
3859 /*
3860 * Data that passes has been peeked at explicitly or
3861 * implicitly
3862 */
3863 if (entrybuf->cfe_ctl_q.q_start + copylen >
3864 entrybuf->cfe_peeked) {
3865 entrybuf->cfe_peeked =
3866 entrybuf->cfe_ctl_q.q_start + copylen;
3867 }
3868 /*
3869 * Stop on partial pass
3870 */
3871 if (copylen < datalen) {
3872 break;
3873 }
3874
3875 /* All good, move full data from ctl queue to pending queue */
3876 cfil_queue_remove(&entrybuf->cfe_ctl_q, data, datalen);
3877
3878 cfil_queue_enqueue(&entrybuf->cfe_pending_q, data, datalen);
3879 if (outgoing) {
3880 OSAddAtomic64(datalen,
3881 &cfil_stats.cfs_pending_q_out_enqueued);
3882 } else {
3883 OSAddAtomic64(datalen,
3884 &cfil_stats.cfs_pending_q_in_enqueued);
3885 }
3886 }
3887 CFIL_INFO_VERIFY(cfil_info);
3888 if (tmp != NULL) {
3889 CFIL_LOG(LOG_DEBUG,
3890 "%llx first %llu peeked %llu pass %llu peek %llu"
3891 "datalen %u copylen %u",
3892 (uint64_t)VM_KERNEL_ADDRPERM(tmp),
3893 entrybuf->cfe_ctl_q.q_start,
3894 entrybuf->cfe_peeked,
3895 entrybuf->cfe_pass_offset,
3896 entrybuf->cfe_peek_offset,
3897 datalen, copylen);
3898 }
3899 tmp = NULL;
3900
3901 /* Now deal with remaining data the filter wants to peek at */
3902 for (data = cfil_queue_first(&entrybuf->cfe_ctl_q),
3903 currentoffset = entrybuf->cfe_ctl_q.q_start;
3904 data != NULL && currentoffset < entrybuf->cfe_peek_offset;
3905 data = cfil_queue_next(&entrybuf->cfe_ctl_q, data),
3906 currentoffset += datalen) {
3907 datalen = cfil_data_length(data, NULL, NULL);
3908 tmp = data;
3909
3910 /* We've already peeked at this mbuf */
3911 if (currentoffset + datalen <= entrybuf->cfe_peeked) {
3912 continue;
3913 }
3914 /*
3915 * The data in the first mbuf may have been
3916 * partially peeked at
3917 */
3918 copyoffset = (unsigned int)(entrybuf->cfe_peeked - currentoffset);
3919 VERIFY(copyoffset < datalen);
3920 copylen = datalen - copyoffset;
3921 VERIFY(copylen <= datalen);
3922 /*
3923 * Do not copy more than needed
3924 */
3925 if (currentoffset + copyoffset + copylen >
3926 entrybuf->cfe_peek_offset) {
3927 copylen = (unsigned int)(entrybuf->cfe_peek_offset -
3928 (currentoffset + copyoffset));
3929 }
3930
3931 #if DATA_DEBUG
3932 CFIL_LOG(LOG_DEBUG,
3933 "CFIL: SERVICE CTL-Q PEEKING: %llx current %llu peeked %llu pass %llu peek %llu "
3934 "datalen %u copylen %u copyoffset %u",
3935 (uint64_t)VM_KERNEL_ADDRPERM(tmp),
3936 currentoffset,
3937 entrybuf->cfe_peeked,
3938 entrybuf->cfe_pass_offset,
3939 entrybuf->cfe_peek_offset,
3940 datalen, copylen, copyoffset);
3941 #endif
3942
3943 /*
3944 * Stop if there is nothing more to peek at
3945 */
3946 if (copylen == 0) {
3947 break;
3948 }
3949 /*
3950 * Let the filter get a peek at this span of data
3951 */
3952 error = cfil_dispatch_data_event(so, cfil_info, kcunit,
3953 outgoing, data, copyoffset, copylen);
3954 if (error != 0) {
3955 /* On error, leave data in ctl_q */
3956 break;
3957 }
3958 entrybuf->cfe_peeked += copylen;
3959 if (outgoing) {
3960 OSAddAtomic64(copylen,
3961 &cfil_stats.cfs_ctl_q_out_peeked);
3962 } else {
3963 OSAddAtomic64(copylen,
3964 &cfil_stats.cfs_ctl_q_in_peeked);
3965 }
3966
3967 /* Stop when data could not be fully peeked at */
3968 if (copylen + copyoffset < datalen) {
3969 break;
3970 }
3971 }
3972 CFIL_INFO_VERIFY(cfil_info);
3973 if (tmp != NULL) {
3974 CFIL_LOG(LOG_DEBUG,
3975 "%llx first %llu peeked %llu pass %llu peek %llu"
3976 "datalen %u copylen %u copyoffset %u",
3977 (uint64_t)VM_KERNEL_ADDRPERM(tmp),
3978 currentoffset,
3979 entrybuf->cfe_peeked,
3980 entrybuf->cfe_pass_offset,
3981 entrybuf->cfe_peek_offset,
3982 datalen, copylen, copyoffset);
3983 }
3984
3985 /*
3986 * Process data that has passed the filter
3987 */
3988 error = cfil_service_pending_queue(so, cfil_info, kcunit, outgoing);
3989 if (error != 0) {
3990 CFIL_LOG(LOG_ERR, "cfil_service_pending_queue() error %d",
3991 error);
3992 goto done;
3993 }
3994
3995 /*
3996 * Dispatch disconnect events that could not be sent
3997 */
3998 if (cfil_info == NULL) {
3999 goto done;
4000 } else if (outgoing) {
4001 if ((cfil_info->cfi_flags & CFIF_SHUT_WR) &&
4002 !(entry->cfe_flags & CFEF_SENT_DISCONNECT_OUT)) {
4003 cfil_dispatch_disconnect_event(so, cfil_info, kcunit, 1);
4004 }
4005 } else {
4006 if ((cfil_info->cfi_flags & CFIF_SHUT_RD) &&
4007 !(entry->cfe_flags & CFEF_SENT_DISCONNECT_IN)) {
4008 cfil_dispatch_disconnect_event(so, cfil_info, kcunit, 0);
4009 }
4010 }
4011
4012 done:
4013 CFIL_LOG(LOG_DEBUG,
4014 "first %llu peeked %llu pass %llu peek %llu",
4015 entrybuf->cfe_ctl_q.q_start,
4016 entrybuf->cfe_peeked,
4017 entrybuf->cfe_pass_offset,
4018 entrybuf->cfe_peek_offset);
4019
4020 CFIL_INFO_VERIFY(cfil_info);
4021 return error;
4022 }
4023
4024 /*
4025 * cfil_data_filter()
4026 *
4027 * Process data for a content filter installed on a socket
4028 */
4029 int
4030 cfil_data_filter(struct socket *so, struct cfil_info *cfil_info, uint32_t kcunit, int outgoing,
4031 struct mbuf *data, uint32_t datalen)
4032 {
4033 errno_t error = 0;
4034 struct cfil_entry *entry;
4035 struct cfe_buf *entrybuf;
4036
4037 CFIL_LOG(LOG_INFO, "so %llx kcunit %u outgoing %d",
4038 (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit, outgoing);
4039
4040 socket_lock_assert_owned(so);
4041
4042 entry = &cfil_info->cfi_entries[kcunit - 1];
4043 if (outgoing) {
4044 entrybuf = &entry->cfe_snd;
4045 } else {
4046 entrybuf = &entry->cfe_rcv;
4047 }
4048
4049 /* Are we attached to the filter? */
4050 if (entry->cfe_filter == NULL) {
4051 error = 0;
4052 goto done;
4053 }
4054
4055 /* Dispatch to filters */
4056 cfil_queue_enqueue(&entrybuf->cfe_ctl_q, data, datalen);
4057 if (outgoing) {
4058 OSAddAtomic64(datalen,
4059 &cfil_stats.cfs_ctl_q_out_enqueued);
4060 } else {
4061 OSAddAtomic64(datalen,
4062 &cfil_stats.cfs_ctl_q_in_enqueued);
4063 }
4064
4065 error = cfil_data_service_ctl_q(so, cfil_info, kcunit, outgoing);
4066 if (error != 0) {
4067 CFIL_LOG(LOG_ERR, "cfil_data_service_ctl_q() error %d",
4068 error);
4069 }
4070 /*
4071 * We have to return EJUSTRETURN in all cases to avoid double free
4072 * by socket layer
4073 */
4074 error = EJUSTRETURN;
4075 done:
4076 CFIL_INFO_VERIFY(cfil_info);
4077
4078 CFIL_LOG(LOG_INFO, "return %d", error);
4079 return error;
4080 }
4081
4082 /*
4083 * cfil_service_inject_queue() re-inject data that passed the
4084 * content filters
4085 */
4086 static int
4087 cfil_service_inject_queue(struct socket *so, struct cfil_info *cfil_info, int outgoing)
4088 {
4089 mbuf_t data;
4090 unsigned int datalen;
4091 int mbcnt = 0;
4092 int mbnum = 0;
4093 errno_t error = 0;
4094 struct cfi_buf *cfi_buf;
4095 struct cfil_queue *inject_q;
4096 int need_rwakeup = 0;
4097 int count = 0;
4098 struct inpcb *inp = NULL;
4099 struct ip *ip = NULL;
4100 unsigned int hlen;
4101
4102 if (cfil_info == NULL) {
4103 return 0;
4104 }
4105
4106 socket_lock_assert_owned(so);
4107
4108 if (outgoing) {
4109 cfi_buf = &cfil_info->cfi_snd;
4110 cfil_info->cfi_flags &= ~CFIF_RETRY_INJECT_OUT;
4111 } else {
4112 cfi_buf = &cfil_info->cfi_rcv;
4113 cfil_info->cfi_flags &= ~CFIF_RETRY_INJECT_IN;
4114 }
4115 inject_q = &cfi_buf->cfi_inject_q;
4116
4117 if (cfil_queue_empty(inject_q)) {
4118 return 0;
4119 }
4120
4121 #if DATA_DEBUG | VERDICT_DEBUG
4122 CFIL_LOG(LOG_ERR, "CFIL: SERVICE INJECT-Q: <so %llx> outgoing %d queue len %llu",
4123 (uint64_t)VM_KERNEL_ADDRPERM(so), outgoing, cfil_queue_len(inject_q));
4124 #endif
4125
4126 while ((data = cfil_queue_first(inject_q)) != NULL) {
4127 datalen = cfil_data_length(data, &mbcnt, &mbnum);
4128
4129 #if DATA_DEBUG
4130 CFIL_LOG(LOG_ERR, "CFIL: SERVICE INJECT-Q: <so %llx> data %llx datalen %u (mbcnt %u)",
4131 (uint64_t)VM_KERNEL_ADDRPERM(so), (uint64_t)VM_KERNEL_ADDRPERM(data), datalen, mbcnt);
4132 #endif
4133 if (cfil_info->cfi_debug) {
4134 CFIL_LOG(LOG_ERR, "CFIL: SERVICE INJECT-Q: <so %llx> data %llx datalen %u (mbcnt %u)",
4135 (uint64_t)VM_KERNEL_ADDRPERM(so), (uint64_t)VM_KERNEL_ADDRPERM(data), datalen, mbcnt);
4136 }
4137
4138 /* Remove data from queue and adjust stats */
4139 cfil_queue_remove(inject_q, data, datalen);
4140 cfi_buf->cfi_pending_first += datalen;
4141 cfi_buf->cfi_pending_mbcnt -= mbcnt;
4142 cfi_buf->cfi_pending_mbnum -= mbnum;
4143 cfil_info_buf_verify(cfi_buf);
4144
4145 if (outgoing) {
4146 error = sosend_reinject(so, NULL, data, NULL, 0);
4147 if (error != 0) {
4148 #if DATA_DEBUG
4149 cfil_info_log(LOG_ERR, cfil_info, "CFIL: Error: sosend_reinject() failed");
4150 CFIL_LOG(LOG_ERR, "### sosend() failed %d", error);
4151 #endif
4152 break;
4153 }
4154 // At least one injection succeeded, need to wake up pending threads.
4155 need_rwakeup = 1;
4156 } else {
4157 data->m_flags |= M_SKIPCFIL;
4158
4159 /*
4160 * NOTE: We currently only support TCP, UDP, ICMP,
4161 * ICMPv6 and RAWIP. For MPTCP and message TCP we'll
4162 * need to call the appropriate sbappendxxx()
4163 * of fix sock_inject_data_in()
4164 */
4165 if (IS_IP_DGRAM(so)) {
4166 if (OPTIONAL_IP_HEADER(so)) {
4167 inp = sotoinpcb(so);
4168 if (inp && (inp->inp_flags & INP_STRIPHDR)) {
4169 mbuf_t data_start = cfil_data_start(data);
4170 if (data_start != NULL && (data_start->m_flags & M_PKTHDR)) {
4171 ip = mtod(data_start, struct ip *);
4172 hlen = IP_VHL_HL(ip->ip_vhl) << 2;
4173 data_start->m_len -= hlen;
4174 data_start->m_pkthdr.len -= hlen;
4175 data_start->m_data += hlen;
4176 }
4177 }
4178 }
4179
4180 if (sbappendchain(&so->so_rcv, data, 0)) {
4181 need_rwakeup = 1;
4182 }
4183 } else {
4184 if (sbappendstream(&so->so_rcv, data)) {
4185 need_rwakeup = 1;
4186 }
4187 }
4188 }
4189
4190 if (outgoing) {
4191 OSAddAtomic64(datalen,
4192 &cfil_stats.cfs_inject_q_out_passed);
4193 } else {
4194 OSAddAtomic64(datalen,
4195 &cfil_stats.cfs_inject_q_in_passed);
4196 }
4197
4198 count++;
4199 }
4200
4201 #if DATA_DEBUG | VERDICT_DEBUG
4202 CFIL_LOG(LOG_ERR, "CFIL: SERVICE INJECT-Q: <so %llx> injected %d",
4203 (uint64_t)VM_KERNEL_ADDRPERM(so), count);
4204 #endif
4205 if (cfil_info->cfi_debug) {
4206 CFIL_LOG(LOG_ERR, "CFIL: SERVICE INJECT-Q: <so %llx> injected %d",
4207 (uint64_t)VM_KERNEL_ADDRPERM(so), count);
4208 }
4209
4210 /* A single wakeup is for several packets is more efficient */
4211 if (need_rwakeup) {
4212 if (outgoing == TRUE) {
4213 sowwakeup(so);
4214 } else {
4215 sorwakeup(so);
4216 }
4217 }
4218
4219 if (error != 0 && cfil_info) {
4220 if (error == ENOBUFS) {
4221 OSIncrementAtomic(&cfil_stats.cfs_inject_q_nobufs);
4222 }
4223 if (error == ENOMEM) {
4224 OSIncrementAtomic(&cfil_stats.cfs_inject_q_nomem);
4225 }
4226
4227 if (outgoing) {
4228 cfil_info->cfi_flags |= CFIF_RETRY_INJECT_OUT;
4229 OSIncrementAtomic(&cfil_stats.cfs_inject_q_out_fail);
4230 } else {
4231 cfil_info->cfi_flags |= CFIF_RETRY_INJECT_IN;
4232 OSIncrementAtomic(&cfil_stats.cfs_inject_q_in_fail);
4233 }
4234 }
4235
4236 /*
4237 * Notify
4238 */
4239 if (cfil_info && (cfil_info->cfi_flags & CFIF_SHUT_WR)) {
4240 cfil_sock_notify_shutdown(so, SHUT_WR);
4241 if (cfil_sock_data_pending(&so->so_snd) == 0) {
4242 soshutdownlock_final(so, SHUT_WR);
4243 }
4244 }
4245 if (cfil_info && (cfil_info->cfi_flags & CFIF_CLOSE_WAIT)) {
4246 if (cfil_filters_attached(so) == 0) {
4247 CFIL_LOG(LOG_INFO, "so %llx waking",
4248 (uint64_t)VM_KERNEL_ADDRPERM(so));
4249 wakeup((caddr_t)cfil_info);
4250 }
4251 }
4252
4253 CFIL_INFO_VERIFY(cfil_info);
4254
4255 return error;
4256 }
4257
4258 static int
4259 cfil_service_pending_queue(struct socket *so, struct cfil_info *cfil_info, uint32_t kcunit, int outgoing)
4260 {
4261 uint64_t passlen, curlen;
4262 mbuf_t data;
4263 unsigned int datalen;
4264 errno_t error = 0;
4265 struct cfil_entry *entry;
4266 struct cfe_buf *entrybuf;
4267 struct cfil_queue *pending_q;
4268
4269 CFIL_LOG(LOG_INFO, "so %llx kcunit %u outgoing %d",
4270 (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit, outgoing);
4271
4272 socket_lock_assert_owned(so);
4273
4274 entry = &cfil_info->cfi_entries[kcunit - 1];
4275 if (outgoing) {
4276 entrybuf = &entry->cfe_snd;
4277 } else {
4278 entrybuf = &entry->cfe_rcv;
4279 }
4280
4281 pending_q = &entrybuf->cfe_pending_q;
4282
4283 passlen = entrybuf->cfe_pass_offset - pending_q->q_start;
4284
4285 /*
4286 * Locate the chunks of data that we can pass to the next filter
4287 * A data chunk must be on mbuf boundaries
4288 */
4289 curlen = 0;
4290 while ((data = cfil_queue_first(pending_q)) != NULL) {
4291 struct cfil_entry *iter_entry;
4292 datalen = cfil_data_length(data, NULL, NULL);
4293
4294 #if DATA_DEBUG
4295 CFIL_LOG(LOG_DEBUG,
4296 "CFIL: SERVICE PENDING-Q: data %llx datalen %u passlen %llu curlen %llu",
4297 (uint64_t)VM_KERNEL_ADDRPERM(data), datalen,
4298 passlen, curlen);
4299 #endif
4300
4301 if (curlen + datalen > passlen) {
4302 break;
4303 }
4304
4305 cfil_queue_remove(pending_q, data, datalen);
4306
4307 curlen += datalen;
4308
4309 for (iter_entry = SLIST_NEXT(entry, cfe_order_link);
4310 iter_entry != NULL;
4311 iter_entry = SLIST_NEXT(iter_entry, cfe_order_link)) {
4312 error = cfil_data_filter(so, cfil_info, CFI_ENTRY_KCUNIT(cfil_info, iter_entry), outgoing,
4313 data, datalen);
4314 /* 0 means passed so we can continue */
4315 if (error != 0) {
4316 break;
4317 }
4318 }
4319 /* When data has passed all filters, re-inject */
4320 if (error == 0) {
4321 if (outgoing) {
4322 cfil_queue_enqueue(
4323 &cfil_info->cfi_snd.cfi_inject_q,
4324 data, datalen);
4325 OSAddAtomic64(datalen,
4326 &cfil_stats.cfs_inject_q_out_enqueued);
4327 } else {
4328 cfil_queue_enqueue(
4329 &cfil_info->cfi_rcv.cfi_inject_q,
4330 data, datalen);
4331 OSAddAtomic64(datalen,
4332 &cfil_stats.cfs_inject_q_in_enqueued);
4333 }
4334 }
4335 }
4336
4337 CFIL_INFO_VERIFY(cfil_info);
4338
4339 return error;
4340 }
4341
4342 int
4343 cfil_update_data_offsets(struct socket *so, struct cfil_info *cfil_info, uint32_t kcunit, int outgoing,
4344 uint64_t pass_offset, uint64_t peek_offset)
4345 {
4346 errno_t error = 0;
4347 struct cfil_entry *entry = NULL;
4348 struct cfe_buf *entrybuf;
4349 int updated = 0;
4350
4351 CFIL_LOG(LOG_INFO, "pass %llu peek %llu", pass_offset, peek_offset);
4352
4353 socket_lock_assert_owned(so);
4354
4355 if (cfil_info == NULL) {
4356 CFIL_LOG(LOG_ERR, "so %llx cfil detached",
4357 (uint64_t)VM_KERNEL_ADDRPERM(so));
4358 error = 0;
4359 goto done;
4360 } else if (cfil_info->cfi_flags & CFIF_DROP) {
4361 CFIL_LOG(LOG_ERR, "so %llx drop set",
4362 (uint64_t)VM_KERNEL_ADDRPERM(so));
4363 error = EPIPE;
4364 goto done;
4365 }
4366
4367 entry = &cfil_info->cfi_entries[kcunit - 1];
4368 if (outgoing) {
4369 entrybuf = &entry->cfe_snd;
4370 } else {
4371 entrybuf = &entry->cfe_rcv;
4372 }
4373
4374 /* Record updated offsets for this content filter */
4375 if (pass_offset > entrybuf->cfe_pass_offset) {
4376 entrybuf->cfe_pass_offset = pass_offset;
4377
4378 if (entrybuf->cfe_peek_offset < entrybuf->cfe_pass_offset) {
4379 entrybuf->cfe_peek_offset = entrybuf->cfe_pass_offset;
4380 }
4381 updated = 1;
4382 } else {
4383 CFIL_LOG(LOG_INFO, "pass_offset %llu <= cfe_pass_offset %llu",
4384 pass_offset, entrybuf->cfe_pass_offset);
4385 }
4386 /* Filter does not want or need to see data that's allowed to pass */
4387 if (peek_offset > entrybuf->cfe_pass_offset &&
4388 peek_offset > entrybuf->cfe_peek_offset) {
4389 entrybuf->cfe_peek_offset = peek_offset;
4390 updated = 1;
4391 }
4392 /* Nothing to do */
4393 if (updated == 0) {
4394 goto done;
4395 }
4396
4397 /* Move data held in control queue to pending queue if needed */
4398 error = cfil_data_service_ctl_q(so, cfil_info, kcunit, outgoing);
4399 if (error != 0) {
4400 CFIL_LOG(LOG_ERR, "cfil_data_service_ctl_q() error %d",
4401 error);
4402 goto done;
4403 }
4404 error = EJUSTRETURN;
4405
4406 done:
4407 /*
4408 * The filter is effectively detached when pass all from both sides
4409 * or when the socket is closed and no more data is waiting
4410 * to be delivered to the filter
4411 */
4412 if (entry != NULL &&
4413 ((entry->cfe_snd.cfe_pass_offset == CFM_MAX_OFFSET &&
4414 entry->cfe_rcv.cfe_pass_offset == CFM_MAX_OFFSET) ||
4415 ((cfil_info->cfi_flags & CFIF_CLOSE_WAIT) &&
4416 cfil_queue_empty(&entry->cfe_snd.cfe_ctl_q) &&
4417 cfil_queue_empty(&entry->cfe_rcv.cfe_ctl_q)))) {
4418 entry->cfe_flags |= CFEF_CFIL_DETACHED;
4419 #if LIFECYCLE_DEBUG
4420 cfil_info_log(LOG_ERR, cfil_info, outgoing ?
4421 "CFIL: LIFECYCLE: OUT - PASSED ALL - DETACH":
4422 "CFIL: LIFECYCLE: IN - PASSED ALL - DETACH");
4423 #endif
4424 CFIL_LOG(LOG_INFO, "so %llx detached %u",
4425 (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit);
4426 if ((cfil_info->cfi_flags & CFIF_CLOSE_WAIT) &&
4427 cfil_filters_attached(so) == 0) {
4428 #if LIFECYCLE_DEBUG
4429 cfil_info_log(LOG_ERR, cfil_info, "CFIL: LIFECYCLE: WAKING");
4430 #endif
4431 CFIL_LOG(LOG_INFO, "so %llx waking",
4432 (uint64_t)VM_KERNEL_ADDRPERM(so));
4433 wakeup((caddr_t)cfil_info);
4434 }
4435 }
4436 CFIL_INFO_VERIFY(cfil_info);
4437 CFIL_LOG(LOG_INFO, "return %d", error);
4438 return error;
4439 }
4440
4441 /*
4442 * Update pass offset for socket when no data is pending
4443 */
4444 static int
4445 cfil_set_socket_pass_offset(struct socket *so, struct cfil_info *cfil_info, int outgoing)
4446 {
4447 struct cfi_buf *cfi_buf;
4448 struct cfil_entry *entry;
4449 struct cfe_buf *entrybuf;
4450 uint32_t kcunit;
4451 uint64_t pass_offset = 0;
4452 boolean_t first = true;
4453
4454 if (cfil_info == NULL) {
4455 return 0;
4456 }
4457
4458 CFIL_LOG(LOG_INFO, "so %llx outgoing %d",
4459 (uint64_t)VM_KERNEL_ADDRPERM(so), outgoing);
4460
4461 socket_lock_assert_owned(so);
4462
4463 if (outgoing) {
4464 cfi_buf = &cfil_info->cfi_snd;
4465 } else {
4466 cfi_buf = &cfil_info->cfi_rcv;
4467 }
4468
4469 CFIL_LOG(LOG_DEBUG, "CFIL: <so %llx, sockID %llu> outgoing %d cfi_pending_first %llu cfi_pending_last %llu",
4470 (uint64_t)VM_KERNEL_ADDRPERM(so), cfil_info->cfi_sock_id, outgoing,
4471 cfi_buf->cfi_pending_first, cfi_buf->cfi_pending_last);
4472
4473 if (cfi_buf->cfi_pending_last - cfi_buf->cfi_pending_first == 0) {
4474 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
4475 entry = &cfil_info->cfi_entries[kcunit - 1];
4476
4477 /* Are we attached to a filter? */
4478 if (entry->cfe_filter == NULL) {
4479 continue;
4480 }
4481
4482 if (outgoing) {
4483 entrybuf = &entry->cfe_snd;
4484 } else {
4485 entrybuf = &entry->cfe_rcv;
4486 }
4487
4488 // Keep track of the smallest pass_offset among filters.
4489 if (first == true ||
4490 entrybuf->cfe_pass_offset < pass_offset) {
4491 pass_offset = entrybuf->cfe_pass_offset;
4492 first = false;
4493 }
4494 }
4495 cfi_buf->cfi_pass_offset = pass_offset;
4496 }
4497
4498 CFIL_LOG(LOG_DEBUG, "CFIL: <so %llx, sockID %llu>, cfi_pass_offset %llu",
4499 (uint64_t)VM_KERNEL_ADDRPERM(so), cfil_info->cfi_sock_id, cfi_buf->cfi_pass_offset);
4500
4501 return 0;
4502 }
4503
4504 int
4505 cfil_action_data_pass(struct socket *so, struct cfil_info *cfil_info, uint32_t kcunit, int outgoing,
4506 uint64_t pass_offset, uint64_t peek_offset)
4507 {
4508 errno_t error = 0;
4509
4510 CFIL_LOG(LOG_INFO, "");
4511
4512 socket_lock_assert_owned(so);
4513
4514 error = cfil_acquire_sockbuf(so, cfil_info, outgoing);
4515 if (error != 0) {
4516 CFIL_LOG(LOG_INFO, "so %llx %s dropped",
4517 (uint64_t)VM_KERNEL_ADDRPERM(so),
4518 outgoing ? "out" : "in");
4519 goto release;
4520 }
4521
4522 error = cfil_update_data_offsets(so, cfil_info, kcunit, outgoing,
4523 pass_offset, peek_offset);
4524
4525 cfil_service_inject_queue(so, cfil_info, outgoing);
4526
4527 cfil_set_socket_pass_offset(so, cfil_info, outgoing);
4528 release:
4529 CFIL_INFO_VERIFY(cfil_info);
4530 cfil_release_sockbuf(so, outgoing);
4531
4532 return error;
4533 }
4534
4535
4536 static void
4537 cfil_flush_queues(struct socket *so, struct cfil_info *cfil_info)
4538 {
4539 struct cfil_entry *entry;
4540 int kcunit;
4541 uint64_t drained;
4542
4543 if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || cfil_info == NULL) {
4544 goto done;
4545 }
4546
4547 socket_lock_assert_owned(so);
4548
4549 /*
4550 * Flush the output queues and ignore errors as long as
4551 * we are attached
4552 */
4553 (void) cfil_acquire_sockbuf(so, cfil_info, 1);
4554 if (cfil_info != NULL) {
4555 drained = 0;
4556 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
4557 entry = &cfil_info->cfi_entries[kcunit - 1];
4558
4559 drained += cfil_queue_drain(&entry->cfe_snd.cfe_ctl_q);
4560 drained += cfil_queue_drain(&entry->cfe_snd.cfe_pending_q);
4561 }
4562 drained += cfil_queue_drain(&cfil_info->cfi_snd.cfi_inject_q);
4563
4564 if (drained) {
4565 if (cfil_info->cfi_flags & CFIF_DROP) {
4566 OSIncrementAtomic(
4567 &cfil_stats.cfs_flush_out_drop);
4568 } else {
4569 OSIncrementAtomic(
4570 &cfil_stats.cfs_flush_out_close);
4571 }
4572 }
4573 }
4574 cfil_release_sockbuf(so, 1);
4575
4576 /*
4577 * Flush the input queues
4578 */
4579 (void) cfil_acquire_sockbuf(so, cfil_info, 0);
4580 if (cfil_info != NULL) {
4581 drained = 0;
4582 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
4583 entry = &cfil_info->cfi_entries[kcunit - 1];
4584
4585 drained += cfil_queue_drain(
4586 &entry->cfe_rcv.cfe_ctl_q);
4587 drained += cfil_queue_drain(
4588 &entry->cfe_rcv.cfe_pending_q);
4589 }
4590 drained += cfil_queue_drain(&cfil_info->cfi_rcv.cfi_inject_q);
4591
4592 if (drained) {
4593 if (cfil_info->cfi_flags & CFIF_DROP) {
4594 OSIncrementAtomic(
4595 &cfil_stats.cfs_flush_in_drop);
4596 } else {
4597 OSIncrementAtomic(
4598 &cfil_stats.cfs_flush_in_close);
4599 }
4600 }
4601 }
4602 cfil_release_sockbuf(so, 0);
4603 done:
4604 CFIL_INFO_VERIFY(cfil_info);
4605 }
4606
4607 int
4608 cfil_action_drop(struct socket *so, struct cfil_info *cfil_info, uint32_t kcunit)
4609 {
4610 errno_t error = 0;
4611 struct cfil_entry *entry;
4612 struct proc *p;
4613
4614 if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || cfil_info == NULL) {
4615 goto done;
4616 }
4617
4618 socket_lock_assert_owned(so);
4619
4620 entry = &cfil_info->cfi_entries[kcunit - 1];
4621
4622 /* Are we attached to the filter? */
4623 if (entry->cfe_filter == NULL) {
4624 goto done;
4625 }
4626
4627 cfil_info->cfi_flags |= CFIF_DROP;
4628
4629 p = current_proc();
4630
4631 /*
4632 * Force the socket to be marked defunct
4633 * (forcing fixed along with rdar://19391339)
4634 */
4635 if (so->so_cfil_db == NULL) {
4636 error = sosetdefunct(p, so,
4637 SHUTDOWN_SOCKET_LEVEL_CONTENT_FILTER | SHUTDOWN_SOCKET_LEVEL_DISCONNECT_ALL,
4638 FALSE);
4639
4640 /* Flush the socket buffer and disconnect */
4641 if (error == 0) {
4642 error = sodefunct(p, so,
4643 SHUTDOWN_SOCKET_LEVEL_CONTENT_FILTER | SHUTDOWN_SOCKET_LEVEL_DISCONNECT_ALL);
4644 }
4645 }
4646
4647 /* The filter is done, mark as detached */
4648 entry->cfe_flags |= CFEF_CFIL_DETACHED;
4649 #if LIFECYCLE_DEBUG
4650 cfil_info_log(LOG_ERR, cfil_info, "CFIL: LIFECYCLE: DROP - DETACH");
4651 #endif
4652 CFIL_LOG(LOG_INFO, "so %llx detached %u",
4653 (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit);
4654
4655 /* Pending data needs to go */
4656 cfil_flush_queues(so, cfil_info);
4657
4658 if (cfil_info && (cfil_info->cfi_flags & CFIF_CLOSE_WAIT)) {
4659 if (cfil_filters_attached(so) == 0) {
4660 CFIL_LOG(LOG_INFO, "so %llx waking",
4661 (uint64_t)VM_KERNEL_ADDRPERM(so));
4662 wakeup((caddr_t)cfil_info);
4663 }
4664 }
4665 done:
4666 return error;
4667 }
4668
4669 int
4670 cfil_action_bless_client(uint32_t kcunit, struct cfil_msg_hdr *msghdr)
4671 {
4672 errno_t error = 0;
4673 struct cfil_info *cfil_info = NULL;
4674
4675 bool cfil_attached = false;
4676 struct cfil_msg_bless_client *blessmsg = (struct cfil_msg_bless_client *)msghdr;
4677
4678 // Search and lock socket
4679 struct socket *so = cfil_socket_from_client_uuid(blessmsg->cfb_client_uuid, &cfil_attached);
4680 if (so == NULL) {
4681 error = ENOENT;
4682 } else {
4683 // The client gets a pass automatically
4684 cfil_info = (so->so_cfil_db != NULL) ?
4685 cfil_db_get_cfil_info(so->so_cfil_db, msghdr->cfm_sock_id) : so->so_cfil;
4686
4687 if (cfil_attached) {
4688 #if VERDICT_DEBUG
4689 if (cfil_info != NULL) {
4690 CFIL_LOG(LOG_ERR, "CFIL: VERDICT RECEIVED: BLESS %s <so %llx sockID %llu>",
4691 cfil_info->cfi_hash_entry ? "UDP" : "TCP",
4692 (uint64_t)VM_KERNEL_ADDRPERM(so),
4693 cfil_info->cfi_sock_id);
4694 }
4695 #endif
4696 cfil_sock_received_verdict(so);
4697 (void)cfil_action_data_pass(so, cfil_info, kcunit, 1, CFM_MAX_OFFSET, CFM_MAX_OFFSET);
4698 (void)cfil_action_data_pass(so, cfil_info, kcunit, 0, CFM_MAX_OFFSET, CFM_MAX_OFFSET);
4699 } else {
4700 so->so_flags1 |= SOF1_CONTENT_FILTER_SKIP;
4701 }
4702 socket_unlock(so, 1);
4703 }
4704
4705 return error;
4706 }
4707
4708 int
4709 cfil_action_set_crypto_key(uint32_t kcunit, struct cfil_msg_hdr *msghdr)
4710 {
4711 struct content_filter *cfc = NULL;
4712 cfil_crypto_state_t crypto_state = NULL;
4713 struct cfil_msg_set_crypto_key *keymsg = (struct cfil_msg_set_crypto_key *)msghdr;
4714
4715 CFIL_LOG(LOG_NOTICE, "");
4716
4717 if (content_filters == NULL) {
4718 CFIL_LOG(LOG_ERR, "no content filter");
4719 return EINVAL;
4720 }
4721 if (kcunit > MAX_CONTENT_FILTER) {
4722 CFIL_LOG(LOG_ERR, "kcunit %u > MAX_CONTENT_FILTER (%d)",
4723 kcunit, MAX_CONTENT_FILTER);
4724 return EINVAL;
4725 }
4726 crypto_state = cfil_crypto_init_client((uint8_t *)keymsg->crypto_key);
4727 if (crypto_state == NULL) {
4728 CFIL_LOG(LOG_ERR, "failed to initialize crypto state for unit %u)",
4729 kcunit);
4730 return EINVAL;
4731 }
4732
4733 cfil_rw_lock_exclusive(&cfil_lck_rw);
4734
4735 cfc = content_filters[kcunit - 1];
4736 if (cfc->cf_kcunit != kcunit) {
4737 CFIL_LOG(LOG_ERR, "bad unit info %u)",
4738 kcunit);
4739 cfil_rw_unlock_exclusive(&cfil_lck_rw);
4740 cfil_crypto_cleanup_state(crypto_state);
4741 return EINVAL;
4742 }
4743 if (cfc->cf_crypto_state != NULL) {
4744 cfil_crypto_cleanup_state(cfc->cf_crypto_state);
4745 cfc->cf_crypto_state = NULL;
4746 }
4747 cfc->cf_crypto_state = crypto_state;
4748
4749 cfil_rw_unlock_exclusive(&cfil_lck_rw);
4750 return 0;
4751 }
4752
4753 static int
4754 cfil_update_entry_offsets(struct socket *so, struct cfil_info *cfil_info, int outgoing, unsigned int datalen)
4755 {
4756 struct cfil_entry *entry;
4757 struct cfe_buf *entrybuf;
4758 uint32_t kcunit;
4759
4760 CFIL_LOG(LOG_INFO, "so %llx outgoing %d datalen %u",
4761 (uint64_t)VM_KERNEL_ADDRPERM(so), outgoing, datalen);
4762
4763 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
4764 entry = &cfil_info->cfi_entries[kcunit - 1];
4765
4766 /* Are we attached to the filter? */
4767 if (entry->cfe_filter == NULL) {
4768 continue;
4769 }
4770
4771 if (outgoing) {
4772 entrybuf = &entry->cfe_snd;
4773 } else {
4774 entrybuf = &entry->cfe_rcv;
4775 }
4776
4777 entrybuf->cfe_ctl_q.q_start += datalen;
4778 if (entrybuf->cfe_pass_offset < entrybuf->cfe_ctl_q.q_start) {
4779 entrybuf->cfe_pass_offset = entrybuf->cfe_ctl_q.q_start;
4780 }
4781 entrybuf->cfe_peeked = entrybuf->cfe_ctl_q.q_start;
4782 if (entrybuf->cfe_peek_offset < entrybuf->cfe_pass_offset) {
4783 entrybuf->cfe_peek_offset = entrybuf->cfe_pass_offset;
4784 }
4785
4786 entrybuf->cfe_ctl_q.q_end += datalen;
4787
4788 entrybuf->cfe_pending_q.q_start += datalen;
4789 entrybuf->cfe_pending_q.q_end += datalen;
4790 }
4791 CFIL_INFO_VERIFY(cfil_info);
4792 return 0;
4793 }
4794
4795 int
4796 cfil_data_common(struct socket *so, struct cfil_info *cfil_info, int outgoing, struct sockaddr *to,
4797 struct mbuf *data, struct mbuf *control, uint32_t flags)
4798 {
4799 #pragma unused(to, control, flags)
4800 errno_t error = 0;
4801 unsigned int datalen;
4802 int mbcnt = 0;
4803 int mbnum = 0;
4804 int kcunit;
4805 struct cfi_buf *cfi_buf;
4806 struct mbuf *chain = NULL;
4807
4808 if (cfil_info == NULL) {
4809 CFIL_LOG(LOG_ERR, "so %llx cfil detached",
4810 (uint64_t)VM_KERNEL_ADDRPERM(so));
4811 error = 0;
4812 goto done;
4813 } else if (cfil_info->cfi_flags & CFIF_DROP) {
4814 CFIL_LOG(LOG_ERR, "so %llx drop set",
4815 (uint64_t)VM_KERNEL_ADDRPERM(so));
4816 error = EPIPE;
4817 goto done;
4818 }
4819
4820 datalen = cfil_data_length(data, &mbcnt, &mbnum);
4821
4822 if (datalen == 0) {
4823 error = 0;
4824 goto done;
4825 }
4826
4827 if (outgoing) {
4828 cfi_buf = &cfil_info->cfi_snd;
4829 cfil_info->cfi_byte_outbound_count += datalen;
4830 } else {
4831 cfi_buf = &cfil_info->cfi_rcv;
4832 cfil_info->cfi_byte_inbound_count += datalen;
4833 }
4834
4835 cfi_buf->cfi_pending_last += datalen;
4836 cfi_buf->cfi_pending_mbcnt += mbcnt;
4837 cfi_buf->cfi_pending_mbnum += mbnum;
4838
4839 if (IS_IP_DGRAM(so)) {
4840 if (cfi_buf->cfi_pending_mbnum > cfil_udp_gc_mbuf_num_max ||
4841 cfi_buf->cfi_pending_mbcnt > cfil_udp_gc_mbuf_cnt_max) {
4842 cfi_buf->cfi_tail_drop_cnt++;
4843 cfi_buf->cfi_pending_mbcnt -= mbcnt;
4844 cfi_buf->cfi_pending_mbnum -= mbnum;
4845 return EPIPE;
4846 }
4847 }
4848
4849 cfil_info_buf_verify(cfi_buf);
4850
4851 #if DATA_DEBUG
4852 CFIL_LOG(LOG_DEBUG, "CFIL: QUEUEING DATA: <so %llx> %s: data %llx len %u flags 0x%x nextpkt %llx - cfi_pending_last %llu cfi_pending_mbcnt %u cfi_pass_offset %llu",
4853 (uint64_t)VM_KERNEL_ADDRPERM(so),
4854 outgoing ? "OUT" : "IN",
4855 (uint64_t)VM_KERNEL_ADDRPERM(data), datalen, data->m_flags,
4856 (uint64_t)VM_KERNEL_ADDRPERM(data->m_nextpkt),
4857 cfi_buf->cfi_pending_last,
4858 cfi_buf->cfi_pending_mbcnt,
4859 cfi_buf->cfi_pass_offset);
4860 #endif
4861
4862 /* Fast path when below pass offset */
4863 if (cfi_buf->cfi_pending_last <= cfi_buf->cfi_pass_offset) {
4864 cfil_update_entry_offsets(so, cfil_info, outgoing, datalen);
4865 #if DATA_DEBUG
4866 CFIL_LOG(LOG_DEBUG, "CFIL: QUEUEING DATA: FAST PATH");
4867 #endif
4868 } else {
4869 struct cfil_entry *iter_entry;
4870 SLIST_FOREACH(iter_entry, &cfil_info->cfi_ordered_entries, cfe_order_link) {
4871 // Is cfil attached to this filter?
4872 kcunit = CFI_ENTRY_KCUNIT(cfil_info, iter_entry);
4873 if (IS_ENTRY_ATTACHED(cfil_info, kcunit)) {
4874 if (IS_IP_DGRAM(so) && chain == NULL) {
4875 /* Datagrams only:
4876 * Chain addr (incoming only TDB), control (optional) and data into one chain.
4877 * This full chain will be reinjected into socket after recieving verdict.
4878 */
4879 (void) cfil_dgram_save_socket_state(cfil_info, data);
4880 chain = sbconcat_mbufs(NULL, outgoing ? NULL : to, data, control);
4881 if (chain == NULL) {
4882 return ENOBUFS;
4883 }
4884 data = chain;
4885 }
4886 error = cfil_data_filter(so, cfil_info, kcunit, outgoing, data,
4887 datalen);
4888 }
4889 /* 0 means passed so continue with next filter */
4890 if (error != 0) {
4891 break;
4892 }
4893 }
4894 }
4895
4896 /* Move cursor if no filter claimed the data */
4897 if (error == 0) {
4898 cfi_buf->cfi_pending_first += datalen;
4899 cfi_buf->cfi_pending_mbcnt -= mbcnt;
4900 cfi_buf->cfi_pending_mbnum -= mbnum;
4901 cfil_info_buf_verify(cfi_buf);
4902 }
4903 done:
4904 CFIL_INFO_VERIFY(cfil_info);
4905
4906 return error;
4907 }
4908
4909 /*
4910 * Callback from socket layer sosendxxx()
4911 */
4912 int
4913 cfil_sock_data_out(struct socket *so, struct sockaddr *to,
4914 struct mbuf *data, struct mbuf *control, uint32_t flags)
4915 {
4916 int error = 0;
4917 int new_filter_control_unit = 0;
4918
4919 if (IS_IP_DGRAM(so)) {
4920 return cfil_sock_udp_handle_data(TRUE, so, NULL, to, data, control, flags);
4921 }
4922
4923 if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL) {
4924 /* Drop pre-existing TCP sockets if filter is enabled now */
4925 if (cfil_active_count > 0 && !SKIP_FILTER_FOR_TCP_SOCKET(so)) {
4926 new_filter_control_unit = necp_socket_get_content_filter_control_unit(so);
4927 if (new_filter_control_unit > 0) {
4928 return EPIPE;
4929 }
4930 }
4931 return 0;
4932 }
4933
4934 /* Drop pre-existing TCP sockets when filter state changed */
4935 new_filter_control_unit = necp_socket_get_content_filter_control_unit(so);
4936 if (new_filter_control_unit > 0 && new_filter_control_unit != so->so_cfil->cfi_filter_control_unit && !SKIP_FILTER_FOR_TCP_SOCKET(so)) {
4937 return EPIPE;
4938 }
4939
4940 /*
4941 * Pass initial data for TFO.
4942 */
4943 if (IS_INITIAL_TFO_DATA(so)) {
4944 return 0;
4945 }
4946
4947 socket_lock_assert_owned(so);
4948
4949 if (so->so_cfil->cfi_flags & CFIF_DROP) {
4950 CFIL_LOG(LOG_ERR, "so %llx drop set",
4951 (uint64_t)VM_KERNEL_ADDRPERM(so));
4952 return EPIPE;
4953 }
4954 if (control != NULL) {
4955 CFIL_LOG(LOG_ERR, "so %llx control",
4956 (uint64_t)VM_KERNEL_ADDRPERM(so));
4957 OSIncrementAtomic(&cfil_stats.cfs_data_out_control);
4958 }
4959 if ((flags & MSG_OOB)) {
4960 CFIL_LOG(LOG_ERR, "so %llx MSG_OOB",
4961 (uint64_t)VM_KERNEL_ADDRPERM(so));
4962 OSIncrementAtomic(&cfil_stats.cfs_data_out_oob);
4963 }
4964 if ((so->so_snd.sb_flags & SB_LOCK) == 0) {
4965 panic("so %p SB_LOCK not set", so);
4966 }
4967
4968 if (so->so_snd.sb_cfil_thread != NULL) {
4969 panic("%s sb_cfil_thread %p not NULL", __func__,
4970 so->so_snd.sb_cfil_thread);
4971 }
4972
4973 error = cfil_data_common(so, so->so_cfil, 1, to, data, control, flags);
4974
4975 return error;
4976 }
4977
4978 /*
4979 * Callback from socket layer sbappendxxx()
4980 */
4981 int
4982 cfil_sock_data_in(struct socket *so, struct sockaddr *from,
4983 struct mbuf *data, struct mbuf *control, uint32_t flags)
4984 {
4985 int error = 0;
4986 int new_filter_control_unit = 0;
4987
4988 if (IS_IP_DGRAM(so)) {
4989 return cfil_sock_udp_handle_data(FALSE, so, NULL, from, data, control, flags);
4990 }
4991
4992 if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL) {
4993 /* Drop pre-existing TCP sockets if filter is enabled now */
4994 if (cfil_active_count > 0 && !SKIP_FILTER_FOR_TCP_SOCKET(so)) {
4995 new_filter_control_unit = necp_socket_get_content_filter_control_unit(so);
4996 if (new_filter_control_unit > 0) {
4997 return EPIPE;
4998 }
4999 }
5000 return 0;
5001 }
5002
5003 /* Drop pre-existing TCP sockets when filter state changed */
5004 new_filter_control_unit = necp_socket_get_content_filter_control_unit(so);
5005 if (new_filter_control_unit > 0 && new_filter_control_unit != so->so_cfil->cfi_filter_control_unit && !SKIP_FILTER_FOR_TCP_SOCKET(so)) {
5006 return EPIPE;
5007 }
5008
5009 /*
5010 * Pass initial data for TFO.
5011 */
5012 if (IS_INITIAL_TFO_DATA(so)) {
5013 return 0;
5014 }
5015
5016 socket_lock_assert_owned(so);
5017
5018 if (so->so_cfil->cfi_flags & CFIF_DROP) {
5019 CFIL_LOG(LOG_ERR, "so %llx drop set",
5020 (uint64_t)VM_KERNEL_ADDRPERM(so));
5021 return EPIPE;
5022 }
5023 if (control != NULL) {
5024 CFIL_LOG(LOG_ERR, "so %llx control",
5025 (uint64_t)VM_KERNEL_ADDRPERM(so));
5026 OSIncrementAtomic(&cfil_stats.cfs_data_in_control);
5027 }
5028 if (data->m_type == MT_OOBDATA) {
5029 CFIL_LOG(LOG_ERR, "so %llx MSG_OOB",
5030 (uint64_t)VM_KERNEL_ADDRPERM(so));
5031 OSIncrementAtomic(&cfil_stats.cfs_data_in_oob);
5032 }
5033 error = cfil_data_common(so, so->so_cfil, 0, from, data, control, flags);
5034
5035 return error;
5036 }
5037
5038 /*
5039 * Callback from socket layer soshutdownxxx()
5040 *
5041 * We may delay the shutdown write if there's outgoing data in process.
5042 *
5043 * There is no point in delaying the shutdown read because the process
5044 * indicated that it does not want to read anymore data.
5045 */
5046 int
5047 cfil_sock_shutdown(struct socket *so, int *how)
5048 {
5049 int error = 0;
5050
5051 if (IS_IP_DGRAM(so)) {
5052 return cfil_sock_udp_shutdown(so, how);
5053 }
5054
5055 if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL) {
5056 goto done;
5057 }
5058
5059 socket_lock_assert_owned(so);
5060
5061 CFIL_LOG(LOG_INFO, "so %llx how %d",
5062 (uint64_t)VM_KERNEL_ADDRPERM(so), *how);
5063
5064 /*
5065 * Check the state of the socket before the content filter
5066 */
5067 if (*how != SHUT_WR && (so->so_state & SS_CANTRCVMORE) != 0) {
5068 /* read already shut down */
5069 error = ENOTCONN;
5070 goto done;
5071 }
5072 if (*how != SHUT_RD && (so->so_state & SS_CANTSENDMORE) != 0) {
5073 /* write already shut down */
5074 error = ENOTCONN;
5075 goto done;
5076 }
5077
5078 if ((so->so_cfil->cfi_flags & CFIF_DROP) != 0) {
5079 CFIL_LOG(LOG_ERR, "so %llx drop set",
5080 (uint64_t)VM_KERNEL_ADDRPERM(so));
5081 goto done;
5082 }
5083
5084 /*
5085 * shutdown read: SHUT_RD or SHUT_RDWR
5086 */
5087 if (*how != SHUT_WR) {
5088 if (so->so_cfil->cfi_flags & CFIF_SHUT_RD) {
5089 error = ENOTCONN;
5090 goto done;
5091 }
5092 so->so_cfil->cfi_flags |= CFIF_SHUT_RD;
5093 cfil_sock_notify_shutdown(so, SHUT_RD);
5094 }
5095 /*
5096 * shutdown write: SHUT_WR or SHUT_RDWR
5097 */
5098 if (*how != SHUT_RD) {
5099 if (so->so_cfil->cfi_flags & CFIF_SHUT_WR) {
5100 error = ENOTCONN;
5101 goto done;
5102 }
5103 so->so_cfil->cfi_flags |= CFIF_SHUT_WR;
5104 cfil_sock_notify_shutdown(so, SHUT_WR);
5105 /*
5106 * When outgoing data is pending, we delay the shutdown at the
5107 * protocol level until the content filters give the final
5108 * verdict on the pending data.
5109 */
5110 if (cfil_sock_data_pending(&so->so_snd) != 0) {
5111 /*
5112 * When shutting down the read and write sides at once
5113 * we can proceed to the final shutdown of the read
5114 * side. Otherwise, we just return.
5115 */
5116 if (*how == SHUT_WR) {
5117 error = EJUSTRETURN;
5118 } else if (*how == SHUT_RDWR) {
5119 *how = SHUT_RD;
5120 }
5121 }
5122 }
5123 done:
5124 return error;
5125 }
5126
5127 /*
5128 * This is called when the socket is closed and there is no more
5129 * opportunity for filtering
5130 */
5131 void
5132 cfil_sock_is_closed(struct socket *so)
5133 {
5134 errno_t error = 0;
5135 int kcunit;
5136
5137 if (IS_IP_DGRAM(so)) {
5138 cfil_sock_udp_is_closed(so);
5139 return;
5140 }
5141
5142 if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL) {
5143 return;
5144 }
5145
5146 CFIL_LOG(LOG_INFO, "so %llx", (uint64_t)VM_KERNEL_ADDRPERM(so));
5147
5148 socket_lock_assert_owned(so);
5149
5150 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
5151 /* Let the filters know of the closing */
5152 error = cfil_dispatch_closed_event(so, so->so_cfil, kcunit);
5153 }
5154
5155 /* Last chance to push passed data out */
5156 error = cfil_acquire_sockbuf(so, so->so_cfil, 1);
5157 if (error == 0) {
5158 cfil_service_inject_queue(so, so->so_cfil, 1);
5159 }
5160 cfil_release_sockbuf(so, 1);
5161
5162 so->so_cfil->cfi_flags |= CFIF_SOCK_CLOSED;
5163
5164 /* Pending data needs to go */
5165 cfil_flush_queues(so, so->so_cfil);
5166
5167 CFIL_INFO_VERIFY(so->so_cfil);
5168 }
5169
5170 /*
5171 * This is called when the socket is disconnected so let the filters
5172 * know about the disconnection and that no more data will come
5173 *
5174 * The how parameter has the same values as soshutown()
5175 */
5176 void
5177 cfil_sock_notify_shutdown(struct socket *so, int how)
5178 {
5179 errno_t error = 0;
5180 int kcunit;
5181
5182 if (IS_IP_DGRAM(so)) {
5183 cfil_sock_udp_notify_shutdown(so, how, 0, 0);
5184 return;
5185 }
5186
5187 if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL) {
5188 return;
5189 }
5190
5191 CFIL_LOG(LOG_INFO, "so %llx how %d",
5192 (uint64_t)VM_KERNEL_ADDRPERM(so), how);
5193
5194 socket_lock_assert_owned(so);
5195
5196 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
5197 /* Disconnect incoming side */
5198 if (how != SHUT_WR) {
5199 error = cfil_dispatch_disconnect_event(so, so->so_cfil, kcunit, 0);
5200 }
5201 /* Disconnect outgoing side */
5202 if (how != SHUT_RD) {
5203 error = cfil_dispatch_disconnect_event(so, so->so_cfil, kcunit, 1);
5204 }
5205 }
5206 }
5207
5208 static int
5209 cfil_filters_attached(struct socket *so)
5210 {
5211 struct cfil_entry *entry;
5212 uint32_t kcunit;
5213 int attached = 0;
5214
5215 if (IS_IP_DGRAM(so)) {
5216 return cfil_filters_udp_attached(so, FALSE);
5217 }
5218
5219 if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL) {
5220 return 0;
5221 }
5222
5223 socket_lock_assert_owned(so);
5224
5225 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
5226 entry = &so->so_cfil->cfi_entries[kcunit - 1];
5227
5228 /* Are we attached to the filter? */
5229 if (entry->cfe_filter == NULL) {
5230 continue;
5231 }
5232 if ((entry->cfe_flags & CFEF_SENT_SOCK_ATTACHED) == 0) {
5233 continue;
5234 }
5235 if ((entry->cfe_flags & CFEF_CFIL_DETACHED) != 0) {
5236 continue;
5237 }
5238 attached = 1;
5239 break;
5240 }
5241
5242 return attached;
5243 }
5244
5245 /*
5246 * This is called when the socket is closed and we are waiting for
5247 * the filters to gives the final pass or drop
5248 */
5249 void
5250 cfil_sock_close_wait(struct socket *so)
5251 {
5252 lck_mtx_t *mutex_held;
5253 struct timespec ts;
5254 int error;
5255
5256 if (IS_IP_DGRAM(so)) {
5257 cfil_sock_udp_close_wait(so);
5258 return;
5259 }
5260
5261 if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL) {
5262 return;
5263 }
5264
5265 CFIL_LOG(LOG_INFO, "so %llx", (uint64_t)VM_KERNEL_ADDRPERM(so));
5266
5267 if (so->so_proto->pr_getlock != NULL) {
5268 mutex_held = (*so->so_proto->pr_getlock)(so, PR_F_WILLUNLOCK);
5269 } else {
5270 mutex_held = so->so_proto->pr_domain->dom_mtx;
5271 }
5272 LCK_MTX_ASSERT(mutex_held, LCK_MTX_ASSERT_OWNED);
5273
5274 while (cfil_filters_attached(so)) {
5275 /*
5276 * Notify the filters we are going away so they can detach
5277 */
5278 cfil_sock_notify_shutdown(so, SHUT_RDWR);
5279
5280 /*
5281 * Make sure we need to wait after the filter are notified
5282 * of the disconnection
5283 */
5284 if (cfil_filters_attached(so) == 0) {
5285 break;
5286 }
5287
5288 CFIL_LOG(LOG_INFO, "so %llx waiting",
5289 (uint64_t)VM_KERNEL_ADDRPERM(so));
5290
5291 ts.tv_sec = cfil_close_wait_timeout / 1000;
5292 ts.tv_nsec = (cfil_close_wait_timeout % 1000) *
5293 NSEC_PER_USEC * 1000;
5294
5295 OSIncrementAtomic(&cfil_stats.cfs_close_wait);
5296 so->so_cfil->cfi_flags |= CFIF_CLOSE_WAIT;
5297 error = msleep((caddr_t)so->so_cfil, mutex_held,
5298 PSOCK | PCATCH, "cfil_sock_close_wait", &ts);
5299 so->so_cfil->cfi_flags &= ~CFIF_CLOSE_WAIT;
5300
5301 CFIL_LOG(LOG_NOTICE, "so %llx timed out %d",
5302 (uint64_t)VM_KERNEL_ADDRPERM(so), (error != 0));
5303
5304 /*
5305 * Force close in case of timeout
5306 */
5307 if (error != 0) {
5308 OSIncrementAtomic(&cfil_stats.cfs_close_wait_timeout);
5309 break;
5310 }
5311 }
5312 }
5313
5314 /*
5315 * Returns the size of the data held by the content filter by using
5316 */
5317 int32_t
5318 cfil_sock_data_pending(struct sockbuf *sb)
5319 {
5320 struct socket *so = sb->sb_so;
5321 uint64_t pending = 0;
5322
5323 if (IS_IP_DGRAM(so)) {
5324 return cfil_sock_udp_data_pending(sb, FALSE);
5325 }
5326
5327 if ((so->so_flags & SOF_CONTENT_FILTER) != 0 && so->so_cfil != NULL) {
5328 struct cfi_buf *cfi_buf;
5329
5330 socket_lock_assert_owned(so);
5331
5332 if ((sb->sb_flags & SB_RECV) == 0) {
5333 cfi_buf = &so->so_cfil->cfi_snd;
5334 } else {
5335 cfi_buf = &so->so_cfil->cfi_rcv;
5336 }
5337
5338 pending = cfi_buf->cfi_pending_last -
5339 cfi_buf->cfi_pending_first;
5340
5341 /*
5342 * If we are limited by the "chars of mbufs used" roughly
5343 * adjust so we won't overcommit
5344 */
5345 if (pending > (uint64_t)cfi_buf->cfi_pending_mbcnt) {
5346 pending = cfi_buf->cfi_pending_mbcnt;
5347 }
5348 }
5349
5350 VERIFY(pending < INT32_MAX);
5351
5352 return (int32_t)(pending);
5353 }
5354
5355 /*
5356 * Return the socket buffer space used by data being held by content filters
5357 * so processes won't clog the socket buffer
5358 */
5359 int32_t
5360 cfil_sock_data_space(struct sockbuf *sb)
5361 {
5362 struct socket *so = sb->sb_so;
5363 uint64_t pending = 0;
5364
5365 if (IS_IP_DGRAM(so)) {
5366 return cfil_sock_udp_data_pending(sb, TRUE);
5367 }
5368
5369 if ((so->so_flags & SOF_CONTENT_FILTER) != 0 && so->so_cfil != NULL &&
5370 so->so_snd.sb_cfil_thread != current_thread()) {
5371 struct cfi_buf *cfi_buf;
5372
5373 socket_lock_assert_owned(so);
5374
5375 if ((sb->sb_flags & SB_RECV) == 0) {
5376 cfi_buf = &so->so_cfil->cfi_snd;
5377 } else {
5378 cfi_buf = &so->so_cfil->cfi_rcv;
5379 }
5380
5381 pending = cfi_buf->cfi_pending_last -
5382 cfi_buf->cfi_pending_first;
5383
5384 /*
5385 * If we are limited by the "chars of mbufs used" roughly
5386 * adjust so we won't overcommit
5387 */
5388 if ((uint64_t)cfi_buf->cfi_pending_mbcnt > pending) {
5389 pending = cfi_buf->cfi_pending_mbcnt;
5390 }
5391 }
5392
5393 VERIFY(pending < INT32_MAX);
5394
5395 return (int32_t)(pending);
5396 }
5397
5398 /*
5399 * A callback from the socket and protocol layer when data becomes
5400 * available in the socket buffer to give a chance for the content filter
5401 * to re-inject data that was held back
5402 */
5403 void
5404 cfil_sock_buf_update(struct sockbuf *sb)
5405 {
5406 int outgoing;
5407 int error;
5408 struct socket *so = sb->sb_so;
5409
5410 if (IS_IP_DGRAM(so)) {
5411 cfil_sock_udp_buf_update(sb);
5412 return;
5413 }
5414
5415 if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL) {
5416 return;
5417 }
5418
5419 if (!cfil_sbtrim) {
5420 return;
5421 }
5422
5423 socket_lock_assert_owned(so);
5424
5425 if ((sb->sb_flags & SB_RECV) == 0) {
5426 if ((so->so_cfil->cfi_flags & CFIF_RETRY_INJECT_OUT) == 0) {
5427 return;
5428 }
5429 outgoing = 1;
5430 OSIncrementAtomic(&cfil_stats.cfs_inject_q_out_retry);
5431 } else {
5432 if ((so->so_cfil->cfi_flags & CFIF_RETRY_INJECT_IN) == 0) {
5433 return;
5434 }
5435 outgoing = 0;
5436 OSIncrementAtomic(&cfil_stats.cfs_inject_q_in_retry);
5437 }
5438
5439 CFIL_LOG(LOG_NOTICE, "so %llx outgoing %d",
5440 (uint64_t)VM_KERNEL_ADDRPERM(so), outgoing);
5441
5442 error = cfil_acquire_sockbuf(so, so->so_cfil, outgoing);
5443 if (error == 0) {
5444 cfil_service_inject_queue(so, so->so_cfil, outgoing);
5445 }
5446 cfil_release_sockbuf(so, outgoing);
5447 }
5448
5449 int
5450 sysctl_cfil_filter_list(struct sysctl_oid *oidp, void *arg1, int arg2,
5451 struct sysctl_req *req)
5452 {
5453 #pragma unused(oidp, arg1, arg2)
5454 int error = 0;
5455 size_t len = 0;
5456 u_int32_t i;
5457
5458 /* Read only */
5459 if (req->newptr != USER_ADDR_NULL) {
5460 return EPERM;
5461 }
5462
5463 cfil_rw_lock_shared(&cfil_lck_rw);
5464
5465 for (i = 0; content_filters != NULL && i < MAX_CONTENT_FILTER; i++) {
5466 struct cfil_filter_stat filter_stat;
5467 struct content_filter *cfc = content_filters[i];
5468
5469 if (cfc == NULL) {
5470 continue;
5471 }
5472
5473 /* If just asking for the size */
5474 if (req->oldptr == USER_ADDR_NULL) {
5475 len += sizeof(struct cfil_filter_stat);
5476 continue;
5477 }
5478
5479 bzero(&filter_stat, sizeof(struct cfil_filter_stat));
5480 filter_stat.cfs_len = sizeof(struct cfil_filter_stat);
5481 filter_stat.cfs_filter_id = cfc->cf_kcunit;
5482 filter_stat.cfs_flags = cfc->cf_flags;
5483 filter_stat.cfs_sock_count = cfc->cf_sock_count;
5484 filter_stat.cfs_necp_control_unit = cfc->cf_necp_control_unit;
5485
5486 error = SYSCTL_OUT(req, &filter_stat,
5487 sizeof(struct cfil_filter_stat));
5488 if (error != 0) {
5489 break;
5490 }
5491 }
5492 /* If just asking for the size */
5493 if (req->oldptr == USER_ADDR_NULL) {
5494 req->oldidx = len;
5495 }
5496
5497 cfil_rw_unlock_shared(&cfil_lck_rw);
5498
5499 #if SHOW_DEBUG
5500 if (req->oldptr != USER_ADDR_NULL) {
5501 for (i = 1; content_filters != NULL && i <= MAX_CONTENT_FILTER; i++) {
5502 cfil_filter_show(i);
5503 }
5504 }
5505 #endif
5506
5507 return error;
5508 }
5509
5510 static int
5511 sysctl_cfil_sock_list(struct sysctl_oid *oidp, void *arg1, int arg2,
5512 struct sysctl_req *req)
5513 {
5514 #pragma unused(oidp, arg1, arg2)
5515 int error = 0;
5516 u_int32_t i;
5517 struct cfil_info *cfi;
5518
5519 /* Read only */
5520 if (req->newptr != USER_ADDR_NULL) {
5521 return EPERM;
5522 }
5523
5524 cfil_rw_lock_shared(&cfil_lck_rw);
5525
5526 /*
5527 * If just asking for the size,
5528 */
5529 if (req->oldptr == USER_ADDR_NULL) {
5530 req->oldidx = cfil_sock_attached_count *
5531 sizeof(struct cfil_sock_stat);
5532 /* Bump the length in case new sockets gets attached */
5533 req->oldidx += req->oldidx >> 3;
5534 goto done;
5535 }
5536
5537 TAILQ_FOREACH(cfi, &cfil_sock_head, cfi_link) {
5538 struct cfil_entry *entry;
5539 struct cfil_sock_stat stat;
5540 struct socket *so = cfi->cfi_so;
5541
5542 bzero(&stat, sizeof(struct cfil_sock_stat));
5543 stat.cfs_len = sizeof(struct cfil_sock_stat);
5544 stat.cfs_sock_id = cfi->cfi_sock_id;
5545 stat.cfs_flags = cfi->cfi_flags;
5546
5547 if (so != NULL) {
5548 stat.cfs_pid = so->last_pid;
5549 memcpy(stat.cfs_uuid, so->last_uuid,
5550 sizeof(uuid_t));
5551 if (so->so_flags & SOF_DELEGATED) {
5552 stat.cfs_e_pid = so->e_pid;
5553 memcpy(stat.cfs_e_uuid, so->e_uuid,
5554 sizeof(uuid_t));
5555 } else {
5556 stat.cfs_e_pid = so->last_pid;
5557 memcpy(stat.cfs_e_uuid, so->last_uuid,
5558 sizeof(uuid_t));
5559 }
5560
5561 stat.cfs_sock_family = so->so_proto->pr_domain->dom_family;
5562 stat.cfs_sock_type = so->so_proto->pr_type;
5563 stat.cfs_sock_protocol = so->so_proto->pr_protocol;
5564 }
5565
5566 stat.cfs_snd.cbs_pending_first =
5567 cfi->cfi_snd.cfi_pending_first;
5568 stat.cfs_snd.cbs_pending_last =
5569 cfi->cfi_snd.cfi_pending_last;
5570 stat.cfs_snd.cbs_inject_q_len =
5571 cfil_queue_len(&cfi->cfi_snd.cfi_inject_q);
5572 stat.cfs_snd.cbs_pass_offset =
5573 cfi->cfi_snd.cfi_pass_offset;
5574
5575 stat.cfs_rcv.cbs_pending_first =
5576 cfi->cfi_rcv.cfi_pending_first;
5577 stat.cfs_rcv.cbs_pending_last =
5578 cfi->cfi_rcv.cfi_pending_last;
5579 stat.cfs_rcv.cbs_inject_q_len =
5580 cfil_queue_len(&cfi->cfi_rcv.cfi_inject_q);
5581 stat.cfs_rcv.cbs_pass_offset =
5582 cfi->cfi_rcv.cfi_pass_offset;
5583
5584 for (i = 0; i < MAX_CONTENT_FILTER; i++) {
5585 struct cfil_entry_stat *estat;
5586 struct cfe_buf *ebuf;
5587 struct cfe_buf_stat *sbuf;
5588
5589 entry = &cfi->cfi_entries[i];
5590
5591 estat = &stat.ces_entries[i];
5592
5593 estat->ces_len = sizeof(struct cfil_entry_stat);
5594 estat->ces_filter_id = entry->cfe_filter ?
5595 entry->cfe_filter->cf_kcunit : 0;
5596 estat->ces_flags = entry->cfe_flags;
5597 estat->ces_necp_control_unit =
5598 entry->cfe_necp_control_unit;
5599
5600 estat->ces_last_event.tv_sec =
5601 (int64_t)entry->cfe_last_event.tv_sec;
5602 estat->ces_last_event.tv_usec =
5603 (int64_t)entry->cfe_last_event.tv_usec;
5604
5605 estat->ces_last_action.tv_sec =
5606 (int64_t)entry->cfe_last_action.tv_sec;
5607 estat->ces_last_action.tv_usec =
5608 (int64_t)entry->cfe_last_action.tv_usec;
5609
5610 ebuf = &entry->cfe_snd;
5611 sbuf = &estat->ces_snd;
5612 sbuf->cbs_pending_first =
5613 cfil_queue_offset_first(&ebuf->cfe_pending_q);
5614 sbuf->cbs_pending_last =
5615 cfil_queue_offset_last(&ebuf->cfe_pending_q);
5616 sbuf->cbs_ctl_first =
5617 cfil_queue_offset_first(&ebuf->cfe_ctl_q);
5618 sbuf->cbs_ctl_last =
5619 cfil_queue_offset_last(&ebuf->cfe_ctl_q);
5620 sbuf->cbs_pass_offset = ebuf->cfe_pass_offset;
5621 sbuf->cbs_peek_offset = ebuf->cfe_peek_offset;
5622 sbuf->cbs_peeked = ebuf->cfe_peeked;
5623
5624 ebuf = &entry->cfe_rcv;
5625 sbuf = &estat->ces_rcv;
5626 sbuf->cbs_pending_first =
5627 cfil_queue_offset_first(&ebuf->cfe_pending_q);
5628 sbuf->cbs_pending_last =
5629 cfil_queue_offset_last(&ebuf->cfe_pending_q);
5630 sbuf->cbs_ctl_first =
5631 cfil_queue_offset_first(&ebuf->cfe_ctl_q);
5632 sbuf->cbs_ctl_last =
5633 cfil_queue_offset_last(&ebuf->cfe_ctl_q);
5634 sbuf->cbs_pass_offset = ebuf->cfe_pass_offset;
5635 sbuf->cbs_peek_offset = ebuf->cfe_peek_offset;
5636 sbuf->cbs_peeked = ebuf->cfe_peeked;
5637 }
5638 error = SYSCTL_OUT(req, &stat,
5639 sizeof(struct cfil_sock_stat));
5640 if (error != 0) {
5641 break;
5642 }
5643 }
5644 done:
5645 cfil_rw_unlock_shared(&cfil_lck_rw);
5646
5647 #if SHOW_DEBUG
5648 if (req->oldptr != USER_ADDR_NULL) {
5649 cfil_info_show();
5650 }
5651 #endif
5652
5653 return error;
5654 }
5655
5656 /*
5657 * UDP Socket Support
5658 */
5659 static void
5660 cfil_hash_entry_log(int level, struct socket *so, struct cfil_hash_entry *entry, uint64_t sockId, const char* msg)
5661 {
5662 char local[MAX_IPv6_STR_LEN + 6];
5663 char remote[MAX_IPv6_STR_LEN + 6];
5664 const void *addr;
5665
5666 // No sock or not UDP, no-op
5667 if (so == NULL || entry == NULL) {
5668 return;
5669 }
5670
5671 local[0] = remote[0] = 0x0;
5672
5673 switch (entry->cfentry_family) {
5674 case AF_INET6:
5675 addr = &entry->cfentry_laddr.addr6;
5676 inet_ntop(AF_INET6, addr, local, sizeof(local));
5677 addr = &entry->cfentry_faddr.addr6;
5678 inet_ntop(AF_INET6, addr, remote, sizeof(local));
5679 break;
5680 case AF_INET:
5681 addr = &entry->cfentry_laddr.addr46.ia46_addr4.s_addr;
5682 inet_ntop(AF_INET, addr, local, sizeof(local));
5683 addr = &entry->cfentry_faddr.addr46.ia46_addr4.s_addr;
5684 inet_ntop(AF_INET, addr, remote, sizeof(local));
5685 break;
5686 default:
5687 return;
5688 }
5689
5690 CFIL_LOG(level, "<%s>: <%s(%d) so %llx, entry %p, sockID %llu> lport %d fport %d laddr %s faddr %s hash %X",
5691 msg,
5692 IS_UDP(so) ? "UDP" : "proto", GET_SO_PROTO(so),
5693 (uint64_t)VM_KERNEL_ADDRPERM(so), entry, sockId,
5694 ntohs(entry->cfentry_lport), ntohs(entry->cfentry_fport), local, remote,
5695 entry->cfentry_flowhash);
5696 }
5697
5698 static void
5699 cfil_inp_log(int level, struct socket *so, const char* msg)
5700 {
5701 struct inpcb *inp = NULL;
5702 char local[MAX_IPv6_STR_LEN + 6];
5703 char remote[MAX_IPv6_STR_LEN + 6];
5704 const void *addr;
5705
5706 if (so == NULL) {
5707 return;
5708 }
5709
5710 inp = sotoinpcb(so);
5711 if (inp == NULL) {
5712 return;
5713 }
5714
5715 local[0] = remote[0] = 0x0;
5716
5717 if (inp->inp_vflag & INP_IPV6) {
5718 addr = &inp->in6p_laddr.s6_addr32;
5719 inet_ntop(AF_INET6, addr, local, sizeof(local));
5720 addr = &inp->in6p_faddr.s6_addr32;
5721 inet_ntop(AF_INET6, addr, remote, sizeof(local));
5722 } else {
5723 addr = &inp->inp_laddr.s_addr;
5724 inet_ntop(AF_INET, addr, local, sizeof(local));
5725 addr = &inp->inp_faddr.s_addr;
5726 inet_ntop(AF_INET, addr, remote, sizeof(local));
5727 }
5728
5729 if (so->so_cfil != NULL) {
5730 CFIL_LOG(level, "<%s>: <%s so %llx - flags 0x%x 0x%x, sockID %llu> lport %d fport %d laddr %s faddr %s",
5731 msg, IS_UDP(so) ? "UDP" : "TCP",
5732 (uint64_t)VM_KERNEL_ADDRPERM(so), inp->inp_flags, inp->inp_socket->so_flags, so->so_cfil->cfi_sock_id,
5733 ntohs(inp->inp_lport), ntohs(inp->inp_fport), local, remote);
5734 } else {
5735 CFIL_LOG(level, "<%s>: <%s so %llx - flags 0x%x 0x%x> lport %d fport %d laddr %s faddr %s",
5736 msg, IS_UDP(so) ? "UDP" : "TCP",
5737 (uint64_t)VM_KERNEL_ADDRPERM(so), inp->inp_flags, inp->inp_socket->so_flags,
5738 ntohs(inp->inp_lport), ntohs(inp->inp_fport), local, remote);
5739 }
5740 }
5741
5742 static void
5743 cfil_info_log(int level, struct cfil_info *cfil_info, const char* msg)
5744 {
5745 if (cfil_info == NULL) {
5746 return;
5747 }
5748
5749 if (cfil_info->cfi_hash_entry != NULL) {
5750 cfil_hash_entry_log(level, cfil_info->cfi_so, cfil_info->cfi_hash_entry, cfil_info->cfi_sock_id, msg);
5751 } else {
5752 cfil_inp_log(level, cfil_info->cfi_so, msg);
5753 }
5754 }
5755
5756 errno_t
5757 cfil_db_init(struct socket *so)
5758 {
5759 errno_t error = 0;
5760 struct cfil_db *db = NULL;
5761
5762 CFIL_LOG(LOG_INFO, "");
5763
5764 db = zalloc(cfil_db_zone);
5765 if (db == NULL) {
5766 error = ENOMEM;
5767 goto done;
5768 }
5769 bzero(db, sizeof(struct cfil_db));
5770 db->cfdb_so = so;
5771 db->cfdb_hashbase = hashinit(CFILHASHSIZE, M_CFIL, &db->cfdb_hashmask);
5772 if (db->cfdb_hashbase == NULL) {
5773 zfree(cfil_db_zone, db);
5774 db = NULL;
5775 error = ENOMEM;
5776 goto done;
5777 }
5778
5779 so->so_cfil_db = db;
5780
5781 done:
5782 return error;
5783 }
5784
5785 void
5786 cfil_db_free(struct socket *so)
5787 {
5788 struct cfil_hash_entry *entry = NULL;
5789 struct cfil_hash_entry *temp_entry = NULL;
5790 struct cfilhashhead *cfilhash = NULL;
5791 struct cfil_db *db = NULL;
5792
5793 CFIL_LOG(LOG_INFO, "");
5794
5795 if (so == NULL || so->so_cfil_db == NULL) {
5796 return;
5797 }
5798 db = so->so_cfil_db;
5799
5800 #if LIFECYCLE_DEBUG
5801 CFIL_LOG(LOG_ERR, "CFIL: LIFECYCLE: <so %llx, db %p> freeing db (count == %d)",
5802 (uint64_t)VM_KERNEL_ADDRPERM(so), db, db->cfdb_count);
5803 #endif
5804
5805 for (int i = 0; i < CFILHASHSIZE; i++) {
5806 cfilhash = &db->cfdb_hashbase[i];
5807 LIST_FOREACH_SAFE(entry, cfilhash, cfentry_link, temp_entry) {
5808 if (entry->cfentry_cfil != NULL) {
5809 #if LIFECYCLE_DEBUG
5810 cfil_info_log(LOG_ERR, entry->cfentry_cfil, "CFIL: LIFECYCLE: DB FREE CLEAN UP");
5811 #endif
5812 CFIL_INFO_FREE(entry->cfentry_cfil);
5813 OSIncrementAtomic(&cfil_stats.cfs_sock_detached);
5814 entry->cfentry_cfil = NULL;
5815 }
5816
5817 cfil_db_delete_entry(db, entry);
5818 if (so->so_flags & SOF_CONTENT_FILTER) {
5819 if (db->cfdb_count == 0) {
5820 so->so_flags &= ~SOF_CONTENT_FILTER;
5821 }
5822 VERIFY(so->so_usecount > 0);
5823 so->so_usecount--;
5824 }
5825 }
5826 }
5827
5828 // Make sure all entries are cleaned up!
5829 VERIFY(db->cfdb_count == 0);
5830 #if LIFECYCLE_DEBUG
5831 CFIL_LOG(LOG_ERR, "CFIL: LIFECYCLE: so usecount %d", so->so_usecount);
5832 #endif
5833
5834 hashdestroy(db->cfdb_hashbase, M_CFIL, db->cfdb_hashmask);
5835 zfree(cfil_db_zone, db);
5836 so->so_cfil_db = NULL;
5837 }
5838
5839 static bool
5840 fill_cfil_hash_entry_from_address(struct cfil_hash_entry *entry, bool isLocal, struct sockaddr *addr, bool islocalUpdate)
5841 {
5842 struct sockaddr_in *sin = NULL;
5843 struct sockaddr_in6 *sin6 = NULL;
5844
5845 if (entry == NULL || addr == NULL) {
5846 return FALSE;
5847 }
5848
5849 switch (addr->sa_family) {
5850 case AF_INET:
5851 sin = satosin(addr);
5852 if (sin->sin_len != sizeof(*sin)) {
5853 return FALSE;
5854 }
5855 if (isLocal == TRUE) {
5856 if (sin->sin_port) {
5857 entry->cfentry_lport = sin->sin_port;
5858 if (islocalUpdate) {
5859 entry->cfentry_lport_updated = TRUE;
5860 }
5861 }
5862 if (sin->sin_addr.s_addr) {
5863 entry->cfentry_laddr.addr46.ia46_addr4.s_addr = sin->sin_addr.s_addr;
5864 if (islocalUpdate) {
5865 entry->cfentry_laddr_updated = TRUE;
5866 }
5867 }
5868 } else {
5869 if (sin->sin_port) {
5870 entry->cfentry_fport = sin->sin_port;
5871 }
5872 if (sin->sin_addr.s_addr) {
5873 entry->cfentry_faddr.addr46.ia46_addr4.s_addr = sin->sin_addr.s_addr;
5874 }
5875 }
5876 entry->cfentry_family = AF_INET;
5877 return TRUE;
5878 case AF_INET6:
5879 sin6 = satosin6(addr);
5880 if (sin6->sin6_len != sizeof(*sin6)) {
5881 return FALSE;
5882 }
5883 if (isLocal == TRUE) {
5884 if (sin6->sin6_port) {
5885 entry->cfentry_lport = sin6->sin6_port;
5886 if (islocalUpdate) {
5887 entry->cfentry_lport_updated = TRUE;
5888 }
5889 }
5890 if (!IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
5891 entry->cfentry_laddr.addr6 = sin6->sin6_addr;
5892 if (islocalUpdate) {
5893 entry->cfentry_laddr_updated = TRUE;
5894 }
5895 }
5896 } else {
5897 if (sin6->sin6_port) {
5898 entry->cfentry_fport = sin6->sin6_port;
5899 }
5900 if (!IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
5901 entry->cfentry_faddr.addr6 = sin6->sin6_addr;
5902 }
5903 }
5904 entry->cfentry_family = AF_INET6;
5905 return TRUE;
5906 default:
5907 return FALSE;
5908 }
5909 }
5910
5911 static bool
5912 fill_cfil_hash_entry_from_inp(struct cfil_hash_entry *entry, bool isLocal, struct inpcb *inp, bool islocalUpdate)
5913 {
5914 if (entry == NULL || inp == NULL) {
5915 return FALSE;
5916 }
5917
5918 if (inp->inp_vflag & INP_IPV6) {
5919 if (isLocal == TRUE) {
5920 if (inp->inp_lport) {
5921 entry->cfentry_lport = inp->inp_lport;
5922 if (islocalUpdate) {
5923 entry->cfentry_lport_updated = TRUE;
5924 }
5925 }
5926 if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) {
5927 entry->cfentry_laddr.addr6 = inp->in6p_laddr;
5928 if (islocalUpdate) {
5929 entry->cfentry_laddr_updated = TRUE;
5930 }
5931 }
5932 } else {
5933 if (inp->inp_fport) {
5934 entry->cfentry_fport = inp->inp_fport;
5935 }
5936 if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) {
5937 entry->cfentry_faddr.addr6 = inp->in6p_faddr;
5938 }
5939 }
5940 entry->cfentry_family = AF_INET6;
5941 return TRUE;
5942 } else if (inp->inp_vflag & INP_IPV4) {
5943 if (isLocal == TRUE) {
5944 if (inp->inp_lport) {
5945 entry->cfentry_lport = inp->inp_lport;
5946 if (islocalUpdate) {
5947 entry->cfentry_lport_updated = TRUE;
5948 }
5949 }
5950 if (inp->inp_laddr.s_addr) {
5951 entry->cfentry_laddr.addr46.ia46_addr4.s_addr = inp->inp_laddr.s_addr;
5952 if (islocalUpdate) {
5953 entry->cfentry_laddr_updated = TRUE;
5954 }
5955 }
5956 } else {
5957 if (inp->inp_fport) {
5958 entry->cfentry_fport = inp->inp_fport;
5959 }
5960 if (inp->inp_faddr.s_addr) {
5961 entry->cfentry_faddr.addr46.ia46_addr4.s_addr = inp->inp_faddr.s_addr;
5962 }
5963 }
5964 entry->cfentry_family = AF_INET;
5965 return TRUE;
5966 }
5967 return FALSE;
5968 }
5969
5970 bool
5971 check_port(struct sockaddr *addr, u_short port)
5972 {
5973 struct sockaddr_in *sin = NULL;
5974 struct sockaddr_in6 *sin6 = NULL;
5975
5976 if (addr == NULL || port == 0) {
5977 return FALSE;
5978 }
5979
5980 switch (addr->sa_family) {
5981 case AF_INET:
5982 sin = satosin(addr);
5983 if (sin->sin_len != sizeof(*sin)) {
5984 return FALSE;
5985 }
5986 if (port == ntohs(sin->sin_port)) {
5987 return TRUE;
5988 }
5989 break;
5990 case AF_INET6:
5991 sin6 = satosin6(addr);
5992 if (sin6->sin6_len != sizeof(*sin6)) {
5993 return FALSE;
5994 }
5995 if (port == ntohs(sin6->sin6_port)) {
5996 return TRUE;
5997 }
5998 break;
5999 default:
6000 break;
6001 }
6002 return FALSE;
6003 }
6004
6005 struct cfil_hash_entry *
6006 cfil_db_lookup_entry_with_sockid(struct cfil_db *db, u_int64_t sock_id)
6007 {
6008 struct cfilhashhead *cfilhash = NULL;
6009 u_int32_t flowhash = (u_int32_t)(sock_id & 0x0ffffffff);
6010 struct cfil_hash_entry *nextentry;
6011
6012 if (db == NULL || db->cfdb_hashbase == NULL || sock_id == 0) {
6013 return NULL;
6014 }
6015
6016 flowhash &= db->cfdb_hashmask;
6017 cfilhash = &db->cfdb_hashbase[flowhash];
6018
6019 LIST_FOREACH(nextentry, cfilhash, cfentry_link) {
6020 if (nextentry->cfentry_cfil != NULL &&
6021 nextentry->cfentry_cfil->cfi_sock_id == sock_id) {
6022 CFIL_LOG(LOG_DEBUG, "CFIL: UDP <so %llx> matched <id %llu, hash %u>",
6023 (uint64_t)VM_KERNEL_ADDRPERM(db->cfdb_so), nextentry->cfentry_cfil->cfi_sock_id, flowhash);
6024 cfil_hash_entry_log(LOG_DEBUG, db->cfdb_so, nextentry, 0, "CFIL: UDP found entry");
6025 return nextentry;
6026 }
6027 }
6028
6029 CFIL_LOG(LOG_DEBUG, "CFIL: UDP <so %llx> NOT matched <id %llu, hash %u>",
6030 (uint64_t)VM_KERNEL_ADDRPERM(db->cfdb_so), sock_id, flowhash);
6031 return NULL;
6032 }
6033
6034 struct cfil_hash_entry *
6035 cfil_db_lookup_entry_internal(struct cfil_db *db, struct sockaddr *local, struct sockaddr *remote, boolean_t remoteOnly, boolean_t withLocalPort)
6036 {
6037 struct cfil_hash_entry matchentry = { };
6038 struct cfil_hash_entry *nextentry = NULL;
6039 struct inpcb *inp = sotoinpcb(db->cfdb_so);
6040 u_int32_t hashkey_faddr = 0, hashkey_laddr = 0;
6041 u_int16_t hashkey_fport = 0, hashkey_lport = 0;
6042 int inp_hash_element = 0;
6043 struct cfilhashhead *cfilhash = NULL;
6044
6045 CFIL_LOG(LOG_INFO, "");
6046
6047 if (inp == NULL) {
6048 goto done;
6049 }
6050
6051 if (local != NULL) {
6052 fill_cfil_hash_entry_from_address(&matchentry, TRUE, local, FALSE);
6053 } else {
6054 fill_cfil_hash_entry_from_inp(&matchentry, TRUE, inp, FALSE);
6055 }
6056 if (remote != NULL) {
6057 fill_cfil_hash_entry_from_address(&matchentry, FALSE, remote, FALSE);
6058 } else {
6059 fill_cfil_hash_entry_from_inp(&matchentry, FALSE, inp, FALSE);
6060 }
6061
6062 if (inp->inp_vflag & INP_IPV6) {
6063 hashkey_faddr = matchentry.cfentry_faddr.addr6.s6_addr32[3];
6064 hashkey_laddr = (remoteOnly == false) ? matchentry.cfentry_laddr.addr6.s6_addr32[3] : 0;
6065 } else {
6066 hashkey_faddr = matchentry.cfentry_faddr.addr46.ia46_addr4.s_addr;
6067 hashkey_laddr = (remoteOnly == false) ? matchentry.cfentry_laddr.addr46.ia46_addr4.s_addr : 0;
6068 }
6069
6070 hashkey_fport = matchentry.cfentry_fport;
6071 hashkey_lport = (remoteOnly == false || withLocalPort == true) ? matchentry.cfentry_lport : 0;
6072
6073 inp_hash_element = CFIL_HASH(hashkey_laddr, hashkey_faddr, hashkey_lport, hashkey_fport);
6074 inp_hash_element &= db->cfdb_hashmask;
6075 cfilhash = &db->cfdb_hashbase[inp_hash_element];
6076
6077 LIST_FOREACH(nextentry, cfilhash, cfentry_link) {
6078 if ((inp->inp_vflag & INP_IPV6) &&
6079 (remoteOnly || nextentry->cfentry_lport_updated || nextentry->cfentry_lport == matchentry.cfentry_lport) &&
6080 nextentry->cfentry_fport == matchentry.cfentry_fport &&
6081 (remoteOnly || nextentry->cfentry_laddr_updated || IN6_ARE_ADDR_EQUAL(&nextentry->cfentry_laddr.addr6, &matchentry.cfentry_laddr.addr6)) &&
6082 IN6_ARE_ADDR_EQUAL(&nextentry->cfentry_faddr.addr6, &matchentry.cfentry_faddr.addr6)) {
6083 #if DATA_DEBUG
6084 cfil_hash_entry_log(LOG_DEBUG, db->cfdb_so, &matchentry, 0, "CFIL LOOKUP ENTRY: UDP V6 found entry");
6085 #endif
6086 return nextentry;
6087 } else if ((remoteOnly || nextentry->cfentry_lport_updated || nextentry->cfentry_lport == matchentry.cfentry_lport) &&
6088 nextentry->cfentry_fport == matchentry.cfentry_fport &&
6089 (remoteOnly || nextentry->cfentry_laddr_updated || nextentry->cfentry_laddr.addr46.ia46_addr4.s_addr == matchentry.cfentry_laddr.addr46.ia46_addr4.s_addr) &&
6090 nextentry->cfentry_faddr.addr46.ia46_addr4.s_addr == matchentry.cfentry_faddr.addr46.ia46_addr4.s_addr) {
6091 #if DATA_DEBUG
6092 cfil_hash_entry_log(LOG_DEBUG, db->cfdb_so, &matchentry, 0, "CFIL LOOKUP ENTRY: UDP V4 found entry");
6093 #endif
6094 return nextentry;
6095 }
6096 }
6097
6098 done:
6099 #if DATA_DEBUG
6100 cfil_hash_entry_log(LOG_DEBUG, db->cfdb_so, &matchentry, 0, "CFIL LOOKUP ENTRY: UDP no entry found");
6101 #endif
6102 return NULL;
6103 }
6104
6105 struct cfil_hash_entry *
6106 cfil_db_lookup_entry(struct cfil_db *db, struct sockaddr *local, struct sockaddr *remote, boolean_t remoteOnly)
6107 {
6108 struct cfil_hash_entry *entry = cfil_db_lookup_entry_internal(db, local, remote, remoteOnly, false);
6109 if (entry == NULL && remoteOnly == true) {
6110 entry = cfil_db_lookup_entry_internal(db, local, remote, remoteOnly, true);
6111 }
6112 return entry;
6113 }
6114
6115 cfil_sock_id_t
6116 cfil_sock_id_from_datagram_socket(struct socket *so, struct sockaddr *local, struct sockaddr *remote)
6117 {
6118 struct cfil_hash_entry *hash_entry = NULL;
6119
6120 socket_lock_assert_owned(so);
6121
6122 if (so->so_cfil_db == NULL) {
6123 return CFIL_SOCK_ID_NONE;
6124 }
6125
6126 hash_entry = cfil_db_lookup_entry(so->so_cfil_db, local, remote, false);
6127 if (hash_entry == NULL) {
6128 // No match with both local and remote, try match with remote only
6129 hash_entry = cfil_db_lookup_entry(so->so_cfil_db, local, remote, true);
6130 }
6131 if (hash_entry == NULL || hash_entry->cfentry_cfil == NULL) {
6132 return CFIL_SOCK_ID_NONE;
6133 }
6134
6135 return hash_entry->cfentry_cfil->cfi_sock_id;
6136 }
6137
6138 void
6139 cfil_db_delete_entry(struct cfil_db *db, struct cfil_hash_entry *hash_entry)
6140 {
6141 if (hash_entry == NULL) {
6142 return;
6143 }
6144 if (db == NULL || db->cfdb_count == 0) {
6145 return;
6146 }
6147 db->cfdb_count--;
6148 if (db->cfdb_only_entry == hash_entry) {
6149 db->cfdb_only_entry = NULL;
6150 }
6151 LIST_REMOVE(hash_entry, cfentry_link);
6152 zfree(cfil_hash_entry_zone, hash_entry);
6153 }
6154
6155 struct cfil_hash_entry *
6156 cfil_db_add_entry(struct cfil_db *db, struct sockaddr *local, struct sockaddr *remote)
6157 {
6158 struct cfil_hash_entry *entry = NULL;
6159 struct inpcb *inp = sotoinpcb(db->cfdb_so);
6160 u_int32_t hashkey_faddr = 0, hashkey_laddr = 0;
6161 int inp_hash_element = 0;
6162 struct cfilhashhead *cfilhash = NULL;
6163
6164 CFIL_LOG(LOG_INFO, "");
6165
6166 if (inp == NULL) {
6167 goto done;
6168 }
6169
6170 entry = zalloc(cfil_hash_entry_zone);
6171 if (entry == NULL) {
6172 goto done;
6173 }
6174 bzero(entry, sizeof(struct cfil_hash_entry));
6175
6176 if (local != NULL) {
6177 fill_cfil_hash_entry_from_address(entry, TRUE, local, FALSE);
6178 } else {
6179 fill_cfil_hash_entry_from_inp(entry, TRUE, inp, FALSE);
6180 }
6181 if (remote != NULL) {
6182 fill_cfil_hash_entry_from_address(entry, FALSE, remote, FALSE);
6183 } else {
6184 fill_cfil_hash_entry_from_inp(entry, FALSE, inp, FALSE);
6185 }
6186 entry->cfentry_lastused = net_uptime();
6187
6188 if (inp->inp_vflag & INP_IPV6) {
6189 hashkey_faddr = entry->cfentry_faddr.addr6.s6_addr32[3];
6190 hashkey_laddr = entry->cfentry_laddr.addr6.s6_addr32[3];
6191 } else {
6192 hashkey_faddr = entry->cfentry_faddr.addr46.ia46_addr4.s_addr;
6193 hashkey_laddr = entry->cfentry_laddr.addr46.ia46_addr4.s_addr;
6194 }
6195 entry->cfentry_flowhash = CFIL_HASH(hashkey_laddr, hashkey_faddr,
6196 entry->cfentry_lport, entry->cfentry_fport);
6197 inp_hash_element = entry->cfentry_flowhash & db->cfdb_hashmask;
6198
6199 cfilhash = &db->cfdb_hashbase[inp_hash_element];
6200
6201 LIST_INSERT_HEAD(cfilhash, entry, cfentry_link);
6202 db->cfdb_count++;
6203 db->cfdb_only_entry = entry;
6204 cfil_hash_entry_log(LOG_DEBUG, db->cfdb_so, entry, 0, "CFIL: cfil_db_add_entry: ADDED");
6205
6206 done:
6207 CFIL_LOG(LOG_DEBUG, "CFIL: UDP <so %llx> total count %d", (uint64_t)VM_KERNEL_ADDRPERM(db->cfdb_so), db->cfdb_count);
6208 return entry;
6209 }
6210
6211 void
6212 cfil_db_update_entry_local(struct cfil_db *db, struct cfil_hash_entry *entry, struct sockaddr *local, struct mbuf *control)
6213 {
6214 struct inpcb *inp = sotoinpcb(db->cfdb_so);
6215 union sockaddr_in_4_6 address_buf = { };
6216
6217 CFIL_LOG(LOG_INFO, "");
6218
6219 if (inp == NULL || entry == NULL) {
6220 return;
6221 }
6222
6223 if (LOCAL_ADDRESS_NEEDS_UPDATE(entry)) {
6224 // Flow does not have a local address yet. Retrieve local address
6225 // from control mbufs if present.
6226 if (local == NULL && control != NULL) {
6227 uint8_t *addr_ptr = NULL;
6228 int size = cfil_sock_udp_get_address_from_control(entry->cfentry_family, control, &addr_ptr);
6229
6230 if (size && addr_ptr) {
6231 switch (entry->cfentry_family) {
6232 case AF_INET:
6233 if (size == sizeof(struct in_addr)) {
6234 address_buf.sin.sin_port = 0;
6235 address_buf.sin.sin_family = AF_INET;
6236 address_buf.sin.sin_len = sizeof(struct sockaddr_in);
6237 (void) memcpy(&address_buf.sin.sin_addr, addr_ptr, sizeof(struct in_addr));
6238 local = sintosa(&address_buf.sin);
6239 }
6240 break;
6241 case AF_INET6:
6242 if (size == sizeof(struct in6_addr)) {
6243 address_buf.sin6.sin6_port = 0;
6244 address_buf.sin6.sin6_family = AF_INET6;
6245 address_buf.sin6.sin6_len = sizeof(struct sockaddr_in6);
6246 (void) memcpy(&address_buf.sin6.sin6_addr, addr_ptr, sizeof(struct in6_addr));
6247 local = sin6tosa(&address_buf.sin6);
6248 }
6249 break;
6250 default:
6251 break;
6252 }
6253 }
6254 }
6255 if (local != NULL) {
6256 fill_cfil_hash_entry_from_address(entry, TRUE, local, TRUE);
6257 } else {
6258 fill_cfil_hash_entry_from_inp(entry, TRUE, inp, TRUE);
6259 }
6260 }
6261
6262 if (LOCAL_PORT_NEEDS_UPDATE(entry, db->cfdb_so)) {
6263 fill_cfil_hash_entry_from_inp(entry, TRUE, inp, TRUE);
6264 }
6265
6266 return;
6267 }
6268
6269 struct cfil_info *
6270 cfil_db_get_cfil_info(struct cfil_db *db, cfil_sock_id_t id)
6271 {
6272 struct cfil_hash_entry *hash_entry = NULL;
6273
6274 CFIL_LOG(LOG_INFO, "");
6275
6276 if (db == NULL || id == 0) {
6277 CFIL_LOG(LOG_DEBUG, "CFIL: UDP <so %llx> NULL DB <id %llu>",
6278 db ? (uint64_t)VM_KERNEL_ADDRPERM(db->cfdb_so) : 0, id);
6279 return NULL;
6280 }
6281
6282 // This is an optimization for connected UDP socket which only has one flow.
6283 // No need to do the hash lookup.
6284 if (db->cfdb_count == 1) {
6285 if (db->cfdb_only_entry && db->cfdb_only_entry->cfentry_cfil &&
6286 db->cfdb_only_entry->cfentry_cfil->cfi_sock_id == id) {
6287 return db->cfdb_only_entry->cfentry_cfil;
6288 }
6289 }
6290
6291 hash_entry = cfil_db_lookup_entry_with_sockid(db, id);
6292 return hash_entry != NULL ? hash_entry->cfentry_cfil : NULL;
6293 }
6294
6295 struct cfil_hash_entry *
6296 cfil_sock_udp_get_flow(struct socket *so, uint32_t filter_control_unit, bool outgoing, struct sockaddr *local, struct sockaddr *remote, struct mbuf *control, int debug)
6297 {
6298 struct cfil_hash_entry *hash_entry = NULL;
6299 int new_filter_control_unit = 0;
6300
6301 errno_t error = 0;
6302 socket_lock_assert_owned(so);
6303
6304 // If new socket, allocate cfil db
6305 if (so->so_cfil_db == NULL) {
6306 if (cfil_db_init(so) != 0) {
6307 return NULL;
6308 }
6309 }
6310
6311 // See if flow already exists.
6312 hash_entry = cfil_db_lookup_entry(so->so_cfil_db, local, remote, false);
6313 if (hash_entry == NULL) {
6314 // No match with both local and remote, try match with remote only
6315 hash_entry = cfil_db_lookup_entry(so->so_cfil_db, local, remote, true);
6316 }
6317 if (hash_entry != NULL) {
6318 /* Drop pre-existing UDP flow if filter state changed */
6319 new_filter_control_unit = necp_socket_get_content_filter_control_unit(so);
6320 if (new_filter_control_unit > 0 &&
6321 new_filter_control_unit != hash_entry->cfentry_cfil->cfi_filter_control_unit) {
6322 return NULL;
6323 }
6324
6325 // Try to update flow info from socket and/or control mbufs if necessary
6326 if (LOCAL_ADDRESS_NEEDS_UPDATE(hash_entry) || LOCAL_PORT_NEEDS_UPDATE(hash_entry, so)) {
6327 cfil_db_update_entry_local(so->so_cfil_db, hash_entry, local, control);
6328 }
6329 return hash_entry;
6330 }
6331
6332 hash_entry = cfil_db_add_entry(so->so_cfil_db, local, remote);
6333 if (hash_entry == NULL) {
6334 OSIncrementAtomic(&cfil_stats.cfs_sock_attach_no_mem);
6335 CFIL_LOG(LOG_ERR, "CFIL: UDP failed to add entry");
6336 return NULL;
6337 }
6338
6339 if (cfil_info_alloc(so, hash_entry) == NULL ||
6340 hash_entry->cfentry_cfil == NULL) {
6341 cfil_db_delete_entry(so->so_cfil_db, hash_entry);
6342 CFIL_LOG(LOG_ERR, "CFIL: UDP failed to alloc cfil_info");
6343 OSIncrementAtomic(&cfil_stats.cfs_sock_attach_no_mem);
6344 return NULL;
6345 }
6346 hash_entry->cfentry_cfil->cfi_filter_control_unit = filter_control_unit;
6347 hash_entry->cfentry_cfil->cfi_dir = outgoing ? CFS_CONNECTION_DIR_OUT : CFS_CONNECTION_DIR_IN;
6348 hash_entry->cfentry_cfil->cfi_debug = debug;
6349
6350 #if LIFECYCLE_DEBUG
6351 cfil_info_log(LOG_ERR, hash_entry->cfentry_cfil, "CFIL: LIFECYCLE: ADDED");
6352 #endif
6353
6354 // Check if we can update the new flow's local address from control mbufs
6355 if (control != NULL) {
6356 cfil_db_update_entry_local(so->so_cfil_db, hash_entry, local, control);
6357 }
6358
6359 if (cfil_info_attach_unit(so, filter_control_unit, hash_entry->cfentry_cfil) == 0) {
6360 CFIL_INFO_FREE(hash_entry->cfentry_cfil);
6361 cfil_db_delete_entry(so->so_cfil_db, hash_entry);
6362 CFIL_LOG(LOG_ERR, "CFIL: UDP cfil_info_attach_unit(%u) failed",
6363 filter_control_unit);
6364 OSIncrementAtomic(&cfil_stats.cfs_sock_attach_failed);
6365 return NULL;
6366 }
6367 CFIL_LOG(LOG_DEBUG, "CFIL: UDP <so %llx> filter_control_unit %u sockID %llu attached",
6368 (uint64_t)VM_KERNEL_ADDRPERM(so),
6369 filter_control_unit, hash_entry->cfentry_cfil->cfi_sock_id);
6370
6371 so->so_flags |= SOF_CONTENT_FILTER;
6372 OSIncrementAtomic(&cfil_stats.cfs_sock_attached);
6373
6374 /* Hold a reference on the socket for each flow */
6375 so->so_usecount++;
6376
6377 if (debug) {
6378 cfil_info_log(LOG_ERR, hash_entry->cfentry_cfil, "CFIL: LIFECYCLE: ADDED");
6379 }
6380
6381 error = cfil_dispatch_attach_event(so, hash_entry->cfentry_cfil, 0,
6382 outgoing ? CFS_CONNECTION_DIR_OUT : CFS_CONNECTION_DIR_IN);
6383 /* We can recover from flow control or out of memory errors */
6384 if (error != 0 && error != ENOBUFS && error != ENOMEM) {
6385 return NULL;
6386 }
6387
6388 CFIL_INFO_VERIFY(hash_entry->cfentry_cfil);
6389 return hash_entry;
6390 }
6391
6392 int
6393 cfil_sock_udp_get_address_from_control(sa_family_t family, struct mbuf *control, uint8_t **address_ptr)
6394 {
6395 struct cmsghdr *cm;
6396 struct in6_pktinfo *pi6;
6397
6398 if (control == NULL || address_ptr == NULL) {
6399 return 0;
6400 }
6401
6402 while (control) {
6403 if (control->m_type != MT_CONTROL) {
6404 control = control->m_next;
6405 continue;
6406 }
6407
6408 for (cm = M_FIRST_CMSGHDR(control);
6409 is_cmsg_valid(control, cm);
6410 cm = M_NXT_CMSGHDR(control, cm)) {
6411 switch (cm->cmsg_type) {
6412 case IP_RECVDSTADDR:
6413 if (family == AF_INET &&
6414 cm->cmsg_level == IPPROTO_IP &&
6415 cm->cmsg_len == CMSG_LEN(sizeof(struct in_addr))) {
6416 *address_ptr = CMSG_DATA(cm);
6417 return sizeof(struct in_addr);
6418 }
6419 break;
6420 case IPV6_PKTINFO:
6421 case IPV6_2292PKTINFO:
6422 if (family == AF_INET6 &&
6423 cm->cmsg_level == IPPROTO_IPV6 &&
6424 cm->cmsg_len == CMSG_LEN(sizeof(struct in6_pktinfo))) {
6425 pi6 = (struct in6_pktinfo *)(void *)CMSG_DATA(cm);
6426 *address_ptr = (uint8_t *)&pi6->ipi6_addr;
6427 return sizeof(struct in6_addr);
6428 }
6429 break;
6430 default:
6431 break;
6432 }
6433 }
6434
6435 control = control->m_next;
6436 }
6437 return 0;
6438 }
6439
6440 errno_t
6441 cfil_sock_udp_handle_data(bool outgoing, struct socket *so,
6442 struct sockaddr *local, struct sockaddr *remote,
6443 struct mbuf *data, struct mbuf *control, uint32_t flags)
6444 {
6445 #pragma unused(outgoing, so, local, remote, data, control, flags)
6446 errno_t error = 0;
6447 uint32_t filter_control_unit;
6448 struct cfil_hash_entry *hash_entry = NULL;
6449 struct cfil_info *cfil_info = NULL;
6450 int debug = 0;
6451
6452 socket_lock_assert_owned(so);
6453
6454 if (cfil_active_count == 0) {
6455 CFIL_LOG(LOG_DEBUG, "CFIL: UDP no active filter");
6456 OSIncrementAtomic(&cfil_stats.cfs_sock_attach_in_vain);
6457 return error;
6458 }
6459
6460 // Socket has been blessed
6461 if ((so->so_flags1 & SOF1_CONTENT_FILTER_SKIP) != 0) {
6462 return error;
6463 }
6464
6465 filter_control_unit = necp_socket_get_content_filter_control_unit(so);
6466 if (filter_control_unit == 0) {
6467 CFIL_LOG(LOG_DEBUG, "CFIL: UDP failed to get control unit");
6468 return error;
6469 }
6470
6471 if (filter_control_unit == NECP_FILTER_UNIT_NO_FILTER) {
6472 return error;
6473 }
6474
6475 if ((filter_control_unit & NECP_MASK_USERSPACE_ONLY) != 0) {
6476 CFIL_LOG(LOG_DEBUG, "CFIL: UDP user space only");
6477 OSIncrementAtomic(&cfil_stats.cfs_sock_userspace_only);
6478 return error;
6479 }
6480
6481 hash_entry = cfil_sock_udp_get_flow(so, filter_control_unit, outgoing, local, remote, control, debug);
6482 if (hash_entry == NULL || hash_entry->cfentry_cfil == NULL) {
6483 CFIL_LOG(LOG_ERR, "CFIL: Falied to create UDP flow");
6484 return EPIPE;
6485 }
6486 // Update last used timestamp, this is for flow Idle TO
6487 hash_entry->cfentry_lastused = net_uptime();
6488 cfil_info = hash_entry->cfentry_cfil;
6489
6490 if (cfil_info->cfi_flags & CFIF_DROP) {
6491 #if DATA_DEBUG
6492 cfil_hash_entry_log(LOG_DEBUG, so, hash_entry, 0, "CFIL: UDP DROP");
6493 #endif
6494 return EPIPE;
6495 }
6496 if (control != NULL) {
6497 OSIncrementAtomic(&cfil_stats.cfs_data_in_control);
6498 }
6499 if (data->m_type == MT_OOBDATA) {
6500 CFIL_LOG(LOG_ERR, "so %llx MSG_OOB",
6501 (uint64_t)VM_KERNEL_ADDRPERM(so));
6502 OSIncrementAtomic(&cfil_stats.cfs_data_in_oob);
6503 }
6504
6505 error = cfil_data_common(so, cfil_info, outgoing, remote, data, control, flags);
6506
6507 return error;
6508 }
6509
6510 /*
6511 * Go through all UDP flows for specified socket and returns TRUE if
6512 * any flow is still attached. If need_wait is TRUE, wait on first
6513 * attached flow.
6514 */
6515 static int
6516 cfil_filters_udp_attached(struct socket *so, bool need_wait)
6517 {
6518 struct timespec ts;
6519 lck_mtx_t *mutex_held;
6520 struct cfilhashhead *cfilhash = NULL;
6521 struct cfil_db *db = NULL;
6522 struct cfil_hash_entry *hash_entry = NULL;
6523 struct cfil_hash_entry *temp_hash_entry = NULL;
6524 struct cfil_info *cfil_info = NULL;
6525 struct cfil_entry *entry = NULL;
6526 errno_t error = 0;
6527 int kcunit;
6528 int attached = 0;
6529 uint64_t sock_flow_id = 0;
6530
6531 socket_lock_assert_owned(so);
6532
6533 if ((so->so_flags & SOF_CONTENT_FILTER) != 0 && so->so_cfil_db != NULL) {
6534 if (so->so_proto->pr_getlock != NULL) {
6535 mutex_held = (*so->so_proto->pr_getlock)(so, PR_F_WILLUNLOCK);
6536 } else {
6537 mutex_held = so->so_proto->pr_domain->dom_mtx;
6538 }
6539 LCK_MTX_ASSERT(mutex_held, LCK_MTX_ASSERT_OWNED);
6540
6541 db = so->so_cfil_db;
6542
6543 for (int i = 0; i < CFILHASHSIZE; i++) {
6544 cfilhash = &db->cfdb_hashbase[i];
6545
6546 LIST_FOREACH_SAFE(hash_entry, cfilhash, cfentry_link, temp_hash_entry) {
6547 if (hash_entry->cfentry_cfil != NULL) {
6548 cfil_info = hash_entry->cfentry_cfil;
6549 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
6550 entry = &cfil_info->cfi_entries[kcunit - 1];
6551
6552 /* Are we attached to the filter? */
6553 if (entry->cfe_filter == NULL) {
6554 continue;
6555 }
6556
6557 if ((entry->cfe_flags & CFEF_SENT_SOCK_ATTACHED) == 0) {
6558 continue;
6559 }
6560 if ((entry->cfe_flags & CFEF_CFIL_DETACHED) != 0) {
6561 continue;
6562 }
6563
6564 attached = 1;
6565
6566 if (need_wait == TRUE) {
6567 #if LIFECYCLE_DEBUG
6568 cfil_info_log(LOG_ERR, cfil_info, "CFIL: LIFECYCLE: WAIT FOR FLOW TO FINISH");
6569 #endif
6570
6571 ts.tv_sec = cfil_close_wait_timeout / 1000;
6572 ts.tv_nsec = (cfil_close_wait_timeout % 1000) *
6573 NSEC_PER_USEC * 1000;
6574
6575 OSIncrementAtomic(&cfil_stats.cfs_close_wait);
6576 cfil_info->cfi_flags |= CFIF_CLOSE_WAIT;
6577 sock_flow_id = cfil_info->cfi_sock_id;
6578
6579 error = msleep((caddr_t)cfil_info, mutex_held,
6580 PSOCK | PCATCH, "cfil_filters_udp_attached", &ts);
6581
6582 // Woke up from sleep, validate if cfil_info is still valid
6583 if (so->so_cfil_db == NULL ||
6584 (cfil_info != cfil_db_get_cfil_info(so->so_cfil_db, sock_flow_id))) {
6585 // cfil_info is not valid, do not continue
6586 goto done;
6587 }
6588
6589 cfil_info->cfi_flags &= ~CFIF_CLOSE_WAIT;
6590
6591 #if LIFECYCLE_DEBUG
6592 cfil_info_log(LOG_ERR, cfil_info, "CFIL: LIFECYCLE: WAIT FOR FLOW DONE");
6593 #endif
6594
6595 /*
6596 * Force close in case of timeout
6597 */
6598 if (error != 0) {
6599 OSIncrementAtomic(&cfil_stats.cfs_close_wait_timeout);
6600 #if LIFECYCLE_DEBUG
6601 cfil_info_log(LOG_ERR, cfil_info, "CFIL: LIFECYCLE: WAIT FOR FLOW TIMED OUT, FORCE DETACH");
6602 #endif
6603 entry->cfe_flags |= CFEF_CFIL_DETACHED;
6604 }
6605 }
6606 goto done;
6607 }
6608 }
6609 }
6610 }
6611 }
6612
6613 done:
6614 return attached;
6615 }
6616
6617 int32_t
6618 cfil_sock_udp_data_pending(struct sockbuf *sb, bool check_thread)
6619 {
6620 struct socket *so = sb->sb_so;
6621 struct cfi_buf *cfi_buf;
6622 uint64_t pending = 0;
6623 uint64_t total_pending = 0;
6624 struct cfilhashhead *cfilhash = NULL;
6625 struct cfil_db *db = NULL;
6626 struct cfil_hash_entry *hash_entry = NULL;
6627 struct cfil_hash_entry *temp_hash_entry = NULL;
6628
6629 socket_lock_assert_owned(so);
6630
6631 if ((so->so_flags & SOF_CONTENT_FILTER) != 0 && so->so_cfil_db != NULL &&
6632 (check_thread == FALSE || so->so_snd.sb_cfil_thread != current_thread())) {
6633 db = so->so_cfil_db;
6634
6635 for (int i = 0; i < CFILHASHSIZE; i++) {
6636 cfilhash = &db->cfdb_hashbase[i];
6637
6638 LIST_FOREACH_SAFE(hash_entry, cfilhash, cfentry_link, temp_hash_entry) {
6639 if (hash_entry->cfentry_cfil != NULL) {
6640 if ((sb->sb_flags & SB_RECV) == 0) {
6641 cfi_buf = &hash_entry->cfentry_cfil->cfi_snd;
6642 } else {
6643 cfi_buf = &hash_entry->cfentry_cfil->cfi_rcv;
6644 }
6645
6646 pending = cfi_buf->cfi_pending_last - cfi_buf->cfi_pending_first;
6647 /*
6648 * If we are limited by the "chars of mbufs used" roughly
6649 * adjust so we won't overcommit
6650 */
6651 if ((uint64_t)cfi_buf->cfi_pending_mbcnt > pending) {
6652 pending = cfi_buf->cfi_pending_mbcnt;
6653 }
6654
6655 total_pending += pending;
6656 }
6657 }
6658 }
6659
6660 VERIFY(total_pending < INT32_MAX);
6661 #if DATA_DEBUG
6662 CFIL_LOG(LOG_DEBUG, "CFIL: <so %llx> total pending %llu <check_thread %d>",
6663 (uint64_t)VM_KERNEL_ADDRPERM(so),
6664 total_pending, check_thread);
6665 #endif
6666 }
6667
6668 return (int32_t)(total_pending);
6669 }
6670
6671 int
6672 cfil_sock_udp_notify_shutdown(struct socket *so, int how, int drop_flag, int shut_flag)
6673 {
6674 struct cfil_info *cfil_info = NULL;
6675 struct cfilhashhead *cfilhash = NULL;
6676 struct cfil_db *db = NULL;
6677 struct cfil_hash_entry *hash_entry = NULL;
6678 struct cfil_hash_entry *temp_hash_entry = NULL;
6679 errno_t error = 0;
6680 int done_count = 0;
6681 int kcunit;
6682
6683 socket_lock_assert_owned(so);
6684
6685 if ((so->so_flags & SOF_CONTENT_FILTER) != 0 && so->so_cfil_db != NULL) {
6686 db = so->so_cfil_db;
6687
6688 for (int i = 0; i < CFILHASHSIZE; i++) {
6689 cfilhash = &db->cfdb_hashbase[i];
6690
6691 LIST_FOREACH_SAFE(hash_entry, cfilhash, cfentry_link, temp_hash_entry) {
6692 if (hash_entry->cfentry_cfil != NULL) {
6693 cfil_info = hash_entry->cfentry_cfil;
6694
6695 // This flow is marked as DROP
6696 if (cfil_info->cfi_flags & drop_flag) {
6697 done_count++;
6698 continue;
6699 }
6700
6701 // This flow has been shut already, skip
6702 if (cfil_info->cfi_flags & shut_flag) {
6703 continue;
6704 }
6705 // Mark flow as shut
6706 cfil_info->cfi_flags |= shut_flag;
6707 done_count++;
6708
6709 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
6710 /* Disconnect incoming side */
6711 if (how != SHUT_WR) {
6712 error = cfil_dispatch_disconnect_event(so, cfil_info, kcunit, 0);
6713 }
6714 /* Disconnect outgoing side */
6715 if (how != SHUT_RD) {
6716 error = cfil_dispatch_disconnect_event(so, cfil_info, kcunit, 1);
6717 }
6718 }
6719 }
6720 }
6721 }
6722 }
6723
6724 if (done_count == 0) {
6725 error = ENOTCONN;
6726 }
6727 return error;
6728 }
6729
6730 int
6731 cfil_sock_udp_shutdown(struct socket *so, int *how)
6732 {
6733 int error = 0;
6734
6735 if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || (so->so_cfil_db == NULL)) {
6736 goto done;
6737 }
6738
6739 socket_lock_assert_owned(so);
6740
6741 CFIL_LOG(LOG_INFO, "so %llx how %d",
6742 (uint64_t)VM_KERNEL_ADDRPERM(so), *how);
6743
6744 /*
6745 * Check the state of the socket before the content filter
6746 */
6747 if (*how != SHUT_WR && (so->so_state & SS_CANTRCVMORE) != 0) {
6748 /* read already shut down */
6749 error = ENOTCONN;
6750 goto done;
6751 }
6752 if (*how != SHUT_RD && (so->so_state & SS_CANTSENDMORE) != 0) {
6753 /* write already shut down */
6754 error = ENOTCONN;
6755 goto done;
6756 }
6757
6758 /*
6759 * shutdown read: SHUT_RD or SHUT_RDWR
6760 */
6761 if (*how != SHUT_WR) {
6762 error = cfil_sock_udp_notify_shutdown(so, SHUT_RD, CFIF_DROP, CFIF_SHUT_RD);
6763 if (error != 0) {
6764 goto done;
6765 }
6766 }
6767 /*
6768 * shutdown write: SHUT_WR or SHUT_RDWR
6769 */
6770 if (*how != SHUT_RD) {
6771 error = cfil_sock_udp_notify_shutdown(so, SHUT_WR, CFIF_DROP, CFIF_SHUT_WR);
6772 if (error != 0) {
6773 goto done;
6774 }
6775
6776 /*
6777 * When outgoing data is pending, we delay the shutdown at the
6778 * protocol level until the content filters give the final
6779 * verdict on the pending data.
6780 */
6781 if (cfil_sock_data_pending(&so->so_snd) != 0) {
6782 /*
6783 * When shutting down the read and write sides at once
6784 * we can proceed to the final shutdown of the read
6785 * side. Otherwise, we just return.
6786 */
6787 if (*how == SHUT_WR) {
6788 error = EJUSTRETURN;
6789 } else if (*how == SHUT_RDWR) {
6790 *how = SHUT_RD;
6791 }
6792 }
6793 }
6794 done:
6795 return error;
6796 }
6797
6798 void
6799 cfil_sock_udp_close_wait(struct socket *so)
6800 {
6801 socket_lock_assert_owned(so);
6802
6803 while (cfil_filters_udp_attached(so, FALSE)) {
6804 /*
6805 * Notify the filters we are going away so they can detach
6806 */
6807 cfil_sock_udp_notify_shutdown(so, SHUT_RDWR, 0, 0);
6808
6809 /*
6810 * Make sure we need to wait after the filter are notified
6811 * of the disconnection
6812 */
6813 if (cfil_filters_udp_attached(so, TRUE) == 0) {
6814 break;
6815 }
6816 }
6817 }
6818
6819 void
6820 cfil_sock_udp_is_closed(struct socket *so)
6821 {
6822 struct cfil_info *cfil_info = NULL;
6823 struct cfilhashhead *cfilhash = NULL;
6824 struct cfil_db *db = NULL;
6825 struct cfil_hash_entry *hash_entry = NULL;
6826 struct cfil_hash_entry *temp_hash_entry = NULL;
6827 errno_t error = 0;
6828 int kcunit;
6829
6830 socket_lock_assert_owned(so);
6831
6832 if ((so->so_flags & SOF_CONTENT_FILTER) != 0 && so->so_cfil_db != NULL) {
6833 db = so->so_cfil_db;
6834
6835 for (int i = 0; i < CFILHASHSIZE; i++) {
6836 cfilhash = &db->cfdb_hashbase[i];
6837
6838 LIST_FOREACH_SAFE(hash_entry, cfilhash, cfentry_link, temp_hash_entry) {
6839 if (hash_entry->cfentry_cfil != NULL) {
6840 cfil_info = hash_entry->cfentry_cfil;
6841
6842 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
6843 /* Let the filters know of the closing */
6844 error = cfil_dispatch_closed_event(so, cfil_info, kcunit);
6845 }
6846
6847 /* Last chance to push passed data out */
6848 error = cfil_acquire_sockbuf(so, cfil_info, 1);
6849 if (error == 0) {
6850 cfil_service_inject_queue(so, cfil_info, 1);
6851 }
6852 cfil_release_sockbuf(so, 1);
6853
6854 cfil_info->cfi_flags |= CFIF_SOCK_CLOSED;
6855
6856 /* Pending data needs to go */
6857 cfil_flush_queues(so, cfil_info);
6858
6859 CFIL_INFO_VERIFY(cfil_info);
6860 }
6861 }
6862 }
6863 }
6864 }
6865
6866 void
6867 cfil_sock_udp_buf_update(struct sockbuf *sb)
6868 {
6869 struct cfil_info *cfil_info = NULL;
6870 struct cfilhashhead *cfilhash = NULL;
6871 struct cfil_db *db = NULL;
6872 struct cfil_hash_entry *hash_entry = NULL;
6873 struct cfil_hash_entry *temp_hash_entry = NULL;
6874 errno_t error = 0;
6875 int outgoing;
6876 struct socket *so = sb->sb_so;
6877
6878 socket_lock_assert_owned(so);
6879
6880 if ((so->so_flags & SOF_CONTENT_FILTER) != 0 && so->so_cfil_db != NULL) {
6881 if (!cfil_sbtrim) {
6882 return;
6883 }
6884
6885 db = so->so_cfil_db;
6886
6887 for (int i = 0; i < CFILHASHSIZE; i++) {
6888 cfilhash = &db->cfdb_hashbase[i];
6889
6890 LIST_FOREACH_SAFE(hash_entry, cfilhash, cfentry_link, temp_hash_entry) {
6891 if (hash_entry->cfentry_cfil != NULL) {
6892 cfil_info = hash_entry->cfentry_cfil;
6893
6894 if ((sb->sb_flags & SB_RECV) == 0) {
6895 if ((cfil_info->cfi_flags & CFIF_RETRY_INJECT_OUT) == 0) {
6896 return;
6897 }
6898 outgoing = 1;
6899 OSIncrementAtomic(&cfil_stats.cfs_inject_q_out_retry);
6900 } else {
6901 if ((cfil_info->cfi_flags & CFIF_RETRY_INJECT_IN) == 0) {
6902 return;
6903 }
6904 outgoing = 0;
6905 OSIncrementAtomic(&cfil_stats.cfs_inject_q_in_retry);
6906 }
6907
6908 CFIL_LOG(LOG_NOTICE, "so %llx outgoing %d",
6909 (uint64_t)VM_KERNEL_ADDRPERM(so), outgoing);
6910
6911 error = cfil_acquire_sockbuf(so, cfil_info, outgoing);
6912 if (error == 0) {
6913 cfil_service_inject_queue(so, cfil_info, outgoing);
6914 }
6915 cfil_release_sockbuf(so, outgoing);
6916 }
6917 }
6918 }
6919 }
6920 }
6921
6922 void
6923 cfil_filter_show(u_int32_t kcunit)
6924 {
6925 struct content_filter *cfc = NULL;
6926 struct cfil_entry *entry;
6927 int count = 0;
6928
6929 if (content_filters == NULL) {
6930 return;
6931 }
6932 if (kcunit > MAX_CONTENT_FILTER) {
6933 return;
6934 }
6935
6936 cfil_rw_lock_shared(&cfil_lck_rw);
6937
6938 if (content_filters[kcunit - 1] == NULL) {
6939 cfil_rw_unlock_shared(&cfil_lck_rw);
6940 return;
6941 }
6942 cfc = content_filters[kcunit - 1];
6943
6944 CFIL_LOG(LOG_ERR, "CFIL: FILTER SHOW: Filter <unit %d, entry count %d> flags <%lx>:",
6945 kcunit, cfc->cf_sock_count, (unsigned long)cfc->cf_flags);
6946 if (cfc->cf_flags & CFF_DETACHING) {
6947 CFIL_LOG(LOG_ERR, "CFIL: FILTER SHOW: - DETACHING");
6948 }
6949 if (cfc->cf_flags & CFF_ACTIVE) {
6950 CFIL_LOG(LOG_ERR, "CFIL: FILTER SHOW: - ACTIVE");
6951 }
6952 if (cfc->cf_flags & CFF_FLOW_CONTROLLED) {
6953 CFIL_LOG(LOG_ERR, "CFIL: FILTER SHOW: - FLOW CONTROLLED");
6954 }
6955
6956 TAILQ_FOREACH(entry, &cfc->cf_sock_entries, cfe_link) {
6957 if (entry->cfe_cfil_info && entry->cfe_cfil_info->cfi_so) {
6958 struct cfil_info *cfil_info = entry->cfe_cfil_info;
6959
6960 count++;
6961
6962 if (entry->cfe_flags & CFEF_CFIL_DETACHED) {
6963 cfil_info_log(LOG_ERR, cfil_info, "CFIL: FILTER SHOW: - DETACHED");
6964 } else {
6965 cfil_info_log(LOG_ERR, cfil_info, "CFIL: FILTER SHOW: - ATTACHED");
6966 }
6967 }
6968 }
6969
6970 CFIL_LOG(LOG_ERR, "CFIL: FILTER SHOW: Filter - total entries shown: %d", count);
6971
6972 cfil_rw_unlock_shared(&cfil_lck_rw);
6973 }
6974
6975 void
6976 cfil_info_show(void)
6977 {
6978 struct cfil_info *cfil_info;
6979 int count = 0;
6980
6981 cfil_rw_lock_shared(&cfil_lck_rw);
6982
6983 CFIL_LOG(LOG_ERR, "CFIL: INFO SHOW: count %d", cfil_sock_attached_count);
6984
6985 TAILQ_FOREACH(cfil_info, &cfil_sock_head, cfi_link) {
6986 count++;
6987
6988 cfil_info_log(LOG_ERR, cfil_info, "CFIL: INFO SHOW");
6989
6990 if (cfil_info->cfi_flags & CFIF_DROP) {
6991 CFIL_LOG(LOG_ERR, "CFIL: INFO FLAG - DROP");
6992 }
6993 if (cfil_info->cfi_flags & CFIF_CLOSE_WAIT) {
6994 CFIL_LOG(LOG_ERR, "CFIL: INFO FLAG - CLOSE_WAIT");
6995 }
6996 if (cfil_info->cfi_flags & CFIF_SOCK_CLOSED) {
6997 CFIL_LOG(LOG_ERR, "CFIL: INFO FLAG - SOCK_CLOSED");
6998 }
6999 if (cfil_info->cfi_flags & CFIF_RETRY_INJECT_IN) {
7000 CFIL_LOG(LOG_ERR, "CFIL: INFO FLAG - RETRY_INJECT_IN");
7001 }
7002 if (cfil_info->cfi_flags & CFIF_RETRY_INJECT_OUT) {
7003 CFIL_LOG(LOG_ERR, "CFIL: INFO FLAG - RETRY_INJECT_OUT");
7004 }
7005 if (cfil_info->cfi_flags & CFIF_SHUT_WR) {
7006 CFIL_LOG(LOG_ERR, "CFIL: INFO FLAG - SHUT_WR");
7007 }
7008 if (cfil_info->cfi_flags & CFIF_SHUT_RD) {
7009 CFIL_LOG(LOG_ERR, "CFIL: INFO FLAG - SHUT_RD");
7010 }
7011 }
7012
7013 CFIL_LOG(LOG_ERR, "CFIL: INFO SHOW: total cfil_info shown: %d", count);
7014
7015 cfil_rw_unlock_shared(&cfil_lck_rw);
7016 }
7017
7018 bool
7019 cfil_info_idle_timed_out(struct cfil_info *cfil_info, int timeout, u_int64_t current_time)
7020 {
7021 if (cfil_info && cfil_info->cfi_hash_entry &&
7022 (current_time - cfil_info->cfi_hash_entry->cfentry_lastused >= (u_int64_t)timeout)) {
7023 #if GC_DEBUG
7024 cfil_info_log(LOG_ERR, cfil_info, "CFIL: flow IDLE timeout expired");
7025 #endif
7026 return true;
7027 }
7028 return false;
7029 }
7030
7031 bool
7032 cfil_info_action_timed_out(struct cfil_info *cfil_info, int timeout)
7033 {
7034 struct cfil_entry *entry;
7035 struct timeval current_tv;
7036 struct timeval diff_time;
7037
7038 if (cfil_info == NULL) {
7039 return false;
7040 }
7041
7042 /*
7043 * If we have queued up more data than passed offset and we haven't received
7044 * an action from user space for a while (the user space filter might have crashed),
7045 * return action timed out.
7046 */
7047 if (cfil_info->cfi_snd.cfi_pending_last > cfil_info->cfi_snd.cfi_pass_offset ||
7048 cfil_info->cfi_rcv.cfi_pending_last > cfil_info->cfi_rcv.cfi_pass_offset) {
7049 microuptime(&current_tv);
7050
7051 for (int kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
7052 entry = &cfil_info->cfi_entries[kcunit - 1];
7053
7054 if (entry->cfe_filter == NULL) {
7055 continue;
7056 }
7057
7058 if (cfil_info->cfi_snd.cfi_pending_last > entry->cfe_snd.cfe_pass_offset ||
7059 cfil_info->cfi_rcv.cfi_pending_last > entry->cfe_rcv.cfe_pass_offset) {
7060 // haven't gotten an action from this filter, check timeout
7061 timersub(&current_tv, &entry->cfe_last_action, &diff_time);
7062 if (diff_time.tv_sec >= timeout) {
7063 #if GC_DEBUG
7064 cfil_info_log(LOG_ERR, cfil_info, "CFIL: flow ACTION timeout expired");
7065 #endif
7066 return true;
7067 }
7068 }
7069 }
7070 }
7071 return false;
7072 }
7073
7074 bool
7075 cfil_info_buffer_threshold_exceeded(struct cfil_info *cfil_info)
7076 {
7077 if (cfil_info == NULL) {
7078 return false;
7079 }
7080
7081 /*
7082 * Clean up flow if it exceeded queue thresholds
7083 */
7084 if (cfil_info->cfi_snd.cfi_tail_drop_cnt ||
7085 cfil_info->cfi_rcv.cfi_tail_drop_cnt) {
7086 #if GC_DEBUG
7087 CFIL_LOG(LOG_ERR, "CFIL: queue threshold exceeded: mbuf max <count: %d bytes: %d> tail drop count <OUT: %d IN: %d>",
7088 cfil_udp_gc_mbuf_num_max,
7089 cfil_udp_gc_mbuf_cnt_max,
7090 cfil_info->cfi_snd.cfi_tail_drop_cnt,
7091 cfil_info->cfi_rcv.cfi_tail_drop_cnt);
7092 cfil_info_log(LOG_ERR, cfil_info, "CFIL: queue threshold exceeded");
7093 #endif
7094 return true;
7095 }
7096
7097 return false;
7098 }
7099
7100 static void
7101 cfil_udp_gc_thread_sleep(bool forever)
7102 {
7103 if (forever) {
7104 (void) assert_wait((event_t) &cfil_sock_udp_attached_count,
7105 THREAD_INTERRUPTIBLE);
7106 } else {
7107 uint64_t deadline = 0;
7108 nanoseconds_to_absolutetime(UDP_FLOW_GC_RUN_INTERVAL_NSEC, &deadline);
7109 clock_absolutetime_interval_to_deadline(deadline, &deadline);
7110
7111 (void) assert_wait_deadline(&cfil_sock_udp_attached_count,
7112 THREAD_INTERRUPTIBLE, deadline);
7113 }
7114 }
7115
7116 static void
7117 cfil_udp_gc_thread_func(void *v, wait_result_t w)
7118 {
7119 #pragma unused(v, w)
7120
7121 ASSERT(cfil_udp_gc_thread == current_thread());
7122 thread_set_thread_name(current_thread(), "CFIL_UPD_GC");
7123
7124 // Kick off gc shortly
7125 cfil_udp_gc_thread_sleep(false);
7126 thread_block_parameter((thread_continue_t) cfil_info_udp_expire, NULL);
7127 /* NOTREACHED */
7128 }
7129
7130 static void
7131 cfil_info_udp_expire(void *v, wait_result_t w)
7132 {
7133 #pragma unused(v, w)
7134
7135 static uint64_t expired_array[UDP_FLOW_GC_MAX_COUNT];
7136 static uint32_t expired_count = 0;
7137
7138 struct cfil_info *cfil_info;
7139 struct cfil_hash_entry *hash_entry;
7140 struct cfil_db *db;
7141 struct socket *so;
7142 u_int64_t current_time = 0;
7143
7144 current_time = net_uptime();
7145
7146 // Get all expired UDP flow ids
7147 cfil_rw_lock_shared(&cfil_lck_rw);
7148
7149 if (cfil_sock_udp_attached_count == 0) {
7150 cfil_rw_unlock_shared(&cfil_lck_rw);
7151 goto go_sleep;
7152 }
7153
7154 TAILQ_FOREACH(cfil_info, &cfil_sock_head, cfi_link) {
7155 if (expired_count >= UDP_FLOW_GC_MAX_COUNT) {
7156 break;
7157 }
7158
7159 if (IS_IP_DGRAM(cfil_info->cfi_so)) {
7160 if (cfil_info_idle_timed_out(cfil_info, UDP_FLOW_GC_IDLE_TO, current_time) ||
7161 cfil_info_action_timed_out(cfil_info, UDP_FLOW_GC_ACTION_TO) ||
7162 cfil_info_buffer_threshold_exceeded(cfil_info)) {
7163 expired_array[expired_count] = cfil_info->cfi_sock_id;
7164 expired_count++;
7165 }
7166 }
7167 }
7168 cfil_rw_unlock_shared(&cfil_lck_rw);
7169
7170 if (expired_count == 0) {
7171 goto go_sleep;
7172 }
7173
7174 for (uint32_t i = 0; i < expired_count; i++) {
7175 // Search for socket (UDP only and lock so)
7176 so = cfil_socket_from_sock_id(expired_array[i], true);
7177 if (so == NULL) {
7178 continue;
7179 }
7180
7181 cfil_info = cfil_db_get_cfil_info(so->so_cfil_db, expired_array[i]);
7182 if (cfil_info == NULL) {
7183 goto unlock;
7184 }
7185
7186 db = so->so_cfil_db;
7187 hash_entry = cfil_info->cfi_hash_entry;
7188
7189 if (db == NULL || hash_entry == NULL) {
7190 goto unlock;
7191 }
7192
7193 #if GC_DEBUG || LIFECYCLE_DEBUG
7194 cfil_info_log(LOG_ERR, cfil_info, "CFIL: LIFECYCLE: GC CLEAN UP");
7195 #endif
7196
7197 cfil_db_delete_entry(db, hash_entry);
7198 CFIL_INFO_FREE(cfil_info);
7199 OSIncrementAtomic(&cfil_stats.cfs_sock_detached);
7200
7201 if (so->so_flags & SOF_CONTENT_FILTER) {
7202 if (db->cfdb_count == 0) {
7203 so->so_flags &= ~SOF_CONTENT_FILTER;
7204 }
7205 VERIFY(so->so_usecount > 0);
7206 so->so_usecount--;
7207 }
7208 unlock:
7209 socket_unlock(so, 1);
7210 }
7211
7212 #if GC_DEBUG
7213 CFIL_LOG(LOG_ERR, "CFIL: UDP flow idle timeout check: expired %d idle flows", expired_count);
7214 #endif
7215 expired_count = 0;
7216
7217 go_sleep:
7218
7219 // Sleep forever (until waken up) if no more UDP flow to clean
7220 cfil_rw_lock_shared(&cfil_lck_rw);
7221 cfil_udp_gc_thread_sleep(cfil_sock_udp_attached_count == 0 ? true : false);
7222 cfil_rw_unlock_shared(&cfil_lck_rw);
7223 thread_block_parameter((thread_continue_t)cfil_info_udp_expire, NULL);
7224 /* NOTREACHED */
7225 }
7226
7227 struct m_tag *
7228 cfil_dgram_save_socket_state(struct cfil_info *cfil_info, struct mbuf *m)
7229 {
7230 struct m_tag *tag = NULL;
7231 struct cfil_tag *ctag = NULL;
7232 struct cfil_hash_entry *hash_entry = NULL;
7233 struct inpcb *inp = NULL;
7234
7235 if (cfil_info == NULL || cfil_info->cfi_so == NULL ||
7236 cfil_info->cfi_hash_entry == NULL || m == NULL || !(m->m_flags & M_PKTHDR)) {
7237 return NULL;
7238 }
7239
7240 inp = sotoinpcb(cfil_info->cfi_so);
7241
7242 /* Allocate a tag */
7243 tag = m_tag_create(KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_CFIL_UDP,
7244 sizeof(struct cfil_tag), M_DONTWAIT, m);
7245
7246 if (tag) {
7247 ctag = (struct cfil_tag*)(tag + 1);
7248 ctag->cfil_so_state_change_cnt = cfil_info->cfi_so->so_state_change_cnt;
7249 ctag->cfil_so_options = cfil_info->cfi_so->so_options;
7250 ctag->cfil_inp_flags = inp ? inp->inp_flags : 0;
7251
7252 hash_entry = cfil_info->cfi_hash_entry;
7253 if (hash_entry->cfentry_family == AF_INET6) {
7254 fill_ip6_sockaddr_4_6(&ctag->cfil_faddr,
7255 &hash_entry->cfentry_faddr.addr6,
7256 hash_entry->cfentry_fport);
7257 } else if (hash_entry->cfentry_family == AF_INET) {
7258 fill_ip_sockaddr_4_6(&ctag->cfil_faddr,
7259 hash_entry->cfentry_faddr.addr46.ia46_addr4,
7260 hash_entry->cfentry_fport);
7261 }
7262 m_tag_prepend(m, tag);
7263 return tag;
7264 }
7265 return NULL;
7266 }
7267
7268 struct m_tag *
7269 cfil_dgram_get_socket_state(struct mbuf *m, uint32_t *state_change_cnt, uint32_t *options,
7270 struct sockaddr **faddr, int *inp_flags)
7271 {
7272 struct m_tag *tag = NULL;
7273 struct cfil_tag *ctag = NULL;
7274
7275 tag = m_tag_locate(m, KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_CFIL_UDP, NULL);
7276 if (tag) {
7277 ctag = (struct cfil_tag *)(tag + 1);
7278 if (state_change_cnt) {
7279 *state_change_cnt = ctag->cfil_so_state_change_cnt;
7280 }
7281 if (options) {
7282 *options = ctag->cfil_so_options;
7283 }
7284 if (faddr) {
7285 *faddr = (struct sockaddr *) &ctag->cfil_faddr;
7286 }
7287 if (inp_flags) {
7288 *inp_flags = ctag->cfil_inp_flags;
7289 }
7290
7291 /*
7292 * Unlink tag and hand it over to caller.
7293 * Note that caller will be responsible to free it.
7294 */
7295 m_tag_unlink(m, tag);
7296 return tag;
7297 }
7298 return NULL;
7299 }
7300
7301 boolean_t
7302 cfil_dgram_peek_socket_state(struct mbuf *m, int *inp_flags)
7303 {
7304 struct m_tag *tag = NULL;
7305 struct cfil_tag *ctag = NULL;
7306
7307 tag = m_tag_locate(m, KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_CFIL_UDP, NULL);
7308 if (tag) {
7309 ctag = (struct cfil_tag *)(tag + 1);
7310 if (inp_flags) {
7311 *inp_flags = ctag->cfil_inp_flags;
7312 }
7313 return true;
7314 }
7315 return false;
7316 }
7317
7318 static int
7319 cfil_dispatch_stats_event_locked(int kcunit, struct cfil_stats_report_buffer *buffer, uint32_t stats_count)
7320 {
7321 struct content_filter *cfc = NULL;
7322 errno_t error = 0;
7323 size_t msgsize = 0;
7324
7325 if (buffer == NULL || stats_count == 0) {
7326 return error;
7327 }
7328
7329 if (content_filters == NULL || kcunit > MAX_CONTENT_FILTER) {
7330 return error;
7331 }
7332
7333 cfc = content_filters[kcunit - 1];
7334 if (cfc == NULL) {
7335 return error;
7336 }
7337
7338 /* Would be wasteful to try */
7339 if (cfc->cf_flags & CFF_FLOW_CONTROLLED) {
7340 error = ENOBUFS;
7341 goto done;
7342 }
7343
7344 msgsize = sizeof(struct cfil_msg_stats_report) + (sizeof(struct cfil_msg_sock_stats) * stats_count);
7345 buffer->msghdr.cfm_len = (uint32_t)msgsize;
7346 buffer->msghdr.cfm_version = 1;
7347 buffer->msghdr.cfm_type = CFM_TYPE_EVENT;
7348 buffer->msghdr.cfm_op = CFM_OP_STATS;
7349 buffer->msghdr.cfm_sock_id = 0;
7350 buffer->count = stats_count;
7351
7352 #if STATS_DEBUG
7353 CFIL_LOG(LOG_ERR, "STATS (kcunit %d): msg size %lu - %lu %lu %lu",
7354 kcunit,
7355 (unsigned long)msgsize,
7356 (unsigned long)sizeof(struct cfil_msg_stats_report),
7357 (unsigned long)sizeof(struct cfil_msg_sock_stats),
7358 (unsigned long)stats_count);
7359 #endif
7360
7361 error = ctl_enqueuedata(cfc->cf_kcref, cfc->cf_kcunit,
7362 buffer,
7363 msgsize,
7364 CTL_DATA_EOR);
7365 if (error != 0) {
7366 CFIL_LOG(LOG_ERR, "ctl_enqueuedata() failed: %d", error);
7367 goto done;
7368 }
7369 OSIncrementAtomic(&cfil_stats.cfs_stats_event_ok);
7370
7371 #if STATS_DEBUG
7372 CFIL_LOG(LOG_ERR, "CFIL: STATS REPORT: send msg to %d", kcunit);
7373 #endif
7374
7375 done:
7376
7377 if (error == ENOBUFS) {
7378 OSIncrementAtomic(
7379 &cfil_stats.cfs_stats_event_flow_control);
7380
7381 if (!cfil_rw_lock_shared_to_exclusive(&cfil_lck_rw)) {
7382 cfil_rw_lock_exclusive(&cfil_lck_rw);
7383 }
7384
7385 cfc->cf_flags |= CFF_FLOW_CONTROLLED;
7386
7387 cfil_rw_unlock_exclusive(&cfil_lck_rw);
7388 } else if (error != 0) {
7389 OSIncrementAtomic(&cfil_stats.cfs_stats_event_fail);
7390 }
7391
7392 return error;
7393 }
7394
7395 static void
7396 cfil_stats_report_thread_sleep(bool forever)
7397 {
7398 #if STATS_DEBUG
7399 CFIL_LOG(LOG_ERR, "CFIL: STATS COLLECTION SLEEP");
7400 #endif
7401
7402 if (forever) {
7403 (void) assert_wait((event_t) &cfil_sock_attached_stats_count,
7404 THREAD_INTERRUPTIBLE);
7405 } else {
7406 uint64_t deadline = 0;
7407 nanoseconds_to_absolutetime(CFIL_STATS_REPORT_RUN_INTERVAL_NSEC, &deadline);
7408 clock_absolutetime_interval_to_deadline(deadline, &deadline);
7409
7410 (void) assert_wait_deadline(&cfil_sock_attached_stats_count,
7411 THREAD_INTERRUPTIBLE, deadline);
7412 }
7413 }
7414
7415 static void
7416 cfil_stats_report_thread_func(void *v, wait_result_t w)
7417 {
7418 #pragma unused(v, w)
7419
7420 ASSERT(cfil_stats_report_thread == current_thread());
7421 thread_set_thread_name(current_thread(), "CFIL_STATS_REPORT");
7422
7423 // Kick off gc shortly
7424 cfil_stats_report_thread_sleep(false);
7425 thread_block_parameter((thread_continue_t) cfil_stats_report, NULL);
7426 /* NOTREACHED */
7427 }
7428
7429 static bool
7430 cfil_stats_collect_flow_stats_for_filter(int kcunit,
7431 struct cfil_info *cfil_info,
7432 struct cfil_entry *entry,
7433 struct timeval current_tv)
7434 {
7435 struct cfil_stats_report_buffer *buffer = NULL;
7436 struct cfil_msg_sock_stats *flow_array = NULL;
7437 struct cfil_msg_sock_stats *stats = NULL;
7438 struct inpcb *inp = NULL;
7439 struct timeval diff_time;
7440 uint64_t diff_time_usecs;
7441 int index = 0;
7442
7443 if (entry->cfe_stats_report_frequency == 0) {
7444 return false;
7445 }
7446
7447 buffer = global_cfil_stats_report_buffers[kcunit - 1];
7448 if (buffer == NULL) {
7449 #if STATS_DEBUG
7450 CFIL_LOG(LOG_ERR, "CFIL: STATS: no buffer");
7451 #endif
7452 return false;
7453 }
7454
7455 timersub(&current_tv, &entry->cfe_stats_report_ts, &diff_time);
7456 diff_time_usecs = (diff_time.tv_sec * USEC_PER_SEC) + diff_time.tv_usec;
7457
7458 #if STATS_DEBUG
7459 CFIL_LOG(LOG_ERR, "CFIL: STATS REPORT - elapsed time - ts %llu %llu cur ts %llu %llu diff %llu %llu (usecs %llu) @freq %llu usecs sockID %llu",
7460 (unsigned long long)entry->cfe_stats_report_ts.tv_sec,
7461 (unsigned long long)entry->cfe_stats_report_ts.tv_usec,
7462 (unsigned long long)current_tv.tv_sec,
7463 (unsigned long long)current_tv.tv_usec,
7464 (unsigned long long)diff_time.tv_sec,
7465 (unsigned long long)diff_time.tv_usec,
7466 (unsigned long long)diff_time_usecs,
7467 (unsigned long long)((entry->cfe_stats_report_frequency * NSEC_PER_MSEC) / NSEC_PER_USEC),
7468 cfil_info->cfi_sock_id);
7469 #endif
7470
7471 // Compare elapsed time in usecs
7472 if (diff_time_usecs >= (entry->cfe_stats_report_frequency * NSEC_PER_MSEC) / NSEC_PER_USEC) {
7473 #if STATS_DEBUG
7474 CFIL_LOG(LOG_ERR, "CFIL: STATS REPORT - in %llu reported %llu",
7475 cfil_info->cfi_byte_inbound_count,
7476 entry->cfe_byte_inbound_count_reported);
7477 CFIL_LOG(LOG_ERR, "CFIL: STATS REPORT - out %llu reported %llu",
7478 cfil_info->cfi_byte_outbound_count,
7479 entry->cfe_byte_outbound_count_reported);
7480 #endif
7481 // Check if flow has new bytes that have not been reported
7482 if (entry->cfe_byte_inbound_count_reported < cfil_info->cfi_byte_inbound_count ||
7483 entry->cfe_byte_outbound_count_reported < cfil_info->cfi_byte_outbound_count) {
7484 flow_array = (struct cfil_msg_sock_stats *)&buffer->stats;
7485 index = global_cfil_stats_counts[kcunit - 1];
7486
7487 stats = &flow_array[index];
7488 stats->cfs_sock_id = cfil_info->cfi_sock_id;
7489 stats->cfs_byte_inbound_count = cfil_info->cfi_byte_inbound_count;
7490 stats->cfs_byte_outbound_count = cfil_info->cfi_byte_outbound_count;
7491
7492 if (entry->cfe_laddr_sent == false) {
7493 /* cache it if necessary */
7494 if (cfil_info->cfi_so_attach_laddr.sa.sa_len == 0) {
7495 inp = cfil_info->cfi_so ? sotoinpcb(cfil_info->cfi_so) : NULL;
7496 if (inp != NULL) {
7497 boolean_t outgoing = (cfil_info->cfi_dir == CFS_CONNECTION_DIR_OUT);
7498 union sockaddr_in_4_6 *src = outgoing ? &cfil_info->cfi_so_attach_laddr : NULL;
7499 union sockaddr_in_4_6 *dst = outgoing ? NULL : &cfil_info->cfi_so_attach_laddr;
7500 cfil_fill_event_msg_addresses(cfil_info->cfi_hash_entry, inp,
7501 src, dst, !IS_INP_V6(inp), outgoing);
7502 }
7503 }
7504
7505 if (cfil_info->cfi_so_attach_laddr.sa.sa_len != 0) {
7506 stats->cfs_laddr.sin6 = cfil_info->cfi_so_attach_laddr.sin6;
7507 entry->cfe_laddr_sent = true;
7508 }
7509 }
7510
7511 global_cfil_stats_counts[kcunit - 1]++;
7512
7513 entry->cfe_stats_report_ts = current_tv;
7514 entry->cfe_byte_inbound_count_reported = cfil_info->cfi_byte_inbound_count;
7515 entry->cfe_byte_outbound_count_reported = cfil_info->cfi_byte_outbound_count;
7516 #if STATS_DEBUG
7517 cfil_info_log(LOG_ERR, cfil_info, "CFIL: LIFECYCLE: STATS COLLECTED");
7518 #endif
7519 CFI_ADD_TIME_LOG(cfil_info, &current_tv, &cfil_info->cfi_first_event, CFM_OP_STATS);
7520 return true;
7521 }
7522 }
7523 return false;
7524 }
7525
7526 static void
7527 cfil_stats_report(void *v, wait_result_t w)
7528 {
7529 #pragma unused(v, w)
7530
7531 struct cfil_info *cfil_info = NULL;
7532 struct cfil_entry *entry = NULL;
7533 struct timeval current_tv;
7534 uint32_t flow_count = 0;
7535 uint64_t saved_next_sock_id = 0; // Next sock id to be reported for next loop
7536 bool flow_reported = false;
7537
7538 #if STATS_DEBUG
7539 CFIL_LOG(LOG_ERR, "CFIL: STATS COLLECTION RUNNING");
7540 #endif
7541
7542 do {
7543 // Collect all sock ids of flows that has new stats
7544 cfil_rw_lock_shared(&cfil_lck_rw);
7545
7546 if (cfil_sock_attached_stats_count == 0) {
7547 #if STATS_DEBUG
7548 CFIL_LOG(LOG_ERR, "CFIL: STATS: no flow");
7549 #endif
7550 cfil_rw_unlock_shared(&cfil_lck_rw);
7551 goto go_sleep;
7552 }
7553
7554 for (int kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
7555 if (global_cfil_stats_report_buffers[kcunit - 1] != NULL) {
7556 memset(global_cfil_stats_report_buffers[kcunit - 1], 0, sizeof(struct cfil_stats_report_buffer));
7557 }
7558 global_cfil_stats_counts[kcunit - 1] = 0;
7559 }
7560
7561 microuptime(&current_tv);
7562 flow_count = 0;
7563
7564 TAILQ_FOREACH(cfil_info, &cfil_sock_head_stats, cfi_link_stats) {
7565 if (saved_next_sock_id != 0 &&
7566 saved_next_sock_id == cfil_info->cfi_sock_id) {
7567 // Here is where we left off previously, start accumulating
7568 saved_next_sock_id = 0;
7569 }
7570
7571 if (saved_next_sock_id == 0) {
7572 if (flow_count >= CFIL_STATS_REPORT_MAX_COUNT) {
7573 // Examine a fixed number of flows each round. Remember the current flow
7574 // so we can start from here for next loop
7575 saved_next_sock_id = cfil_info->cfi_sock_id;
7576 break;
7577 }
7578
7579 flow_reported = false;
7580 for (int kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
7581 entry = &cfil_info->cfi_entries[kcunit - 1];
7582 if (entry->cfe_filter == NULL) {
7583 #if STATS_DEBUG
7584 CFIL_LOG(LOG_NOTICE, "CFIL: STATS REPORT - so %llx no filter",
7585 cfil_info->cfi_so ? (uint64_t)VM_KERNEL_ADDRPERM(cfil_info->cfi_so) : 0);
7586 #endif
7587 continue;
7588 }
7589
7590 if ((entry->cfe_stats_report_frequency > 0) &&
7591 cfil_stats_collect_flow_stats_for_filter(kcunit, cfil_info, entry, current_tv) == true) {
7592 flow_reported = true;
7593 }
7594 }
7595 if (flow_reported == true) {
7596 flow_count++;
7597 }
7598 }
7599 }
7600
7601 if (flow_count > 0) {
7602 #if STATS_DEBUG
7603 CFIL_LOG(LOG_ERR, "CFIL: STATS reporting for %d flows", flow_count);
7604 #endif
7605 for (int kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
7606 if (global_cfil_stats_report_buffers[kcunit - 1] != NULL &&
7607 global_cfil_stats_counts[kcunit - 1] > 0) {
7608 cfil_dispatch_stats_event_locked(kcunit,
7609 global_cfil_stats_report_buffers[kcunit - 1],
7610 global_cfil_stats_counts[kcunit - 1]);
7611 }
7612 }
7613 } else {
7614 cfil_rw_unlock_shared(&cfil_lck_rw);
7615 goto go_sleep;
7616 }
7617
7618 cfil_rw_unlock_shared(&cfil_lck_rw);
7619
7620 // Loop again if we haven't finished the whole cfil_info list
7621 } while (saved_next_sock_id != 0);
7622
7623 go_sleep:
7624
7625 // Sleep forever (until waken up) if no more flow to report
7626 cfil_rw_lock_shared(&cfil_lck_rw);
7627 cfil_stats_report_thread_sleep(cfil_sock_attached_stats_count == 0 ? true : false);
7628 cfil_rw_unlock_shared(&cfil_lck_rw);
7629 thread_block_parameter((thread_continue_t) cfil_stats_report, NULL);
7630 /* NOTREACHED */
7631 }