1 /*
2 * Copyright (c) 2013-2019 Apple Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. Please obtain a copy of the License at
10 * http://www.opensource.apple.com/apsl/ and read it before using this
11 * file.
12 *
13 * The Original Code and all software distributed under the License are
14 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
18 * Please see the License for the specific language governing rights and
19 * limitations under the License.
20 *
21 * @APPLE_LICENSE_HEADER_END@
22 */
23
24 /*
25 * THEORY OF OPERATION
26 *
27 * The socket content filter subsystem provides a way for user space agents to
28 * make filtering decisions based on the content of the data being sent and
29 * received by TCP/IP sockets.
30 *
31 * A content filter user space agent gets a copy of the data and the data is
32 * also kept in a kernel buffer until the user space agent makes a pass or drop
33 * decision. This unidirectional flow of content avoids unnecessary data copies
34 * back to the kernel.
35 *
36 * A user space filter agent opens a kernel control socket with the name
37 * CONTENT_FILTER_CONTROL_NAME to attach to the socket content filter subsystem.
38 * When connected, a "struct content_filter" is created and set as the
39 * "unitinfo" of the corresponding kernel control socket instance.
40 *
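 * A minimal user space sketch of opening and attaching such a kernel control
 * socket (standard <sys/kern_control.h> usage; error handling omitted):
 *
 *      int fd = socket(PF_SYSTEM, SOCK_DGRAM, SYSPROTO_CONTROL);
 *      struct ctl_info info = { 0 };
 *      strlcpy(info.ctl_name, CONTENT_FILTER_CONTROL_NAME, sizeof(info.ctl_name));
 *      ioctl(fd, CTLIOCGINFO, &info);        // resolve the control name to an id
 *      struct sockaddr_ctl sc = { 0 };
 *      sc.sc_len = sizeof(sc);
 *      sc.sc_family = AF_SYSTEM;
 *      sc.ss_sysaddr = AF_SYS_CONTROL;
 *      sc.sc_id = info.ctl_id;
 *      sc.sc_unit = 0;                       // let the control layer pick a unit
 *      connect(fd, (struct sockaddr *)&sc, sizeof(sc));
 *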
41 * The socket content filter subsystem exchanges messages with the user space
42 * filter agent until an ultimate pass or drop decision is made by the
43 * user space filter agent.
44 *
45 * It should be noted that messages about many TCP/IP sockets can be multiplexed
46 * over a single kernel control socket.
47 *
48 * Notes:
49 * - The current implementation is limited to TCP sockets.
50 * - The current implementation supports up to MAX_CONTENT_FILTER simultaneous
51 * content filters (two on non-macOS platforms, eight on macOS).
52 *
53 *
54 * NECP FILTER CONTROL UNIT
55 *
56 * A user space filter agent uses the Network Extension Control Policy (NECP)
57 * database to specify which TCP/IP sockets need to be filtered. The NECP
58 * criteria may be based on a variety of properties like user ID or proc UUID.
59 *
60 * The NECP "filter control unit" is used by the socket content filter subsystem
61 * to deliver the relevant TCP/IP content information to the appropriate
62 * user space filter agent via its kernel control socket instance.
63 * This works as follows:
64 *
65 * 1) The user space filter agent specifies an NECP filter control unit when
66 * it adds its filtering rules to the NECP database.
67 *
68 * 2) The user space filter agent also sets its NECP filter control unit on the
69 * content filter kernel control socket via the socket option
70 * CFIL_OPT_NECP_CONTROL_UNIT.
71 *
72 * 3) The NECP database is consulted to find out if a given TCP/IP socket
73 * needs to be subjected to content filtering and returns the corresponding
74 * NECP filter control unit -- the NECP filter control unit is actually
75 * stored in the TCP/IP socket structure so the NECP lookup is really simple.
76 *
77 * 4) The NECP filter control unit is then used to find the corresponding
78 * kernel control socket instance.
79 *
80 * Note: NECP currently supports a single filter control unit per TCP/IP socket
81 * but this restriction may soon be lifted.
82 *
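 * For step 2) above, a sketch of how an agent could bind its NECP filter
 * control unit to the control socket (assuming "fd" is the connected kernel
 * control socket and "necp_unit" matches the unit used in its NECP rules):
 *
 *      uint32_t necp_unit = 1234;            // example value only
 *      setsockopt(fd, SYSPROTO_CONTROL, CFIL_OPT_NECP_CONTROL_UNIT,
 *          &necp_unit, sizeof(necp_unit));
 *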
83 *
84 * THE MESSAGING PROTOCOL
85 *
86 * The socket content filter subsystem and a user space filter agent
87 * communicate over the kernel control socket via an asynchronous
88 * messaging protocol (this is not a request-response protocol).
89 * The socket content filter subsystem sends event messages to the user
90 * space filter agent about the TCP/IP sockets it is interested to filter.
91 * The user space filter agent sends action messages to either allow
92 * data to pass or to disallow the data flow (and drop the connection).
93 *
94 * All messages over a content filter kernel control socket share the same
95 * common header of type "struct cfil_msg_hdr". The message type tells if
96 * it's an event message "CFM_TYPE_EVENT" or an action message "CFM_TYPE_ACTION".
97 * The message header field "cfm_sock_id" identifies a given TCP/IP socket.
98 * Note the message header length field may be padded for alignment and can
99 * be larger than the actual content of the message.
100 * The field "cfm_op" describes the kind of event or action.
101 *
102 * Here are the kinds of content filter events:
103 * - CFM_OP_SOCKET_ATTACHED: a new TCP/IP socket is being filtered
104 * - CFM_OP_SOCKET_CLOSED: A TCP/IP socket is closed
105 * - CFM_OP_DATA_OUT: A span of data is being sent on a TCP/IP socket
106 * - CFM_OP_DATA_IN: A span of data is being received on a TCP/IP socket
107 *
108 *
109 * EVENT MESSAGES
110 *
111 * The CFM_OP_DATA_OUT and CFM_OP_DATA_IN event messages contain a span of
112 * data that is being sent or received. The position of this span of data
113 * in the data flow is described by a set of start and end offsets. These
114 * are absolute 64-bit offsets. The first byte sent (or received) starts
115 * at offset 0 and ends at offset 1. The length of the content data
116 * is given by the difference between the end offset and the start offset.
117 *
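 * For example, if 1000 bytes have already been sent on a socket, a
 * CFM_OP_DATA_OUT event describing the next 500 bytes carries a start offset
 * of 1000 and an end offset of 1500.
 *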
118 * After a CFM_OP_SOCKET_ATTACHED is delivered, CFM_OP_DATA_OUT and
119 * CFM_OP_DATA_IN events are not delivered until a CFM_OP_DATA_UPDATE
120 * action message is sent by the user space filter agent.
121 *
122 * Note: absolute 64-bit offsets should be large enough for the foreseeable
123 * future. A 64-bit counter will wrap after 468 years at 10 Gbit/sec:
124 * 2^64 / ((10^10 / 8) * 60 * 60 * 24 * 365.25) = 467.63
125 *
126 * There are two kinds of primary content filter actions:
127 * - CFM_OP_DATA_UPDATE: to update pass or peek offsets for each direction.
128 * - CFM_OP_DROP: to shut down the socket and disallow further data flow
129 *
130 * There is also an action to mark a given client flow as already filtered
131 * at a higher level, CFM_OP_BLESS_CLIENT.
132 *
133 *
134 * ACTION MESSAGES
135 *
136 * The CFM_OP_DATA_UPDATE action messages let the user space filter
137 * agent allow data to flow up to the specified pass offset -- there
138 * is a pass offset for outgoing data and a pass offset for incoming data.
139 * When a new TCP/IP socket is attached to the content filter, each pass offset
140 * is initially set to 0 so no data is allowed to pass by default.
141 * When the pass offset is set to CFM_MAX_OFFSET via a CFM_OP_DATA_UPDATE
142 * then the data flow becomes unrestricted.
143 *
144 * Note that pass offsets can only be incremented. A CFM_OP_DATA_UPDATE message
145 * with a pass offset smaller than the pass offset of a previous
146 * CFM_OP_DATA_UPDATE message is silently ignored.
147 *
148 * A user space filter agent also uses CFM_OP_DATA_UPDATE action messages
149 * to tell the kernel how much data it wants to see by using the peek offsets.
150 * Just like pass offsets, there is a peek offset for each direction.
151 * When a new TCP/IP socket is attached to the content filter, each peek offset
152 * is initially set to 0 so no CFM_OP_DATA_OUT and CFM_OP_DATA_IN event
153 * messages are dispatched by default until a CFM_OP_DATA_UPDATE action message
154 * with a greater than 0 peek offset is sent by the user space filter agent.
155 * When the peek offset is set to CFM_MAX_OFFSET via a CFM_OP_DATA_UPDATE
156 * then the flow of data events becomes unrestricted.
157 *
158 * Note that peek offsets cannot be smaller than the corresponding pass offset.
159 * Also a peek offset cannot be smaller than the corresponding end offset
160 * of the last CFM_OP_DATA_OUT/CFM_OP_DATA_IN message dispatched. Trying
161 * to set too small a peek value is silently ignored.
162 *
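 * As an illustration, an agent that has inspected the first 1500 outgoing
 * bytes and wants them released while continuing to see outgoing data could
 * send the following (a sketch only; the cfa_* and cfm_* field names are those
 * used by this file, and the cfa_msghdr member name is an assumption):
 *
 *      struct cfil_msg_action action = { 0 };
 *      action.cfa_msghdr.cfm_len = sizeof(action);
 *      action.cfa_msghdr.cfm_version = CFM_VERSION_CURRENT;
 *      action.cfa_msghdr.cfm_type = CFM_TYPE_ACTION;
 *      action.cfa_msghdr.cfm_op = CFM_OP_DATA_UPDATE;
 *      action.cfa_msghdr.cfm_sock_id = sock_id;      // from the attached event
 *      action.cfa_out_pass_offset = 1500;            // release the first 1500 bytes
 *      action.cfa_out_peek_offset = CFM_MAX_OFFSET;  // keep delivering data events
 *      send(fd, &action, sizeof(action), 0);
 *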
163 *
164 * PER SOCKET "struct cfil_info"
165 *
166 * As soon as a TCP/IP socket gets attached to a content filter, a
167 * "struct cfil_info" is created to hold the content filtering state for this
168 * socket.
169 *
170 * The content filtering state is made of the following information
171 * for each direction:
172 * - The current pass offset;
173 * - The first and last offsets of the data pending, waiting for a filtering
174 * decision;
175 * - The inject queue for data that passed the filters and that needs
176 * to be re-injected;
177 * - A content filter specific state in a set of "struct cfil_entry"
178 *
179 *
180 * CONTENT FILTER STATE "struct cfil_entry"
181 *
182 * The "struct cfil_entry" maintains the information most relevant to the
183 * message handling over a kernel control socket with a user space filter agent.
184 *
185 * The "struct cfil_entry" holds the NECP filter control unit that corresponds
186 * to the kernel control socket unit it corresponds to and also has a pointer
187 * to the corresponding "struct content_filter".
188 *
189 * For each direction, "struct cfil_entry" maintains the following information:
190 * - The pass offset
191 * - The peek offset
192 * - The offset of the last data peeked at by the filter
193 * - A queue of data that's waiting to be delivered to the user space filter
194 * agent on the kernel control socket
195 * - A queue of data for which event messages have been sent on the kernel
196 * control socket and are pending for a filtering decision.
197 *
198 *
199 * CONTENT FILTER QUEUES
200 *
201 * Data that is being filtered is steered away from the TCP/IP socket buffer
202 * and instead will sit in one of three content filter queues until the data
203 * can be re-injected into the TCP/IP socket buffer.
204 *
205 * A content filter queue is represented by "struct cfil_queue" that contains
206 * a list of mbufs and the start and end offset of the data span of
207 * the list of mbufs.
208 *
209 * The data moves into the three content filter queues according to this
210 * sequence:
211 * a) The "cfe_ctl_q" of "struct cfil_entry"
212 * b) The "cfe_pending_q" of "struct cfil_entry"
213 * c) The "cfi_inject_q" of "struct cfil_info"
214 *
215 * Note: The sequence (a),(b) may be repeated several times if there is more
216 * than one content filter attached to the TCP/IP socket.
217 *
218 * The "cfe_ctl_q" queue holds data than cannot be delivered to the
219 * kernel conntrol socket for two reasons:
220 * - The peek offset is less that the end offset of the mbuf data
221 * - The kernel control socket is flow controlled
222 *
223 * The "cfe_pending_q" queue holds data for which CFM_OP_DATA_OUT or
224 * CFM_OP_DATA_IN have been successfully dispatched to the kernel control
225 * socket and are waiting for a pass action message from the user space
226 * filter agent. An mbuf's data must be fully allowed to pass before it is
227 * removed from the cfe_pending_q.
228 *
229 * The "cfi_inject_q" queue holds data that has been fully allowed to pass
230 * by the user space filter agent and that needs to be re-injected into the
231 * TCP/IP socket.
232 *
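 * As an illustration of the sequence above: a 1000 byte mbuf chain captured
 * on output first sits on the "cfe_ctl_q" of the first attached entry; once a
 * CFM_OP_DATA_OUT event covering offsets 0 to 1000 has been delivered to that
 * agent, the data moves to its "cfe_pending_q"; when the agent's pass offset
 * reaches 1000, the data moves on to the next filter entry (repeating (a) and
 * (b)) or, if it was the last filter, to "cfi_inject_q" for re-injection.
 *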
233 *
234 * IMPACT ON FLOW CONTROL
235 *
236 * An essential aspect of the content filter subsystem is to minimize the
237 * impact on flow control of the TCP/IP sockets being filtered.
238 *
239 * The processing overhead of the content filtering may have an effect on
240 * flow control by adding noticeable delays and cannot be eliminated --
241 * care must be taken by the user space filter agent to minimize the
242 * processing delays.
243 *
244 * The amount of data being filtered is kept in buffers while waiting for
245 * a decision by the user space filter agent. This amount of data pending
246 * needs to be subtracted from the amount of data available in the
247 * corresponding TCP/IP socket buffer. This is done by modifying
248 * sbspace() and tcp_sbspace() to account for amount of data pending
249 * in the content filter.
250 *
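 * For instance, if a send buffer would otherwise report 16 KB of free space
 * but 4 KB of outgoing data is still waiting for a filtering decision, the
 * modified sbspace()/tcp_sbspace() report only 12 KB, so the socket buffer
 * cannot be overcommitted while the filter agent is deciding.
 *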
251 *
252 * LOCKING STRATEGY
253 *
254 * The global state of content filter subsystem is protected by a single
255 * read-write lock "cfil_lck_rw". The data flow can be done with the
256 * cfil read-write lock held as shared so it can be re-entered from multiple
257 * threads.
258 *
259 * The per TCP/IP socket content filter state -- "struct cfil_info" -- is
260 * protected by the socket lock.
261 *
262 * A TCP/IP socket lock cannot be taken while the cfil read-write lock
263 * is held. That's why we have some sequences where we drop the cfil read-write
264 * lock before taking the TCP/IP lock.
265 *
266 * It is also important to lock the TCP/IP socket buffer while the content
267 * filter is modifying the amount of pending data. Otherwise the calculations
268 * in sbspace() and tcp_sbspace() could be wrong.
269 *
270 * The "cfil_lck_rw" protects "struct content_filter" and also the fields
271 * "cfe_link" and "cfe_filter" of "struct cfil_entry".
272 *
273 * Actually "cfe_link" and "cfe_filter" are protected both by
274 * "cfil_lck_rw" and the socket lock: they may be modified only when
275 * "cfil_lck_rw" is exclusive and the socket is locked.
276 *
277 * To read the other fields of "struct content_filter" we have to take
278 * "cfil_lck_rw" in shared mode.
279 *
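 * A sketch of the resulting ordering, as used for example by
 * cfil_ctl_disconnect() below when it needs to lock a socket:
 *
 *      cfil_rw_unlock_exclusive(&cfil_lck_rw);   // drop the cfil lock first
 *      socket_lock(so, 1);                       // then take the socket lock
 *      ...
 *      cfil_rw_lock_exclusive(&cfil_lck_rw);     // the cfil lock may be re-taken
 *                                                // while the socket lock is held
 *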
280 *
281 * LIMITATIONS
282 *
283 * - For TCP sockets only
284 *
285 * - Does not support TCP unordered messages
286 */
287
288 /*
289 * TO DO LIST
290 *
291 * SOONER:
292 *
293 * Deal with OOB
294 *
295 * LATER:
296 *
297 * If datagram support is added, enqueue control and address mbufs as well
298 */
299
300 #include <sys/types.h>
301 #include <sys/kern_control.h>
302 #include <sys/queue.h>
303 #include <sys/domain.h>
304 #include <sys/protosw.h>
305 #include <sys/syslog.h>
306 #include <sys/systm.h>
307 #include <sys/param.h>
308 #include <sys/mbuf.h>
309
310 #include <kern/locks.h>
311 #include <kern/zalloc.h>
312 #include <kern/debug.h>
313
314 #include <net/content_filter.h>
315 #include <net/content_filter_crypto.h>
316
317 #include <netinet/in_pcb.h>
318 #include <netinet/tcp.h>
319 #include <netinet/tcp_var.h>
320 #include <netinet/udp.h>
321 #include <netinet/udp_var.h>
322
323 #include <string.h>
324 #include <libkern/libkern.h>
325 #include <kern/sched_prim.h>
326 #include <kern/task.h>
327 #include <mach/task_info.h>
328
329 #if !TARGET_OS_OSX && !defined(XNU_TARGET_OS_OSX)
330 #define MAX_CONTENT_FILTER 2
331 #else
332 #define MAX_CONTENT_FILTER 8
333 #endif
334
335 struct cfil_entry;
336
337 /*
338 * The structure content_filter represents a user space content filter
339 * It's created and associated with a kernel control socket instance
340 */
341 struct content_filter {
342 kern_ctl_ref cf_kcref;
343 u_int32_t cf_kcunit;
344 u_int32_t cf_flags;
345
346 uint32_t cf_necp_control_unit;
347
348 uint32_t cf_sock_count;
349 TAILQ_HEAD(, cfil_entry) cf_sock_entries;
350
351 cfil_crypto_state_t cf_crypto_state;
352 };
353
354 #define CFF_ACTIVE 0x01
355 #define CFF_DETACHING 0x02
356 #define CFF_FLOW_CONTROLLED 0x04
357
358 struct content_filter **content_filters = NULL;
359 uint32_t cfil_active_count = 0; /* Number of active content filters */
360 uint32_t cfil_sock_attached_count = 0; /* Number of socket attachments */
361 uint32_t cfil_sock_udp_attached_count = 0; /* Number of UDP socket attachments */
362 uint32_t cfil_close_wait_timeout = 1000; /* in milliseconds */
363
364 static kern_ctl_ref cfil_kctlref = NULL;
365
366 static lck_grp_attr_t *cfil_lck_grp_attr = NULL;
367 static lck_attr_t *cfil_lck_attr = NULL;
368 static lck_grp_t *cfil_lck_grp = NULL;
369 decl_lck_rw_data(static, cfil_lck_rw);
370
371 #define CFIL_RW_LCK_MAX 8
372
373 int cfil_rw_nxt_lck = 0;
374 void* cfil_rw_lock_history[CFIL_RW_LCK_MAX];
375
376 int cfil_rw_nxt_unlck = 0;
377 void* cfil_rw_unlock_history[CFIL_RW_LCK_MAX];
378
379 #define CONTENT_FILTER_ZONE_NAME "content_filter"
380 #define CONTENT_FILTER_ZONE_MAX 10
381 static struct zone *content_filter_zone = NULL; /* zone for content_filter */
382
383
384 #define CFIL_INFO_ZONE_NAME "cfil_info"
385 #define CFIL_INFO_ZONE_MAX 1024
386 static struct zone *cfil_info_zone = NULL; /* zone for cfil_info */
387
388 MBUFQ_HEAD(cfil_mqhead);
389
390 struct cfil_queue {
391 uint64_t q_start; /* offset of first byte in queue */
392 uint64_t q_end; /* offset of last byte in queue */
393 struct cfil_mqhead q_mq;
394 };
395
396 /*
397 * struct cfil_entry
398 *
399 * There is one entry per content filter
400 */
401 struct cfil_entry {
402 TAILQ_ENTRY(cfil_entry) cfe_link;
403 SLIST_ENTRY(cfil_entry) cfe_order_link;
404 struct content_filter *cfe_filter;
405
406 struct cfil_info *cfe_cfil_info;
407 uint32_t cfe_flags;
408 uint32_t cfe_necp_control_unit;
409 struct timeval cfe_last_event; /* To user space */
410 struct timeval cfe_last_action; /* From user space */
411
412 struct cfe_buf {
413 /*
414 * cfe_pending_q holds data that has been delivered to
415 * the filter and for which we are waiting for an action
416 */
417 struct cfil_queue cfe_pending_q;
418 /*
419 * This queue is for data that has not been delivered to
420 * the content filter (new data, data past the peek offset, or flow control)
421 */
422 struct cfil_queue cfe_ctl_q;
423
424 uint64_t cfe_pass_offset;
425 uint64_t cfe_peek_offset;
426 uint64_t cfe_peeked;
427 } cfe_snd, cfe_rcv;
428 };
429
430 #define CFEF_CFIL_ATTACHED 0x0001 /* was attached to filter */
431 #define CFEF_SENT_SOCK_ATTACHED 0x0002 /* sock attach event was sent */
432 #define CFEF_DATA_START 0x0004 /* can send data event */
433 #define CFEF_FLOW_CONTROLLED 0x0008 /* wait for flow control lift */
434 #define CFEF_SENT_DISCONNECT_IN 0x0010 /* event was sent */
435 #define CFEF_SENT_DISCONNECT_OUT 0x0020 /* event was sent */
436 #define CFEF_SENT_SOCK_CLOSED 0x0040 /* closed event was sent */
437 #define CFEF_CFIL_DETACHED 0x0080 /* filter was detached */
438
439
440 #define CFI_ADD_TIME_LOG(cfil, t1, t0, op) \
441 struct timeval _tdiff; \
442 if ((cfil)->cfi_op_list_ctr < CFI_MAX_TIME_LOG_ENTRY) { \
443 timersub(t1, t0, &_tdiff); \
444 (cfil)->cfi_op_time[(cfil)->cfi_op_list_ctr] = (uint32_t)(_tdiff.tv_sec * 1000 + _tdiff.tv_usec / 1000);\
445 (cfil)->cfi_op_list[(cfil)->cfi_op_list_ctr] = (unsigned char)op; \
446 (cfil)->cfi_op_list_ctr ++; \
447 }
448
449 struct cfil_hash_entry;
450
451 /*
452 * struct cfil_info
453 *
454 * There is a struct cfil_info per socket
455 */
456 struct cfil_info {
457 TAILQ_ENTRY(cfil_info) cfi_link;
458 struct socket *cfi_so;
459 uint64_t cfi_flags;
460 uint64_t cfi_sock_id;
461 struct timeval64 cfi_first_event;
462 uint32_t cfi_op_list_ctr;
463 uint32_t cfi_op_time[CFI_MAX_TIME_LOG_ENTRY]; /* time interval in milliseconds since first event */
464 unsigned char cfi_op_list[CFI_MAX_TIME_LOG_ENTRY];
465 union sockaddr_in_4_6 cfi_so_attach_faddr; /* faddr at the time of attach */
466 union sockaddr_in_4_6 cfi_so_attach_laddr; /* laddr at the time of attach */
467
468 int cfi_dir;
469 uint64_t cfi_byte_inbound_count;
470 uint64_t cfi_byte_outbound_count;
471
472 boolean_t cfi_isSignatureLatest; /* Indicates if signature covers latest flow attributes */
473 struct cfi_buf {
474 /*
475 * cfi_pending_first and cfi_pending_last describe the total
476 * amount of data outstanding for all the filters on
477 * this socket and data in the flow queue
478 * cfi_pending_mbcnt counts in sballoc() "chars of mbufs used"
479 */
480 uint64_t cfi_pending_first;
481 uint64_t cfi_pending_last;
482 uint32_t cfi_pending_mbcnt;
483 uint32_t cfi_pending_mbnum;
484 uint32_t cfi_tail_drop_cnt;
485 /*
486 * cfi_pass_offset is the minimum of all the filters
487 */
488 uint64_t cfi_pass_offset;
489 /*
490 * cfi_inject_q holds data that needs to be re-injected
491 * into the socket after filtering and that can
492 * be queued because of flow control
493 */
494 struct cfil_queue cfi_inject_q;
495 } cfi_snd, cfi_rcv;
496
497 struct cfil_entry cfi_entries[MAX_CONTENT_FILTER];
498 struct cfil_hash_entry *cfi_hash_entry;
499 SLIST_HEAD(, cfil_entry) cfi_ordered_entries;
500 } __attribute__((aligned(8)));
501
502 #define CFIF_DROP 0x0001 /* drop action applied */
503 #define CFIF_CLOSE_WAIT 0x0002 /* waiting for filter to close */
504 #define CFIF_SOCK_CLOSED 0x0004 /* socket is closed */
505 #define CFIF_RETRY_INJECT_IN 0x0010 /* inject in failed */
506 #define CFIF_RETRY_INJECT_OUT 0x0020 /* inject out failed */
507 #define CFIF_SHUT_WR 0x0040 /* shutdown write */
508 #define CFIF_SHUT_RD 0x0080 /* shutdown read */
509 #define CFIF_SOCKET_CONNECTED 0x0100 /* socket is connected */
510 #define CFIF_INITIAL_VERDICT 0x0200 /* received initial verdict */
511
512 #define CFI_MASK_GENCNT 0xFFFFFFFF00000000 /* upper 32 bits */
513 #define CFI_SHIFT_GENCNT 32
514 #define CFI_MASK_FLOWHASH 0x00000000FFFFFFFF /* lower 32 bits */
515 #define CFI_SHIFT_FLOWHASH 0
516
517 #define CFI_ENTRY_KCUNIT(i, e) (((e) - &((i)->cfi_entries[0])) + 1)
518
519 TAILQ_HEAD(cfil_sock_head, cfil_info) cfil_sock_head;
520
521 #define CFIL_QUEUE_VERIFY(x) if (cfil_debug) cfil_queue_verify(x)
522 #define CFIL_INFO_VERIFY(x) if (cfil_debug) cfil_info_verify(x)
523
524 /*
525 * UDP Socket Support
526 */
527 LIST_HEAD(cfilhashhead, cfil_hash_entry);
528 #define CFILHASHSIZE 16
529 #define CFIL_HASH(laddr, faddr, lport, fport) ((faddr) ^ ((laddr) >> 16) ^ (fport) ^ (lport))
530 #define IS_UDP(so) (so && so->so_proto && so->so_proto->pr_type == SOCK_DGRAM && so->so_proto->pr_protocol == IPPROTO_UDP)
531 #define UNCONNECTED(inp) (inp && (((inp->inp_vflag & INP_IPV4) && (inp->inp_faddr.s_addr == INADDR_ANY)) || \
532 ((inp->inp_vflag & INP_IPV6) && IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr))))
533 #define IS_ENTRY_ATTACHED(cfil_info, kcunit) (cfil_info != NULL && (kcunit <= MAX_CONTENT_FILTER) && \
534 cfil_info->cfi_entries[kcunit - 1].cfe_filter != NULL)
535 #define IS_DNS(local, remote) (check_port(local, 53) || check_port(remote, 53) || check_port(local, 5353) || check_port(remote, 5353))
536 #define IS_INITIAL_TFO_DATA(so) (so && (so->so_flags1 & SOF1_PRECONNECT_DATA) && (so->so_state & SS_ISCONNECTING))
537 #define NULLADDRESS(addr) ((addr.sa.sa_len == 0) || \
538 (addr.sa.sa_family == AF_INET && addr.sin.sin_addr.s_addr == 0) || \
539 (addr.sa.sa_family == AF_INET6 && IN6_IS_ADDR_UNSPECIFIED(&addr.sin6.sin6_addr)))
540
541 /*
542 * UDP Garbage Collection:
543 */
544 static struct thread *cfil_udp_gc_thread;
545 #define UDP_FLOW_GC_IDLE_TO 30 // Flow Idle Timeout in seconds
546 #define UDP_FLOW_GC_ACTION_TO 10 // Flow Action Timeout (no action from user space) in seconds
547 #define UDP_FLOW_GC_MAX_COUNT 100 // Max UDP flows to be handled per run
548 #define UDP_FLOW_GC_RUN_INTERVAL_NSEC (10 * NSEC_PER_SEC) // GC wakes up every 10 seconds
549
550 /*
551 * UDP flow queue thresholds
552 */
553 #define UDP_FLOW_GC_MBUF_CNT_MAX (2 << MBSHIFT) // Max mbuf byte count in flow queue (2MB)
554 #define UDP_FLOW_GC_MBUF_NUM_MAX (UDP_FLOW_GC_MBUF_CNT_MAX >> MCLSHIFT) // Max mbuf count in flow queue (1K)
555 #define UDP_FLOW_GC_MBUF_SHIFT 5 // Shift to get 1/32 of platform limits
556 /*
557 * UDP flow queue threshold globals:
558 */
559 static unsigned int cfil_udp_gc_mbuf_num_max = UDP_FLOW_GC_MBUF_NUM_MAX;
560 static unsigned int cfil_udp_gc_mbuf_cnt_max = UDP_FLOW_GC_MBUF_CNT_MAX;
561
562 /*
563 * struct cfil_hash_entry
564 *
565 * Hash entry for cfil_info
566 */
567 struct cfil_hash_entry {
568 LIST_ENTRY(cfil_hash_entry) cfentry_link;
569 struct cfil_info *cfentry_cfil;
570 u_short cfentry_fport;
571 u_short cfentry_lport;
572 sa_family_t cfentry_family;
573 u_int32_t cfentry_flowhash;
574 u_int32_t cfentry_lastused;
575 union {
576 /* foreign host table entry */
577 struct in_addr_4in6 addr46;
578 struct in6_addr addr6;
579 } cfentry_faddr;
580 union {
581 /* local host table entry */
582 struct in_addr_4in6 addr46;
583 struct in6_addr addr6;
584 } cfentry_laddr;
585 };
586
587 /*
588 * struct cfil_db
589 *
590 * For each UDP socket, this is a hash table maintaining all cfil_info structs
591 * keyed by the flow 4-tuples <lport,fport,laddr,faddr>.
592 */
593 struct cfil_db {
594 struct socket *cfdb_so;
595 uint32_t cfdb_count; /* Number of total content filters */
596 struct cfilhashhead *cfdb_hashbase;
597 u_long cfdb_hashmask;
598 struct cfil_hash_entry *cfdb_only_entry; /* Optimization for connected UDP */
599 };
600
601 /*
602 * CFIL specific mbuf tag:
603 * Save state of socket at the point of data entry into cfil.
604 * Use saved state for reinjection at protocol layer.
605 */
606 struct cfil_tag {
607 union sockaddr_in_4_6 cfil_faddr;
608 uint32_t cfil_so_state_change_cnt;
609 short cfil_so_options;
610 };
611
612 #define CFIL_HASH_ENTRY_ZONE_NAME "cfil_entry_hash"
613 #define CFIL_HASH_ENTRY_ZONE_MAX 1024
614 static struct zone *cfil_hash_entry_zone = NULL;
615
616 #define CFIL_DB_ZONE_NAME "cfil_db"
617 #define CFIL_DB_ZONE_MAX 1024
618 static struct zone *cfil_db_zone = NULL;
619
620 /*
621 * Statistics
622 */
623
624 struct cfil_stats cfil_stats;
625
626 /*
627 * For troubleshooting
628 */
629 int cfil_log_level = LOG_ERR;
630 int cfil_debug = 1;
631
632 // Debug controls added for selective debugging.
633 // Disabled for production. If enabled,
634 // these will have performance impact
635 #define LIFECYCLE_DEBUG 0
636 #define VERDICT_DEBUG 0
637 #define DATA_DEBUG 0
638 #define SHOW_DEBUG 0
639 #define GC_DEBUG 0
640
641 /*
642 * Sysctls for logs and statistics
643 */
644 static int sysctl_cfil_filter_list(struct sysctl_oid *, void *, int,
645 struct sysctl_req *);
646 static int sysctl_cfil_sock_list(struct sysctl_oid *, void *, int,
647 struct sysctl_req *);
648
649 SYSCTL_NODE(_net, OID_AUTO, cfil, CTLFLAG_RW | CTLFLAG_LOCKED, 0, "cfil");
650
651 SYSCTL_INT(_net_cfil, OID_AUTO, log, CTLFLAG_RW | CTLFLAG_LOCKED,
652 &cfil_log_level, 0, "");
653
654 SYSCTL_INT(_net_cfil, OID_AUTO, debug, CTLFLAG_RW | CTLFLAG_LOCKED,
655 &cfil_debug, 0, "");
656
657 SYSCTL_UINT(_net_cfil, OID_AUTO, sock_attached_count, CTLFLAG_RD | CTLFLAG_LOCKED,
658 &cfil_sock_attached_count, 0, "");
659
660 SYSCTL_UINT(_net_cfil, OID_AUTO, active_count, CTLFLAG_RD | CTLFLAG_LOCKED,
661 &cfil_active_count, 0, "");
662
663 SYSCTL_UINT(_net_cfil, OID_AUTO, close_wait_timeout, CTLFLAG_RW | CTLFLAG_LOCKED,
664 &cfil_close_wait_timeout, 0, "");
665
666 static int cfil_sbtrim = 1;
667 SYSCTL_UINT(_net_cfil, OID_AUTO, sbtrim, CTLFLAG_RW | CTLFLAG_LOCKED,
668 &cfil_sbtrim, 0, "");
669
670 SYSCTL_PROC(_net_cfil, OID_AUTO, filter_list, CTLFLAG_RD | CTLFLAG_LOCKED,
671 0, 0, sysctl_cfil_filter_list, "S,cfil_filter_stat", "");
672
673 SYSCTL_PROC(_net_cfil, OID_AUTO, sock_list, CTLFLAG_RD | CTLFLAG_LOCKED,
674 0, 0, sysctl_cfil_sock_list, "S,cfil_sock_stat", "");
675
676 SYSCTL_STRUCT(_net_cfil, OID_AUTO, stats, CTLFLAG_RD | CTLFLAG_LOCKED,
677 &cfil_stats, cfil_stats, "");
678
679 /*
680 * Forward declaration to appease the compiler
681 */
682 static int cfil_action_data_pass(struct socket *, struct cfil_info *, uint32_t, int,
683 uint64_t, uint64_t);
684 static int cfil_action_drop(struct socket *, struct cfil_info *, uint32_t);
685 static int cfil_action_bless_client(uint32_t, struct cfil_msg_hdr *);
686 static int cfil_action_set_crypto_key(uint32_t, struct cfil_msg_hdr *);
687 static int cfil_dispatch_closed_event(struct socket *, struct cfil_info *, int);
688 static int cfil_data_common(struct socket *, struct cfil_info *, int, struct sockaddr *,
689 struct mbuf *, struct mbuf *, uint32_t);
690 static int cfil_data_filter(struct socket *, struct cfil_info *, uint32_t, int,
691 struct mbuf *, uint64_t);
692 static void fill_ip_sockaddr_4_6(union sockaddr_in_4_6 *,
693 struct in_addr, u_int16_t);
694 static void fill_ip6_sockaddr_4_6(union sockaddr_in_4_6 *,
695 struct in6_addr *, u_int16_t);
696
697 static int cfil_dispatch_attach_event(struct socket *, struct cfil_info *, uint32_t, int);
698 static void cfil_info_free(struct cfil_info *);
699 static struct cfil_info * cfil_info_alloc(struct socket *, struct cfil_hash_entry *);
700 static int cfil_info_attach_unit(struct socket *, uint32_t, struct cfil_info *);
701 static struct socket * cfil_socket_from_sock_id(cfil_sock_id_t, bool);
702 static struct socket * cfil_socket_from_client_uuid(uuid_t, bool *);
703 static int cfil_service_pending_queue(struct socket *, struct cfil_info *, uint32_t, int);
704 static int cfil_data_service_ctl_q(struct socket *, struct cfil_info *, uint32_t, int);
705 static void cfil_info_verify(struct cfil_info *);
706 static int cfil_update_data_offsets(struct socket *, struct cfil_info *, uint32_t, int,
707 uint64_t, uint64_t);
708 static int cfil_acquire_sockbuf(struct socket *, struct cfil_info *, int);
709 static void cfil_release_sockbuf(struct socket *, int);
710 static int cfil_filters_attached(struct socket *);
711
712 static void cfil_rw_lock_exclusive(lck_rw_t *);
713 static void cfil_rw_unlock_exclusive(lck_rw_t *);
714 static void cfil_rw_lock_shared(lck_rw_t *);
715 static void cfil_rw_unlock_shared(lck_rw_t *);
716 static boolean_t cfil_rw_lock_shared_to_exclusive(lck_rw_t *);
717 static void cfil_rw_lock_exclusive_to_shared(lck_rw_t *);
718
719 static unsigned int cfil_data_length(struct mbuf *, int *, int *);
720 static errno_t cfil_db_init(struct socket *);
721 static void cfil_db_free(struct socket *so);
722 struct cfil_hash_entry *cfil_db_lookup_entry(struct cfil_db *, struct sockaddr *, struct sockaddr *);
723 struct cfil_hash_entry *cfil_db_lookup_entry_with_sockid(struct cfil_db *, u_int64_t);
724 struct cfil_hash_entry *cfil_db_add_entry(struct cfil_db *, struct sockaddr *, struct sockaddr *);
725 void cfil_db_delete_entry(struct cfil_db *, struct cfil_hash_entry *);
726 struct cfil_hash_entry *cfil_sock_udp_get_flow(struct socket *, uint32_t, bool, struct sockaddr *, struct sockaddr *);
727 struct cfil_info *cfil_db_get_cfil_info(struct cfil_db *, cfil_sock_id_t);
728 static errno_t cfil_sock_udp_handle_data(bool, struct socket *, struct sockaddr *, struct sockaddr *,
729 struct mbuf *, struct mbuf *, uint32_t);
730 static int32_t cfil_sock_udp_data_pending(struct sockbuf *, bool);
731 static void cfil_sock_udp_is_closed(struct socket *);
732 static int cfil_sock_udp_notify_shutdown(struct socket *, int, int, int);
733 static int cfil_sock_udp_shutdown(struct socket *, int *);
734 static void cfil_sock_udp_close_wait(struct socket *);
735 static void cfil_sock_udp_buf_update(struct sockbuf *);
736 static int cfil_filters_udp_attached(struct socket *, bool);
737 static void cfil_get_flow_address_v6(struct cfil_hash_entry *, struct inpcb *,
738 struct in6_addr **, struct in6_addr **,
739 u_int16_t *, u_int16_t *);
740 static void cfil_get_flow_address(struct cfil_hash_entry *, struct inpcb *,
741 struct in_addr *, struct in_addr *,
742 u_int16_t *, u_int16_t *);
743 static void cfil_info_log(int, struct cfil_info *, const char *);
744 void cfil_filter_show(u_int32_t);
745 void cfil_info_show(void);
746 bool cfil_info_idle_timed_out(struct cfil_info *, int, u_int32_t);
747 bool cfil_info_action_timed_out(struct cfil_info *, int);
748 bool cfil_info_buffer_threshold_exceeded(struct cfil_info *);
749 struct m_tag *cfil_udp_save_socket_state(struct cfil_info *, struct mbuf *);
750 static void cfil_udp_gc_thread_func(void *, wait_result_t);
751 static void cfil_info_udp_expire(void *, wait_result_t);
752 static bool fill_cfil_hash_entry_from_address(struct cfil_hash_entry *, bool, struct sockaddr *);
753 static void cfil_sock_received_verdict(struct socket *so);
754 static void cfil_fill_event_msg_addresses(struct cfil_hash_entry *, struct inpcb *,
755 union sockaddr_in_4_6 *, union sockaddr_in_4_6 *,
756 boolean_t, boolean_t);
757
758 bool check_port(struct sockaddr *, u_short);
759
760 /*
761 * Content filter global read write lock
762 */
763
764 static void
765 cfil_rw_lock_exclusive(lck_rw_t *lck)
766 {
767 void *lr_saved;
768
769 lr_saved = __builtin_return_address(0);
770
771 lck_rw_lock_exclusive(lck);
772
773 cfil_rw_lock_history[cfil_rw_nxt_lck] = lr_saved;
774 cfil_rw_nxt_lck = (cfil_rw_nxt_lck + 1) % CFIL_RW_LCK_MAX;
775 }
776
777 static void
778 cfil_rw_unlock_exclusive(lck_rw_t *lck)
779 {
780 void *lr_saved;
781
782 lr_saved = __builtin_return_address(0);
783
784 lck_rw_unlock_exclusive(lck);
785
786 cfil_rw_unlock_history[cfil_rw_nxt_unlck] = lr_saved;
787 cfil_rw_nxt_unlck = (cfil_rw_nxt_unlck + 1) % CFIL_RW_LCK_MAX;
788 }
789
790 static void
791 cfil_rw_lock_shared(lck_rw_t *lck)
792 {
793 void *lr_saved;
794
795 lr_saved = __builtin_return_address(0);
796
797 lck_rw_lock_shared(lck);
798
799 cfil_rw_lock_history[cfil_rw_nxt_lck] = lr_saved;
800 cfil_rw_nxt_lck = (cfil_rw_nxt_lck + 1) % CFIL_RW_LCK_MAX;
801 }
802
803 static void
804 cfil_rw_unlock_shared(lck_rw_t *lck)
805 {
806 void *lr_saved;
807
808 lr_saved = __builtin_return_address(0);
809
810 lck_rw_unlock_shared(lck);
811
812 cfil_rw_unlock_history[cfil_rw_nxt_unlck] = lr_saved;
813 cfil_rw_nxt_unlck = (cfil_rw_nxt_unlck + 1) % CFIL_RW_LCK_MAX;
814 }
815
816 static boolean_t
817 cfil_rw_lock_shared_to_exclusive(lck_rw_t *lck)
818 {
819 void *lr_saved;
820 boolean_t upgraded;
821
822 lr_saved = __builtin_return_address(0);
823
824 upgraded = lck_rw_lock_shared_to_exclusive(lck);
825 if (upgraded) {
826 cfil_rw_unlock_history[cfil_rw_nxt_unlck] = lr_saved;
827 cfil_rw_nxt_unlck = (cfil_rw_nxt_unlck + 1) % CFIL_RW_LCK_MAX;
828 }
829 return upgraded;
830 }
831
832 static void
833 cfil_rw_lock_exclusive_to_shared(lck_rw_t *lck)
834 {
835 void *lr_saved;
836
837 lr_saved = __builtin_return_address(0);
838
839 lck_rw_lock_exclusive_to_shared(lck);
840
841 cfil_rw_lock_history[cfil_rw_nxt_lck] = lr_saved;
842 cfil_rw_nxt_lck = (cfil_rw_nxt_lck + 1) % CFIL_RW_LCK_MAX;
843 }
844
845 static void
846 cfil_rw_lock_assert_held(lck_rw_t *lck, int exclusive)
847 {
848 #if !MACH_ASSERT
849 #pragma unused(lck, exclusive)
850 #endif
851 LCK_RW_ASSERT(lck,
852 exclusive ? LCK_RW_ASSERT_EXCLUSIVE : LCK_RW_ASSERT_HELD);
853 }
854
855 /*
856 * Return the number of bytes in the mbuf chain using the same
857 * method as m_length() or sballoc()
858 *
859 * Returns data len - starting from PKT start
860 * - retmbcnt - optional param to get total mbuf bytes in chain
861 * - retmbnum - optional param to get number of mbufs in chain
862 */
863 static unsigned int
864 cfil_data_length(struct mbuf *m, int *retmbcnt, int *retmbnum)
865 {
866 struct mbuf *m0;
867 unsigned int pktlen = 0;
868 int mbcnt;
869 int mbnum;
870
871 // Locate the start of data
872 for (m0 = m; m0 != NULL; m0 = m0->m_next) {
873 if (m0->m_flags & M_PKTHDR) {
874 break;
875 }
876 }
877 if (m0 == NULL) {
878 CFIL_LOG(LOG_ERR, "cfil_data_length: no M_PKTHDR");
879 return 0;
880 }
881 m = m0;
882
883 if (retmbcnt == NULL && retmbnum == NULL) {
884 return m_length(m);
885 }
886
887 pktlen = 0;
888 mbcnt = 0;
889 mbnum = 0;
890 for (m0 = m; m0 != NULL; m0 = m0->m_next) {
891 pktlen += m0->m_len;
892 mbnum++;
893 mbcnt += MSIZE;
894 if (m0->m_flags & M_EXT) {
895 mbcnt += m0->m_ext.ext_size;
896 }
897 }
898 if (retmbcnt) {
899 *retmbcnt = mbcnt;
900 }
901 if (retmbnum) {
902 *retmbnum = mbnum;
903 }
904 return pktlen;
905 }
906
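/*
 * Typical usage of cfil_data_length() (sketch); both out-parameters are
 * optional and may be passed as NULL:
 *
 *	int mbcnt = 0, mbnum = 0;
 *	unsigned int len = cfil_data_length(m, &mbcnt, &mbnum);
 *	// len:   payload bytes starting at the M_PKTHDR mbuf
 *	// mbcnt: buffer bytes accounted the same way as sballoc()
 *	// mbnum: number of mbufs in the chain
 */
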
907 static struct mbuf *
908 cfil_data_start(struct mbuf *m)
909 {
910 struct mbuf *m0;
911
912 // Locate the start of data
913 for (m0 = m; m0 != NULL; m0 = m0->m_next) {
914 if (m0->m_flags & M_PKTHDR) {
915 break;
916 }
917 }
918 return m0;
919 }
920
921 /*
922 * Common mbuf queue utilities
923 */
924
925 static inline void
926 cfil_queue_init(struct cfil_queue *cfq)
927 {
928 cfq->q_start = 0;
929 cfq->q_end = 0;
930 MBUFQ_INIT(&cfq->q_mq);
931 }
932
933 static inline uint64_t
934 cfil_queue_drain(struct cfil_queue *cfq)
935 {
936 uint64_t drained = cfq->q_end - cfq->q_start; /* bytes dropped from the queue */
937 cfq->q_start = 0;
938 cfq->q_end = 0;
939 MBUFQ_DRAIN(&cfq->q_mq);
940
941 return drained;
942 }
943
944 /* Return 1 when empty, 0 otherwise */
945 static inline int
946 cfil_queue_empty(struct cfil_queue *cfq)
947 {
948 return MBUFQ_EMPTY(&cfq->q_mq);
949 }
950
951 static inline uint64_t
952 cfil_queue_offset_first(struct cfil_queue *cfq)
953 {
954 return cfq->q_start;
955 }
956
957 static inline uint64_t
958 cfil_queue_offset_last(struct cfil_queue *cfq)
959 {
960 return cfq->q_end;
961 }
962
963 static inline uint64_t
964 cfil_queue_len(struct cfil_queue *cfq)
965 {
966 return cfq->q_end - cfq->q_start;
967 }
968
969 /*
970 * Routines to verify some fundamental assumptions
971 */
972
973 static void
974 cfil_queue_verify(struct cfil_queue *cfq)
975 {
976 mbuf_t chain;
977 mbuf_t m;
978 mbuf_t n;
979 uint64_t queuesize = 0;
980
981 /* Verify offsets are ordered */
982 VERIFY(cfq->q_start <= cfq->q_end);
983
984 /*
985 * When queue is empty, the offsets are equal otherwise the offsets
986 * are different
987 */
988 VERIFY((MBUFQ_EMPTY(&cfq->q_mq) && cfq->q_start == cfq->q_end) ||
989 (!MBUFQ_EMPTY(&cfq->q_mq) &&
990 cfq->q_start != cfq->q_end));
991
992 MBUFQ_FOREACH(chain, &cfq->q_mq) {
993 size_t chainsize = 0;
994 m = chain;
995 unsigned int mlen = cfil_data_length(m, NULL, NULL);
996 // skip the addr and control stuff if present
997 m = cfil_data_start(m);
998
999 if (m == NULL ||
1000 m == (void *)M_TAG_FREE_PATTERN ||
1001 m->m_next == (void *)M_TAG_FREE_PATTERN ||
1002 m->m_nextpkt == (void *)M_TAG_FREE_PATTERN) {
1003 panic("%s - mq %p is free at %p", __func__,
1004 &cfq->q_mq, m);
1005 }
1006 for (n = m; n != NULL; n = n->m_next) {
1007 if (n->m_type != MT_DATA &&
1008 n->m_type != MT_HEADER &&
1009 n->m_type != MT_OOBDATA) {
1010 panic("%s - %p unsupported type %u", __func__,
1011 n, n->m_type);
1012 }
1013 chainsize += n->m_len;
1014 }
1015 if (mlen != chainsize) {
1016 panic("%s - %p m_length() %u != chainsize %lu",
1017 __func__, m, mlen, chainsize);
1018 }
1019 queuesize += chainsize;
1020 }
1021 if (queuesize != cfq->q_end - cfq->q_start) {
1022 panic("%s - %p queuesize %llu != offsetdiffs %llu", __func__,
1023 m, queuesize, cfq->q_end - cfq->q_start);
1024 }
1025 }
1026
1027 static void
1028 cfil_queue_enqueue(struct cfil_queue *cfq, mbuf_t m, size_t len)
1029 {
1030 CFIL_QUEUE_VERIFY(cfq);
1031
1032 MBUFQ_ENQUEUE(&cfq->q_mq, m);
1033 cfq->q_end += len;
1034
1035 CFIL_QUEUE_VERIFY(cfq);
1036 }
1037
1038 static void
1039 cfil_queue_remove(struct cfil_queue *cfq, mbuf_t m, size_t len)
1040 {
1041 CFIL_QUEUE_VERIFY(cfq);
1042
1043 VERIFY(cfil_data_length(m, NULL, NULL) == len);
1044
1045 MBUFQ_REMOVE(&cfq->q_mq, m);
1046 MBUFQ_NEXT(m) = NULL;
1047 cfq->q_start += len;
1048
1049 CFIL_QUEUE_VERIFY(cfq);
1050 }
1051
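/*
 * Offset bookkeeping example (sketch): enqueueing a 100 byte chain on an
 * empty queue leaves q_start == 0 and q_end == 100, so cfil_queue_len()
 * returns 100; removing that chain advances q_start to 100 and the queue
 * is empty again with q_start == q_end == 100.
 */
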
1052 static mbuf_t
1053 cfil_queue_first(struct cfil_queue *cfq)
1054 {
1055 return MBUFQ_FIRST(&cfq->q_mq);
1056 }
1057
1058 static mbuf_t
1059 cfil_queue_next(struct cfil_queue *cfq, mbuf_t m)
1060 {
1061 #pragma unused(cfq)
1062 return MBUFQ_NEXT(m);
1063 }
1064
1065 static void
1066 cfil_entry_buf_verify(struct cfe_buf *cfe_buf)
1067 {
1068 CFIL_QUEUE_VERIFY(&cfe_buf->cfe_ctl_q);
1069 CFIL_QUEUE_VERIFY(&cfe_buf->cfe_pending_q);
1070
1071 /* Verify the queues are ordered so that pending is before ctl */
1072 VERIFY(cfe_buf->cfe_ctl_q.q_start >= cfe_buf->cfe_pending_q.q_end);
1073
1074 /* The peek offset cannot be less than the pass offset */
1075 VERIFY(cfe_buf->cfe_peek_offset >= cfe_buf->cfe_pass_offset);
1076
1077 /* Make sure we've updated the offset we peeked at */
1078 VERIFY(cfe_buf->cfe_ctl_q.q_start <= cfe_buf->cfe_peeked);
1079 }
1080
1081 static void
1082 cfil_entry_verify(struct cfil_entry *entry)
1083 {
1084 cfil_entry_buf_verify(&entry->cfe_snd);
1085 cfil_entry_buf_verify(&entry->cfe_rcv);
1086 }
1087
1088 static void
1089 cfil_info_buf_verify(struct cfi_buf *cfi_buf)
1090 {
1091 CFIL_QUEUE_VERIFY(&cfi_buf->cfi_inject_q);
1092
1093 VERIFY(cfi_buf->cfi_pending_first <= cfi_buf->cfi_pending_last);
1094 }
1095
1096 static void
1097 cfil_info_verify(struct cfil_info *cfil_info)
1098 {
1099 int i;
1100
1101 if (cfil_info == NULL) {
1102 return;
1103 }
1104
1105 cfil_info_buf_verify(&cfil_info->cfi_snd);
1106 cfil_info_buf_verify(&cfil_info->cfi_rcv);
1107
1108 for (i = 0; i < MAX_CONTENT_FILTER; i++) {
1109 cfil_entry_verify(&cfil_info->cfi_entries[i]);
1110 }
1111 }
1112
1113 static void
1114 verify_content_filter(struct content_filter *cfc)
1115 {
1116 struct cfil_entry *entry;
1117 uint32_t count = 0;
1118
1119 VERIFY(cfc->cf_sock_count >= 0);
1120
1121 TAILQ_FOREACH(entry, &cfc->cf_sock_entries, cfe_link) {
1122 count++;
1123 VERIFY(cfc == entry->cfe_filter);
1124 }
1125 VERIFY(count == cfc->cf_sock_count);
1126 }
1127
1128 /*
1129 * Kernel control socket callbacks
1130 */
1131 static errno_t
1132 cfil_ctl_connect(kern_ctl_ref kctlref, struct sockaddr_ctl *sac,
1133 void **unitinfo)
1134 {
1135 errno_t error = 0;
1136 struct content_filter *cfc = NULL;
1137
1138 CFIL_LOG(LOG_NOTICE, "");
1139
1140 cfc = zalloc(content_filter_zone);
1141 if (cfc == NULL) {
1142 CFIL_LOG(LOG_ERR, "zalloc failed");
1143 error = ENOMEM;
1144 goto done;
1145 }
1146 bzero(cfc, sizeof(struct content_filter));
1147
1148 cfil_rw_lock_exclusive(&cfil_lck_rw);
1149 if (content_filters == NULL) {
1150 struct content_filter **tmp;
1151
1152 cfil_rw_unlock_exclusive(&cfil_lck_rw);
1153
1154 MALLOC(tmp,
1155 struct content_filter **,
1156 MAX_CONTENT_FILTER * sizeof(struct content_filter *),
1157 M_TEMP,
1158 M_WAITOK | M_ZERO);
1159
1160 cfil_rw_lock_exclusive(&cfil_lck_rw);
1161
1162 if (tmp == NULL && content_filters == NULL) {
1163 error = ENOMEM;
1164 cfil_rw_unlock_exclusive(&cfil_lck_rw);
1165 goto done;
1166 }
1167 /* Another thread may have won the race */
1168 if (content_filters != NULL) {
1169 FREE(tmp, M_TEMP);
1170 } else {
1171 content_filters = tmp;
1172 }
1173 }
1174
1175 if (sac->sc_unit == 0 || sac->sc_unit > MAX_CONTENT_FILTER) {
1176 CFIL_LOG(LOG_ERR, "bad sc_unit %u", sac->sc_unit);
1177 error = EINVAL;
1178 } else if (content_filters[sac->sc_unit - 1] != NULL) {
1179 CFIL_LOG(LOG_ERR, "sc_unit %u in use", sac->sc_unit);
1180 error = EADDRINUSE;
1181 } else {
1182 /*
1183 * kernel control socket kcunit numbers start at 1
1184 */
1185 content_filters[sac->sc_unit - 1] = cfc;
1186
1187 cfc->cf_kcref = kctlref;
1188 cfc->cf_kcunit = sac->sc_unit;
1189 TAILQ_INIT(&cfc->cf_sock_entries);
1190
1191 *unitinfo = cfc;
1192 cfil_active_count++;
1193 }
1194 cfil_rw_unlock_exclusive(&cfil_lck_rw);
1195 done:
1196 if (error != 0 && cfc != NULL) {
1197 zfree(content_filter_zone, cfc);
1198 }
1199
1200 if (error == 0) {
1201 OSIncrementAtomic(&cfil_stats.cfs_ctl_connect_ok);
1202 } else {
1203 OSIncrementAtomic(&cfil_stats.cfs_ctl_connect_fail);
1204 }
1205
1206 CFIL_LOG(LOG_INFO, "return %d cfil_active_count %u kcunit %u",
1207 error, cfil_active_count, sac->sc_unit);
1208
1209 return error;
1210 }
1211
1212 static errno_t
1213 cfil_ctl_disconnect(kern_ctl_ref kctlref, u_int32_t kcunit, void *unitinfo)
1214 {
1215 #pragma unused(kctlref)
1216 errno_t error = 0;
1217 struct content_filter *cfc;
1218 struct cfil_entry *entry;
1219 uint64_t sock_flow_id = 0;
1220
1221 CFIL_LOG(LOG_NOTICE, "");
1222
1223 if (content_filters == NULL) {
1224 CFIL_LOG(LOG_ERR, "no content filter");
1225 error = EINVAL;
1226 goto done;
1227 }
1228 if (kcunit > MAX_CONTENT_FILTER) {
1229 CFIL_LOG(LOG_ERR, "kcunit %u > MAX_CONTENT_FILTER (%d)",
1230 kcunit, MAX_CONTENT_FILTER);
1231 error = EINVAL;
1232 goto done;
1233 }
1234
1235 cfc = (struct content_filter *)unitinfo;
1236 if (cfc == NULL) {
1237 goto done;
1238 }
1239
1240 cfil_rw_lock_exclusive(&cfil_lck_rw);
1241 if (content_filters[kcunit - 1] != cfc || cfc->cf_kcunit != kcunit) {
1242 CFIL_LOG(LOG_ERR, "bad unit info %u)",
1243 kcunit);
1244 cfil_rw_unlock_exclusive(&cfil_lck_rw);
1245 goto done;
1246 }
1247 cfc->cf_flags |= CFF_DETACHING;
1248 /*
1249 * Remove all sockets from the filter
1250 */
1251 while ((entry = TAILQ_FIRST(&cfc->cf_sock_entries)) != NULL) {
1252 cfil_rw_lock_assert_held(&cfil_lck_rw, 1);
1253
1254 verify_content_filter(cfc);
1255 /*
1256 * Accept all outstanding data by pushing to next filter
1257 * or back to socket
1258 *
1259 * TBD: Actually we should make sure all data has been pushed
1260 * back to socket
1261 */
1262 if (entry->cfe_cfil_info && entry->cfe_cfil_info->cfi_so) {
1263 struct cfil_info *cfil_info = entry->cfe_cfil_info;
1264 struct socket *so = cfil_info->cfi_so;
1265 sock_flow_id = cfil_info->cfi_sock_id;
1266
1267 /* Need to let data flow immediately */
1268 entry->cfe_flags |= CFEF_SENT_SOCK_ATTACHED |
1269 CFEF_DATA_START;
1270
1271 /*
1272 * Respect locking hierarchy
1273 */
1274 cfil_rw_unlock_exclusive(&cfil_lck_rw);
1275
1276 socket_lock(so, 1);
1277
1278 /*
1279 * When cfe_filter is NULL the filter is detached
1280 * and the entry has been removed from cf_sock_entries
1281 */
1282 if ((so->so_cfil == NULL && so->so_cfil_db == NULL) || entry->cfe_filter == NULL) {
1283 cfil_rw_lock_exclusive(&cfil_lck_rw);
1284 goto release;
1285 }
1286
1287 (void) cfil_action_data_pass(so, cfil_info, kcunit, 1,
1288 CFM_MAX_OFFSET,
1289 CFM_MAX_OFFSET);
1290
1291 (void) cfil_action_data_pass(so, cfil_info, kcunit, 0,
1292 CFM_MAX_OFFSET,
1293 CFM_MAX_OFFSET);
1294
1295 cfil_rw_lock_exclusive(&cfil_lck_rw);
1296
1297 /*
1298 * Check again to make sure the cfil_info is still valid,
1299 * as the socket may have been unlocked when calling
1300 * cfil_acquire_sockbuf()
1301 */
1302 if (entry->cfe_filter == NULL ||
1303 (so->so_cfil == NULL && cfil_db_get_cfil_info(so->so_cfil_db, sock_flow_id) == NULL)) {
1304 goto release;
1305 }
1306
1307 /* The filter is now detached */
1308 entry->cfe_flags |= CFEF_CFIL_DETACHED;
1309 #if LIFECYCLE_DEBUG
1310 cfil_info_log(LOG_DEBUG, cfil_info, "CFIL: LIFECYCLE: - FILTER DISCONNECTED");
1311 #endif
1312 CFIL_LOG(LOG_NOTICE, "so %llx detached %u",
1313 (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit);
1314 if ((cfil_info->cfi_flags & CFIF_CLOSE_WAIT) &&
1315 cfil_filters_attached(so) == 0) {
1316 CFIL_LOG(LOG_NOTICE, "so %llx waking",
1317 (uint64_t)VM_KERNEL_ADDRPERM(so));
1318 wakeup((caddr_t)cfil_info);
1319 }
1320
1321 /*
1322 * Remove the filter entry from the content filter
1323 * but leave the rest of the state intact as the queues
1324 * may not be empty yet
1325 */
1326 entry->cfe_filter = NULL;
1327 entry->cfe_necp_control_unit = 0;
1328
1329 TAILQ_REMOVE(&cfc->cf_sock_entries, entry, cfe_link);
1330 cfc->cf_sock_count--;
1331 release:
1332 socket_unlock(so, 1);
1333 }
1334 }
1335 verify_content_filter(cfc);
1336
1337 VERIFY(cfc->cf_sock_count == 0);
1338
1339 /*
1340 * Make filter inactive
1341 */
1342 content_filters[kcunit - 1] = NULL;
1343 cfil_active_count--;
1344 cfil_rw_unlock_exclusive(&cfil_lck_rw);
1345
1346 if (cfc->cf_crypto_state != NULL) {
1347 cfil_crypto_cleanup_state(cfc->cf_crypto_state);
1348 cfc->cf_crypto_state = NULL;
1349 }
1350
1351 zfree(content_filter_zone, cfc);
1352 done:
1353 if (error == 0) {
1354 OSIncrementAtomic(&cfil_stats.cfs_ctl_disconnect_ok);
1355 } else {
1356 OSIncrementAtomic(&cfil_stats.cfs_ctl_disconnect_fail);
1357 }
1358
1359 CFIL_LOG(LOG_INFO, "return %d cfil_active_count %u kcunit %u",
1360 error, cfil_active_count, kcunit);
1361
1362 return error;
1363 }
1364
1365 /*
1366 * cfil_acquire_sockbuf()
1367 *
1368 * Prevent any other thread from acquiring the sockbuf
1369 * We use sb_cfil_thread as a semaphore to prevent other threads from
1370 * messing with the sockbuf -- see sblock()
1371 * Note: We do not set SB_LOCK here because the thread may check or modify
1372 * SB_LOCK several times until it calls cfil_release_sockbuf() -- currently
1373 * sblock(), sbunlock() or sodefunct()
1374 */
1375 static int
1376 cfil_acquire_sockbuf(struct socket *so, struct cfil_info *cfil_info, int outgoing)
1377 {
1378 thread_t tp = current_thread();
1379 struct sockbuf *sb = outgoing ? &so->so_snd : &so->so_rcv;
1380 lck_mtx_t *mutex_held;
1381 int error = 0;
1382
1383 /*
1384 * Wait until no thread is holding the sockbuf and other content
1385 * filter threads have released the sockbuf
1386 */
1387 while ((sb->sb_flags & SB_LOCK) ||
1388 (sb->sb_cfil_thread != NULL && sb->sb_cfil_thread != tp)) {
1389 if (so->so_proto->pr_getlock != NULL) {
1390 mutex_held = (*so->so_proto->pr_getlock)(so, PR_F_WILLUNLOCK);
1391 } else {
1392 mutex_held = so->so_proto->pr_domain->dom_mtx;
1393 }
1394
1395 LCK_MTX_ASSERT(mutex_held, LCK_MTX_ASSERT_OWNED);
1396
1397 sb->sb_wantlock++;
1398 VERIFY(sb->sb_wantlock != 0);
1399
1400 msleep(&sb->sb_flags, mutex_held, PSOCK, "cfil_acquire_sockbuf",
1401 NULL);
1402
1403 VERIFY(sb->sb_wantlock != 0);
1404 sb->sb_wantlock--;
1405 }
1406 /*
1407 * Use reference count for repetitive calls on same thread
1408 */
1409 if (sb->sb_cfil_refs == 0) {
1410 VERIFY(sb->sb_cfil_thread == NULL);
1411 VERIFY((sb->sb_flags & SB_LOCK) == 0);
1412
1413 sb->sb_cfil_thread = tp;
1414 sb->sb_flags |= SB_LOCK;
1415 }
1416 sb->sb_cfil_refs++;
1417
1418 /* We acquire the socket buffer when we need to cleanup */
1419 if (cfil_info == NULL) {
1420 CFIL_LOG(LOG_ERR, "so %llx cfil detached",
1421 (uint64_t)VM_KERNEL_ADDRPERM(so));
1422 error = 0;
1423 } else if (cfil_info->cfi_flags & CFIF_DROP) {
1424 CFIL_LOG(LOG_ERR, "so %llx drop set",
1425 (uint64_t)VM_KERNEL_ADDRPERM(so));
1426 error = EPIPE;
1427 }
1428
1429 return error;
1430 }
1431
1432 static void
1433 cfil_release_sockbuf(struct socket *so, int outgoing)
1434 {
1435 struct sockbuf *sb = outgoing ? &so->so_snd : &so->so_rcv;
1436 thread_t tp = current_thread();
1437
1438 socket_lock_assert_owned(so);
1439
1440 if (sb->sb_cfil_thread != NULL && sb->sb_cfil_thread != tp) {
1441 panic("%s sb_cfil_thread %p not current %p", __func__,
1442 sb->sb_cfil_thread, tp);
1443 }
1444 /*
1445 * Don't panic if we are defunct because SB_LOCK has
1446 * been cleared by sodefunct()
1447 */
1448 if (!(so->so_flags & SOF_DEFUNCT) && !(sb->sb_flags & SB_LOCK)) {
1449 panic("%s SB_LOCK not set on %p", __func__,
1450 sb);
1451 }
1452 /*
1453 * We can unlock when the thread unwinds to the last reference
1454 */
1455 sb->sb_cfil_refs--;
1456 if (sb->sb_cfil_refs == 0) {
1457 sb->sb_cfil_thread = NULL;
1458 sb->sb_flags &= ~SB_LOCK;
1459
1460 if (sb->sb_wantlock > 0) {
1461 wakeup(&sb->sb_flags);
1462 }
1463 }
1464 }
1465
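/*
 * Typical pairing (sketch): cfil_acquire_sockbuf() takes its reference even
 * when it returns an error, so each call is balanced by cfil_release_sockbuf()
 * for the same direction; sb_cfil_refs makes nested calls from the same
 * thread safe.
 *
 *	error = cfil_acquire_sockbuf(so, cfil_info, outgoing);
 *	if (error == 0) {
 *		// ... safely manipulate the sockbuf, e.g. re-inject passed data ...
 *	}
 *	cfil_release_sockbuf(so, outgoing);
 */
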
1466 cfil_sock_id_t
1467 cfil_sock_id_from_socket(struct socket *so)
1468 {
1469 if ((so->so_flags & SOF_CONTENT_FILTER) && so->so_cfil) {
1470 return so->so_cfil->cfi_sock_id;
1471 } else {
1472 return CFIL_SOCK_ID_NONE;
1473 }
1474 }
1475
1476 static bool
1477 cfil_socket_safe_lock(struct inpcb *inp)
1478 {
1479 if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) != WNT_STOPUSING) {
1480 socket_lock(inp->inp_socket, 1);
1481 if (in_pcb_checkstate(inp, WNT_RELEASE, 1) != WNT_STOPUSING) {
1482 return true;
1483 }
1484 socket_unlock(inp->inp_socket, 1);
1485 }
1486 return false;
1487 }
1488
1489 static struct socket *
1490 cfil_socket_from_sock_id(cfil_sock_id_t cfil_sock_id, bool udp_only)
1491 {
1492 struct socket *so = NULL;
1493 u_int64_t gencnt = cfil_sock_id >> 32;
1494 u_int32_t flowhash = (u_int32_t)(cfil_sock_id & 0x0ffffffff);
1495 struct inpcb *inp = NULL;
1496 struct inpcbinfo *pcbinfo = NULL;
1497
1498 #if VERDICT_DEBUG
1499 CFIL_LOG(LOG_ERR, "CFIL: VERDICT: search for socket: id %llu gencnt %llx flowhash %x", cfil_sock_id, gencnt, flowhash);
1500 #endif
1501
1502 if (udp_only) {
1503 goto find_udp;
1504 }
1505
1506 pcbinfo = &tcbinfo;
1507 lck_rw_lock_shared(pcbinfo->ipi_lock);
1508 LIST_FOREACH(inp, pcbinfo->ipi_listhead, inp_list) {
1509 if (inp->inp_state != INPCB_STATE_DEAD &&
1510 inp->inp_socket != NULL &&
1511 inp->inp_flowhash == flowhash &&
1512 (inp->inp_socket->so_gencnt & 0x0ffffffff) == gencnt &&
1513 inp->inp_socket->so_cfil != NULL) {
1514 if (cfil_socket_safe_lock(inp)) {
1515 so = inp->inp_socket;
1516 }
1517 break;
1518 }
1519 }
1520 lck_rw_done(pcbinfo->ipi_lock);
1521 if (so != NULL) {
1522 goto done;
1523 }
1524
1525 find_udp:
1526
1527 pcbinfo = &udbinfo;
1528 lck_rw_lock_shared(pcbinfo->ipi_lock);
1529 LIST_FOREACH(inp, pcbinfo->ipi_listhead, inp_list) {
1530 if (inp->inp_state != INPCB_STATE_DEAD &&
1531 inp->inp_socket != NULL &&
1532 inp->inp_socket->so_cfil_db != NULL &&
1533 (inp->inp_socket->so_gencnt & 0x0ffffffff) == gencnt) {
1534 if (cfil_socket_safe_lock(inp)) {
1535 so = inp->inp_socket;
1536 }
1537 break;
1538 }
1539 }
1540 lck_rw_done(pcbinfo->ipi_lock);
1541
1542 done:
1543 if (so == NULL) {
1544 OSIncrementAtomic(&cfil_stats.cfs_sock_id_not_found);
1545 CFIL_LOG(LOG_DEBUG,
1546 "no socket for sock_id %llx gencnt %llx flowhash %x",
1547 cfil_sock_id, gencnt, flowhash);
1548 }
1549
1550 return so;
1551 }
1552
1553 static struct socket *
1554 cfil_socket_from_client_uuid(uuid_t necp_client_uuid, bool *cfil_attached)
1555 {
1556 struct socket *so = NULL;
1557 struct inpcb *inp = NULL;
1558 struct inpcbinfo *pcbinfo = &tcbinfo;
1559
1560 lck_rw_lock_shared(pcbinfo->ipi_lock);
1561 LIST_FOREACH(inp, pcbinfo->ipi_listhead, inp_list) {
1562 if (inp->inp_state != INPCB_STATE_DEAD &&
1563 inp->inp_socket != NULL &&
1564 uuid_compare(inp->necp_client_uuid, necp_client_uuid) == 0) {
1565 *cfil_attached = (inp->inp_socket->so_cfil != NULL);
1566 if (cfil_socket_safe_lock(inp)) {
1567 so = inp->inp_socket;
1568 }
1569 break;
1570 }
1571 }
1572 lck_rw_done(pcbinfo->ipi_lock);
1573 if (so != NULL) {
1574 goto done;
1575 }
1576
1577 pcbinfo = &udbinfo;
1578 lck_rw_lock_shared(pcbinfo->ipi_lock);
1579 LIST_FOREACH(inp, pcbinfo->ipi_listhead, inp_list) {
1580 if (inp->inp_state != INPCB_STATE_DEAD &&
1581 inp->inp_socket != NULL &&
1582 uuid_compare(inp->necp_client_uuid, necp_client_uuid) == 0) {
1583 *cfil_attached = (inp->inp_socket->so_cfil_db != NULL);
1584 if (cfil_socket_safe_lock(inp)) {
1585 so = inp->inp_socket;
1586 }
1587 break;
1588 }
1589 }
1590 lck_rw_done(pcbinfo->ipi_lock);
1591
1592 done:
1593 return so;
1594 }
1595
1596 static errno_t
1597 cfil_ctl_send(kern_ctl_ref kctlref, u_int32_t kcunit, void *unitinfo, mbuf_t m,
1598 int flags)
1599 {
1600 #pragma unused(kctlref, flags)
1601 errno_t error = 0;
1602 struct cfil_msg_hdr *msghdr;
1603 struct content_filter *cfc = (struct content_filter *)unitinfo;
1604 struct socket *so;
1605 struct cfil_msg_action *action_msg;
1606 struct cfil_entry *entry;
1607 struct cfil_info *cfil_info = NULL;
1608 unsigned int data_len = 0;
1609
1610 CFIL_LOG(LOG_INFO, "");
1611
1612 if (content_filters == NULL) {
1613 CFIL_LOG(LOG_ERR, "no content filter");
1614 error = EINVAL;
1615 goto done;
1616 }
1617 if (kcunit > MAX_CONTENT_FILTER) {
1618 CFIL_LOG(LOG_ERR, "kcunit %u > MAX_CONTENT_FILTER (%d)",
1619 kcunit, MAX_CONTENT_FILTER);
1620 error = EINVAL;
1621 goto done;
1622 }
1623 if (m == NULL) {
1624 CFIL_LOG(LOG_ERR, "null mbuf");
1625 error = EINVAL;
1626 goto done;
1627 }
1628 data_len = m_length(m);
1629
1630 if (data_len < sizeof(struct cfil_msg_hdr)) {
1631 CFIL_LOG(LOG_ERR, "too short %u", data_len);
1632 error = EINVAL;
1633 goto done;
1634 }
1635 msghdr = (struct cfil_msg_hdr *)mbuf_data(m);
1636 if (msghdr->cfm_version != CFM_VERSION_CURRENT) {
1637 CFIL_LOG(LOG_ERR, "bad version %u", msghdr->cfm_version);
1638 error = EINVAL;
1639 goto done;
1640 }
1641 if (msghdr->cfm_type != CFM_TYPE_ACTION) {
1642 CFIL_LOG(LOG_ERR, "bad type %u", msghdr->cfm_type);
1643 error = EINVAL;
1644 goto done;
1645 }
1646 if (msghdr->cfm_len > data_len) {
1647 CFIL_LOG(LOG_ERR, "bad length %u", msghdr->cfm_len);
1648 error = EINVAL;
1649 goto done;
1650 }
1651
1652 /* Validate action operation */
1653 switch (msghdr->cfm_op) {
1654 case CFM_OP_DATA_UPDATE:
1655 OSIncrementAtomic(
1656 &cfil_stats.cfs_ctl_action_data_update);
1657 break;
1658 case CFM_OP_DROP:
1659 OSIncrementAtomic(&cfil_stats.cfs_ctl_action_drop);
1660 break;
1661 case CFM_OP_BLESS_CLIENT:
1662 if (msghdr->cfm_len != sizeof(struct cfil_msg_bless_client)) {
1663 OSIncrementAtomic(&cfil_stats.cfs_ctl_action_bad_len);
1664 error = EINVAL;
1665 CFIL_LOG(LOG_ERR, "bad len: %u for op %u",
1666 msghdr->cfm_len,
1667 msghdr->cfm_op);
1668 goto done;
1669 }
1670 error = cfil_action_bless_client(kcunit, msghdr);
1671 goto done;
1672 case CFM_OP_SET_CRYPTO_KEY:
1673 if (msghdr->cfm_len != sizeof(struct cfil_msg_set_crypto_key)) {
1674 OSIncrementAtomic(&cfil_stats.cfs_ctl_action_bad_len);
1675 error = EINVAL;
1676 CFIL_LOG(LOG_ERR, "bad len: %u for op %u",
1677 msghdr->cfm_len,
1678 msghdr->cfm_op);
1679 goto done;
1680 }
1681 error = cfil_action_set_crypto_key(kcunit, msghdr);
1682 goto done;
1683 default:
1684 OSIncrementAtomic(&cfil_stats.cfs_ctl_action_bad_op);
1685 CFIL_LOG(LOG_ERR, "bad op %u", msghdr->cfm_op);
1686 error = EINVAL;
1687 goto done;
1688 }
1689 if (msghdr->cfm_len != sizeof(struct cfil_msg_action)) {
1690 OSIncrementAtomic(&cfil_stats.cfs_ctl_action_bad_len);
1691 error = EINVAL;
1692 CFIL_LOG(LOG_ERR, "bad len: %u for op %u",
1693 msghdr->cfm_len,
1694 msghdr->cfm_op);
1695 goto done;
1696 }
1697 cfil_rw_lock_shared(&cfil_lck_rw);
1698 if (cfc != (void *)content_filters[kcunit - 1]) {
1699 CFIL_LOG(LOG_ERR, "unitinfo does not match for kcunit %u",
1700 kcunit);
1701 error = EINVAL;
1702 cfil_rw_unlock_shared(&cfil_lck_rw);
1703 goto done;
1704 }
1705 cfil_rw_unlock_shared(&cfil_lck_rw);
1706
1711 1708 // Search for socket (TCP+UDP) and lock so
1708 so = cfil_socket_from_sock_id(msghdr->cfm_sock_id, false);
1709 if (so == NULL) {
1710 CFIL_LOG(LOG_NOTICE, "bad sock_id %llx",
1711 msghdr->cfm_sock_id);
1712 error = EINVAL;
1713 goto done;
1714 }
1715
1716 cfil_info = so->so_cfil_db != NULL ?
1717 cfil_db_get_cfil_info(so->so_cfil_db, msghdr->cfm_sock_id) : so->so_cfil;
1718
1719 if (cfil_info == NULL) {
1720 CFIL_LOG(LOG_NOTICE, "so %llx <id %llu> not attached",
1721 (uint64_t)VM_KERNEL_ADDRPERM(so), msghdr->cfm_sock_id);
1722 error = EINVAL;
1723 goto unlock;
1724 } else if (cfil_info->cfi_flags & CFIF_DROP) {
1725 CFIL_LOG(LOG_NOTICE, "so %llx drop set",
1726 (uint64_t)VM_KERNEL_ADDRPERM(so));
1727 error = EINVAL;
1728 goto unlock;
1729 }
1730 entry = &cfil_info->cfi_entries[kcunit - 1];
1731 if (entry->cfe_filter == NULL) {
1732 CFIL_LOG(LOG_NOTICE, "so %llx no filter",
1733 (uint64_t)VM_KERNEL_ADDRPERM(so));
1734 error = EINVAL;
1735 goto unlock;
1736 }
1737
1738 if (entry->cfe_flags & CFEF_SENT_SOCK_ATTACHED) {
1739 entry->cfe_flags |= CFEF_DATA_START;
1740 } else {
1741 CFIL_LOG(LOG_ERR,
1742 "so %llx attached not sent for %u",
1743 (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit);
1744 error = EINVAL;
1745 goto unlock;
1746 }
1747
1748 microuptime(&entry->cfe_last_action);
1749 CFI_ADD_TIME_LOG(cfil_info, &entry->cfe_last_action, &cfil_info->cfi_first_event, msghdr->cfm_op);
1750
1751 action_msg = (struct cfil_msg_action *)msghdr;
1752
1753 switch (msghdr->cfm_op) {
1754 case CFM_OP_DATA_UPDATE:
1755 #if VERDICT_DEBUG
1756 CFIL_LOG(LOG_ERR, "CFIL: VERDICT RECEIVED: <so %llx sockID %llu> <IN peek:%llu pass:%llu, OUT peek:%llu pass:%llu>",
1757 (uint64_t)VM_KERNEL_ADDRPERM(so),
1758 cfil_info->cfi_sock_id,
1759 action_msg->cfa_in_peek_offset, action_msg->cfa_in_pass_offset,
1760 action_msg->cfa_out_peek_offset, action_msg->cfa_out_pass_offset);
1761 #endif
1762 /*
1766 1763 * A verdict has been received, so at this point we know this
1767 1764 * socket connection is allowed. Unblock any waiting thread
1768 1765 * immediately before proceeding to process the verdict.
1766 */
1767 cfil_sock_received_verdict(so);
1768
1769 if (action_msg->cfa_out_peek_offset != 0 ||
1770 action_msg->cfa_out_pass_offset != 0) {
1771 error = cfil_action_data_pass(so, cfil_info, kcunit, 1,
1772 action_msg->cfa_out_pass_offset,
1773 action_msg->cfa_out_peek_offset);
1774 }
1775 if (error == EJUSTRETURN) {
1776 error = 0;
1777 }
1778 if (error != 0) {
1779 break;
1780 }
1781 if (action_msg->cfa_in_peek_offset != 0 ||
1782 action_msg->cfa_in_pass_offset != 0) {
1783 error = cfil_action_data_pass(so, cfil_info, kcunit, 0,
1784 action_msg->cfa_in_pass_offset,
1785 action_msg->cfa_in_peek_offset);
1786 }
1787 if (error == EJUSTRETURN) {
1788 error = 0;
1789 }
1790 break;
1791
1792 case CFM_OP_DROP:
1793 #if VERDICT_DEBUG
1794 CFIL_LOG(LOG_ERR, "CFIL: VERDICT DROP RECEIVED: <so %llx sockID %llu> <IN peek:%llu pass:%llu, OUT peek:%llu pass:%llu>",
1795 (uint64_t)VM_KERNEL_ADDRPERM(so),
1796 cfil_info->cfi_sock_id,
1797 action_msg->cfa_in_peek_offset, action_msg->cfa_in_pass_offset,
1798 action_msg->cfa_out_peek_offset, action_msg->cfa_out_pass_offset);
1799 #endif
1800 error = cfil_action_drop(so, cfil_info, kcunit);
1801 cfil_sock_received_verdict(so);
1802 break;
1803
1804 default:
1805 error = EINVAL;
1806 break;
1807 }
1808 unlock:
1809 socket_unlock(so, 1);
1810 done:
1811 mbuf_freem(m);
1812
1813 if (error == 0) {
1814 OSIncrementAtomic(&cfil_stats.cfs_ctl_send_ok);
1815 } else {
1816 OSIncrementAtomic(&cfil_stats.cfs_ctl_send_bad);
1817 }
1818
1819 return error;
1820 }
1821
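/*
 * cfil_ctl_getopt
 *
 * Kernel control "getopt" callback. CFIL_OPT_NECP_CONTROL_UNIT returns the
 * NECP filter control unit of this filter instance; CFIL_OPT_GET_SOCKET_INFO
 * looks up a filtered socket by sock_id and returns its addresses, pids and
 * UUIDs. For the latter, cfil_lck_rw is dropped before taking the socket
 * lock to respect the lock ordering described in the comments below.
 */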
1822 static errno_t
1823 cfil_ctl_getopt(kern_ctl_ref kctlref, u_int32_t kcunit, void *unitinfo,
1824 int opt, void *data, size_t *len)
1825 {
1826 #pragma unused(kctlref, opt)
1827 struct cfil_info *cfil_info = NULL;
1828 errno_t error = 0;
1829 struct content_filter *cfc = (struct content_filter *)unitinfo;
1830
1831 CFIL_LOG(LOG_NOTICE, "");
1832
1833 cfil_rw_lock_shared(&cfil_lck_rw);
1834
1835 if (content_filters == NULL) {
1836 CFIL_LOG(LOG_ERR, "no content filter");
1837 error = EINVAL;
1838 goto done;
1839 }
1840 if (kcunit > MAX_CONTENT_FILTER) {
1841 CFIL_LOG(LOG_ERR, "kcunit %u > MAX_CONTENT_FILTER (%d)",
1842 kcunit, MAX_CONTENT_FILTER);
1843 error = EINVAL;
1844 goto done;
1845 }
1846 if (cfc != (void *)content_filters[kcunit - 1]) {
1847 CFIL_LOG(LOG_ERR, "unitinfo does not match for kcunit %u",
1848 kcunit);
1849 error = EINVAL;
1850 goto done;
1851 }
1852 switch (opt) {
1853 case CFIL_OPT_NECP_CONTROL_UNIT:
1854 if (*len < sizeof(uint32_t)) {
1855 CFIL_LOG(LOG_ERR, "len too small %lu", *len);
1856 error = EINVAL;
1857 goto done;
1858 }
1859 if (data != NULL) {
1860 *(uint32_t *)data = cfc->cf_necp_control_unit;
1861 }
1862 break;
1863 case CFIL_OPT_GET_SOCKET_INFO:
1864 if (*len != sizeof(struct cfil_opt_sock_info)) {
1865 CFIL_LOG(LOG_ERR, "len does not match %lu", *len);
1866 error = EINVAL;
1867 goto done;
1868 }
1869 if (data == NULL) {
1870 CFIL_LOG(LOG_ERR, "data not passed");
1871 error = EINVAL;
1872 goto done;
1873 }
1874
1875 struct cfil_opt_sock_info *sock_info =
1876 (struct cfil_opt_sock_info *) data;
1877
1878 // Unlock here so that we never hold both cfil_lck_rw and the
1879 // socket_lock at the same time. Otherwise, this can deadlock
1880 // because soclose() takes the socket_lock and then exclusive
1881 // cfil_lck_rw and we require the opposite order.
1882
1883 // WARNING: Be sure to never use anything protected
1884 // by cfil_lck_rw beyond this point.
1885 // WARNING: Be sure to avoid fallthrough and
1886 // goto return_already_unlocked from this branch.
1887 cfil_rw_unlock_shared(&cfil_lck_rw);
1888
1889 // Search (TCP+UDP) and lock socket
1890 struct socket *sock =
1891 cfil_socket_from_sock_id(sock_info->cfs_sock_id, false);
1892 if (sock == NULL) {
1893 #if LIFECYCLE_DEBUG
1894 CFIL_LOG(LOG_ERR, "CFIL: GET_SOCKET_INFO failed: bad sock_id %llu",
1895 sock_info->cfs_sock_id);
1896 #endif
1897 error = ENOENT;
1898 goto return_already_unlocked;
1899 }
1900
1901 cfil_info = (sock->so_cfil_db != NULL) ?
1902 cfil_db_get_cfil_info(sock->so_cfil_db, sock_info->cfs_sock_id) : sock->so_cfil;
1903
1904 if (cfil_info == NULL) {
1905 #if LIFECYCLE_DEBUG
1906 CFIL_LOG(LOG_ERR, "CFIL: GET_SOCKET_INFO failed: so %llx not attached, cannot fetch info",
1907 (uint64_t)VM_KERNEL_ADDRPERM(sock));
1908 #endif
1909 error = EINVAL;
1910 socket_unlock(sock, 1);
1911 goto return_already_unlocked;
1912 }
1913
1914 // Fill out family, type, and protocol
1915 sock_info->cfs_sock_family = sock->so_proto->pr_domain->dom_family;
1916 sock_info->cfs_sock_type = sock->so_proto->pr_type;
1917 sock_info->cfs_sock_protocol = sock->so_proto->pr_protocol;
1918
1919 // Source and destination addresses
1920 struct inpcb *inp = sotoinpcb(sock);
1921 if (inp->inp_vflag & INP_IPV6) {
1922 struct in6_addr *laddr = NULL, *faddr = NULL;
1923 u_int16_t lport = 0, fport = 0;
1924
1925 cfil_get_flow_address_v6(cfil_info->cfi_hash_entry, inp,
1926 &laddr, &faddr, &lport, &fport);
1927 fill_ip6_sockaddr_4_6(&sock_info->cfs_local, laddr, lport);
1928 fill_ip6_sockaddr_4_6(&sock_info->cfs_remote, faddr, fport);
1929 } else if (inp->inp_vflag & INP_IPV4) {
1930 struct in_addr laddr = {.s_addr = 0}, faddr = {.s_addr = 0};
1931 u_int16_t lport = 0, fport = 0;
1932
1933 cfil_get_flow_address(cfil_info->cfi_hash_entry, inp,
1934 &laddr, &faddr, &lport, &fport);
1935 fill_ip_sockaddr_4_6(&sock_info->cfs_local, laddr, lport);
1936 fill_ip_sockaddr_4_6(&sock_info->cfs_remote, faddr, fport);
1937 }
1938
1939 // Set the pid info
1940 sock_info->cfs_pid = sock->last_pid;
1941 memcpy(sock_info->cfs_uuid, sock->last_uuid, sizeof(uuid_t));
1942
1943 if (sock->so_flags & SOF_DELEGATED) {
1944 sock_info->cfs_e_pid = sock->e_pid;
1945 memcpy(sock_info->cfs_e_uuid, sock->e_uuid, sizeof(uuid_t));
1946 } else {
1947 sock_info->cfs_e_pid = sock->last_pid;
1948 memcpy(sock_info->cfs_e_uuid, sock->last_uuid, sizeof(uuid_t));
1949 }
1950
1951 socket_unlock(sock, 1);
1952
1953 goto return_already_unlocked;
1954 default:
1955 error = ENOPROTOOPT;
1956 break;
1957 }
1958 done:
1959 cfil_rw_unlock_shared(&cfil_lck_rw);
1960
1961 return error;
1962
1963 return_already_unlocked:
1964
1965 return error;
1966 }
1967
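/*
 * cfil_ctl_setopt
 *
 * Kernel control "setopt" callback. The only supported option is
 * CFIL_OPT_NECP_CONTROL_UNIT, which binds this filter instance to an NECP
 * filter control unit and may only be set once.
 */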
1968 static errno_t
1969 cfil_ctl_setopt(kern_ctl_ref kctlref, u_int32_t kcunit, void *unitinfo,
1970 int opt, void *data, size_t len)
1971 {
1972 #pragma unused(kctlref, opt)
1973 errno_t error = 0;
1974 struct content_filter *cfc = (struct content_filter *)unitinfo;
1975
1976 CFIL_LOG(LOG_NOTICE, "");
1977
1978 cfil_rw_lock_exclusive(&cfil_lck_rw);
1979
1980 if (content_filters == NULL) {
1981 CFIL_LOG(LOG_ERR, "no content filter");
1982 error = EINVAL;
1983 goto done;
1984 }
1985 if (kcunit > MAX_CONTENT_FILTER) {
1986 CFIL_LOG(LOG_ERR, "kcunit %u > MAX_CONTENT_FILTER (%d)",
1987 kcunit, MAX_CONTENT_FILTER);
1988 error = EINVAL;
1989 goto done;
1990 }
1991 if (cfc != (void *)content_filters[kcunit - 1]) {
1992 CFIL_LOG(LOG_ERR, "unitinfo does not match for kcunit %u",
1993 kcunit);
1994 error = EINVAL;
1995 goto done;
1996 }
1997 switch (opt) {
1998 case CFIL_OPT_NECP_CONTROL_UNIT:
1999 if (len < sizeof(uint32_t)) {
2000 CFIL_LOG(LOG_ERR, "CFIL_OPT_NECP_CONTROL_UNIT "
2001 "len too small %lu", len);
2002 error = EINVAL;
2003 goto done;
2004 }
2005 if (cfc->cf_necp_control_unit != 0) {
2006 CFIL_LOG(LOG_ERR, "CFIL_OPT_NECP_CONTROL_UNIT "
2007 "already set %u",
2008 cfc->cf_necp_control_unit);
2009 error = EINVAL;
2010 goto done;
2011 }
2012 cfc->cf_necp_control_unit = *(uint32_t *)data;
2013 break;
2014 default:
2015 error = ENOPROTOOPT;
2016 break;
2017 }
2018 done:
2019 cfil_rw_unlock_exclusive(&cfil_lck_rw);
2020
2021 return error;
2022 }
2023
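/*
 * Illustrative sketch (not part of this file) of how a user space filter
 * agent might set its NECP filter control unit on the kernel control socket;
 * everything other than CONTENT_FILTER_CONTROL_NAME and
 * CFIL_OPT_NECP_CONTROL_UNIT is an assumption for illustration only:
 *
 *	int fd = socket(PF_SYSTEM, SOCK_DGRAM, SYSPROTO_CONTROL);
 *	struct ctl_info info = {};
 *	strlcpy(info.ctl_name, CONTENT_FILTER_CONTROL_NAME, sizeof(info.ctl_name));
 *	ioctl(fd, CTLIOCGINFO, &info);          // resolve the control id
 *	struct sockaddr_ctl addr = { .sc_len = sizeof(addr),
 *	    .sc_family = AF_SYSTEM, .ss_sysaddr = AF_SYS_CONTROL,
 *	    .sc_id = info.ctl_id, .sc_unit = 0 };
 *	connect(fd, (struct sockaddr *)&addr, sizeof(addr));
 *	uint32_t unit = 1;                      // must match the agent's NECP rules
 *	setsockopt(fd, SYSPROTO_CONTROL, CFIL_OPT_NECP_CONTROL_UNIT,
 *	    &unit, sizeof(unit));
 *
 * The control is registered with CTL_FLAG_PRIVILEGED, so only a privileged
 * agent can connect.
 */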
2024
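/*
 * cfil_ctl_rcvd
 *
 * Kernel control "rcvd" callback: invoked after the filter agent has read
 * data off its kernel control socket. Clears CFF_FLOW_CONTROLLED on the
 * filter and re-services the control queues of entries that were flow
 * controlled, until the filter becomes flow controlled again.
 */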
2025 static void
2026 cfil_ctl_rcvd(kern_ctl_ref kctlref, u_int32_t kcunit, void *unitinfo, int flags)
2027 {
2028 #pragma unused(kctlref, flags)
2029 struct content_filter *cfc = (struct content_filter *)unitinfo;
2030 struct socket *so = NULL;
2031 int error;
2032 struct cfil_entry *entry;
2033 struct cfil_info *cfil_info = NULL;
2034
2035 CFIL_LOG(LOG_INFO, "");
2036
2037 if (content_filters == NULL) {
2038 CFIL_LOG(LOG_ERR, "no content filter");
2039 OSIncrementAtomic(&cfil_stats.cfs_ctl_rcvd_bad);
2040 return;
2041 }
2042 if (kcunit > MAX_CONTENT_FILTER) {
2043 CFIL_LOG(LOG_ERR, "kcunit %u > MAX_CONTENT_FILTER (%d)",
2044 kcunit, MAX_CONTENT_FILTER);
2045 OSIncrementAtomic(&cfil_stats.cfs_ctl_rcvd_bad);
2046 return;
2047 }
2048 cfil_rw_lock_shared(&cfil_lck_rw);
2049 if (cfc != (void *)content_filters[kcunit - 1]) {
2050 CFIL_LOG(LOG_ERR, "unitinfo does not match for kcunit %u",
2051 kcunit);
2052 OSIncrementAtomic(&cfil_stats.cfs_ctl_rcvd_bad);
2053 goto done;
2054 }
2055 /* Let's assume the flow control is lifted */
2056 if (cfc->cf_flags & CFF_FLOW_CONTROLLED) {
2057 if (!cfil_rw_lock_shared_to_exclusive(&cfil_lck_rw)) {
2058 cfil_rw_lock_exclusive(&cfil_lck_rw);
2059 }
2060
2061 cfc->cf_flags &= ~CFF_FLOW_CONTROLLED;
2062
2063 cfil_rw_lock_exclusive_to_shared(&cfil_lck_rw);
2064 LCK_RW_ASSERT(&cfil_lck_rw, LCK_RW_ASSERT_SHARED);
2065 }
2066 /*
2067 * Flow control will be raised again as soon as an entry cannot enqueue
2068 * to the kernel control socket
2069 */
2070 while ((cfc->cf_flags & CFF_FLOW_CONTROLLED) == 0) {
2071 verify_content_filter(cfc);
2072
2073 cfil_rw_lock_assert_held(&cfil_lck_rw, 0);
2074
2075 /* Find an entry that is flow controlled */
2076 TAILQ_FOREACH(entry, &cfc->cf_sock_entries, cfe_link) {
2077 if (entry->cfe_cfil_info == NULL ||
2078 entry->cfe_cfil_info->cfi_so == NULL) {
2079 continue;
2080 }
2084 2081 if ((entry->cfe_flags & CFEF_FLOW_CONTROLLED) == 0) {
2085 2082 continue;
2086 2083 }
/* Found a flow-controlled entry: stop searching so it gets serviced below */
break;
2087 2084 }
2085 if (entry == NULL) {
2086 break;
2087 }
2088
2089 OSIncrementAtomic(&cfil_stats.cfs_ctl_rcvd_flow_lift);
2090
2091 cfil_info = entry->cfe_cfil_info;
2092 so = cfil_info->cfi_so;
2093
2094 cfil_rw_unlock_shared(&cfil_lck_rw);
2095 socket_lock(so, 1);
2096
2097 do {
2098 error = cfil_acquire_sockbuf(so, cfil_info, 1);
2099 if (error == 0) {
2100 error = cfil_data_service_ctl_q(so, cfil_info, kcunit, 1);
2101 }
2102 cfil_release_sockbuf(so, 1);
2103 if (error != 0) {
2104 break;
2105 }
2106
2107 error = cfil_acquire_sockbuf(so, cfil_info, 0);
2108 if (error == 0) {
2109 error = cfil_data_service_ctl_q(so, cfil_info, kcunit, 0);
2110 }
2111 cfil_release_sockbuf(so, 0);
2112 } while (0);
2113
2114 socket_lock_assert_owned(so);
2115 socket_unlock(so, 1);
2116
2117 cfil_rw_lock_shared(&cfil_lck_rw);
2118 }
2119 done:
2120 cfil_rw_unlock_shared(&cfil_lck_rw);
2121 }
2122
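/*
 * cfil_init
 *
 * One-time subsystem initialization: verifies structure layout invariants,
 * creates the zones for content_filter, cfil_info, hash entry and db
 * allocations, sets up the global read-write lock, registers the kernel
 * control and starts the UDP garbage collection thread.
 */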
2123 void
2124 cfil_init(void)
2125 {
2126 struct kern_ctl_reg kern_ctl;
2127 errno_t error = 0;
2128 vm_size_t content_filter_size = 0; /* size of content_filter */
2129 vm_size_t cfil_info_size = 0; /* size of cfil_info */
2130 vm_size_t cfil_hash_entry_size = 0; /* size of cfil_hash_entry */
2131 vm_size_t cfil_db_size = 0; /* size of cfil_db */
2132 unsigned int mbuf_limit = 0;
2133
2134 CFIL_LOG(LOG_NOTICE, "");
2135
2136 /*
2137 * Compile time verifications
2138 */
2139 _CASSERT(CFIL_MAX_FILTER_COUNT == MAX_CONTENT_FILTER);
2140 _CASSERT(sizeof(struct cfil_filter_stat) % sizeof(uint32_t) == 0);
2141 _CASSERT(sizeof(struct cfil_entry_stat) % sizeof(uint32_t) == 0);
2142 _CASSERT(sizeof(struct cfil_sock_stat) % sizeof(uint32_t) == 0);
2143
2144 /*
2148 2145 * Run time verifications
2146 */
2147 VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_ctl_q_in_enqueued,
2148 sizeof(uint32_t)));
2149 VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_ctl_q_out_enqueued,
2150 sizeof(uint32_t)));
2151 VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_ctl_q_in_peeked,
2152 sizeof(uint32_t)));
2153 VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_ctl_q_out_peeked,
2154 sizeof(uint32_t)));
2155
2156 VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_pending_q_in_enqueued,
2157 sizeof(uint32_t)));
2158 VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_pending_q_out_enqueued,
2159 sizeof(uint32_t)));
2160
2161 VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_inject_q_in_enqueued,
2162 sizeof(uint32_t)));
2163 VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_inject_q_out_enqueued,
2164 sizeof(uint32_t)));
2165 VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_inject_q_in_passed,
2166 sizeof(uint32_t)));
2167 VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_inject_q_out_passed,
2168 sizeof(uint32_t)));
2169
2170 /*
2171 * Zone for content filters kernel control sockets
2172 */
2173 content_filter_size = sizeof(struct content_filter);
2174 content_filter_zone = zinit(content_filter_size,
2175 CONTENT_FILTER_ZONE_MAX * content_filter_size,
2176 0,
2177 CONTENT_FILTER_ZONE_NAME);
2178 if (content_filter_zone == NULL) {
2179 panic("%s: zinit(%s) failed", __func__,
2180 CONTENT_FILTER_ZONE_NAME);
2181 /* NOTREACHED */
2182 }
2183 zone_change(content_filter_zone, Z_CALLERACCT, FALSE);
2184 zone_change(content_filter_zone, Z_EXPAND, TRUE);
2185
2186 /*
2187 * Zone for per socket content filters
2188 */
2189 cfil_info_size = sizeof(struct cfil_info);
2190 cfil_info_zone = zinit(cfil_info_size,
2191 CFIL_INFO_ZONE_MAX * cfil_info_size,
2192 0,
2193 CFIL_INFO_ZONE_NAME);
2194 if (cfil_info_zone == NULL) {
2195 panic("%s: zinit(%s) failed", __func__, CFIL_INFO_ZONE_NAME);
2196 /* NOTREACHED */
2197 }
2198 zone_change(cfil_info_zone, Z_CALLERACCT, FALSE);
2199 zone_change(cfil_info_zone, Z_EXPAND, TRUE);
2200
2201 /*
2202 * Zone for content filters cfil hash entries and db
2203 */
2204 cfil_hash_entry_size = sizeof(struct cfil_hash_entry);
2205 cfil_hash_entry_zone = zinit(cfil_hash_entry_size,
2206 CFIL_HASH_ENTRY_ZONE_MAX * cfil_hash_entry_size,
2207 0,
2208 CFIL_HASH_ENTRY_ZONE_NAME);
2209 if (cfil_hash_entry_zone == NULL) {
2210 panic("%s: zinit(%s) failed", __func__, CFIL_HASH_ENTRY_ZONE_NAME);
2211 /* NOTREACHED */
2212 }
2213 zone_change(cfil_hash_entry_zone, Z_CALLERACCT, FALSE);
2214 zone_change(cfil_hash_entry_zone, Z_EXPAND, TRUE);
2215
2216 cfil_db_size = sizeof(struct cfil_db);
2217 cfil_db_zone = zinit(cfil_db_size,
2218 CFIL_DB_ZONE_MAX * cfil_db_size,
2219 0,
2220 CFIL_DB_ZONE_NAME);
2221 if (cfil_db_zone == NULL) {
2222 panic("%s: zinit(%s) failed", __func__, CFIL_DB_ZONE_NAME);
2223 /* NOTREACHED */
2224 }
2225 zone_change(cfil_db_zone, Z_CALLERACCT, FALSE);
2226 zone_change(cfil_db_zone, Z_EXPAND, TRUE);
2227
2228 /*
2229 * Allocate locks
2230 */
2231 cfil_lck_grp_attr = lck_grp_attr_alloc_init();
2232 if (cfil_lck_grp_attr == NULL) {
2233 panic("%s: lck_grp_attr_alloc_init failed", __func__);
2234 /* NOTREACHED */
2235 }
2236 cfil_lck_grp = lck_grp_alloc_init("content filter",
2237 cfil_lck_grp_attr);
2238 if (cfil_lck_grp == NULL) {
2239 panic("%s: lck_grp_alloc_init failed", __func__);
2240 /* NOTREACHED */
2241 }
2242 cfil_lck_attr = lck_attr_alloc_init();
2243 if (cfil_lck_attr == NULL) {
2244 panic("%s: lck_attr_alloc_init failed", __func__);
2245 /* NOTREACHED */
2246 }
2247 lck_rw_init(&cfil_lck_rw, cfil_lck_grp, cfil_lck_attr);
2248
2249 TAILQ_INIT(&cfil_sock_head);
2250
2251 /*
2252 * Register kernel control
2253 */
2254 bzero(&kern_ctl, sizeof(kern_ctl));
2255 strlcpy(kern_ctl.ctl_name, CONTENT_FILTER_CONTROL_NAME,
2256 sizeof(kern_ctl.ctl_name));
2257 kern_ctl.ctl_flags = CTL_FLAG_PRIVILEGED | CTL_FLAG_REG_EXTENDED;
2258 kern_ctl.ctl_sendsize = 512 * 1024; /* enough? */
2259 kern_ctl.ctl_recvsize = 512 * 1024; /* enough? */
2260 kern_ctl.ctl_connect = cfil_ctl_connect;
2261 kern_ctl.ctl_disconnect = cfil_ctl_disconnect;
2262 kern_ctl.ctl_send = cfil_ctl_send;
2263 kern_ctl.ctl_getopt = cfil_ctl_getopt;
2264 kern_ctl.ctl_setopt = cfil_ctl_setopt;
2265 kern_ctl.ctl_rcvd = cfil_ctl_rcvd;
2266 error = ctl_register(&kern_ctl, &cfil_kctlref);
2267 if (error != 0) {
2268 CFIL_LOG(LOG_ERR, "ctl_register failed: %d", error);
2269 return;
2270 }
2271
2275 2272 // Spawn thread for garbage collection
2273 if (kernel_thread_start(cfil_udp_gc_thread_func, NULL,
2274 &cfil_udp_gc_thread) != KERN_SUCCESS) {
2275 panic_plain("%s: Can't create UDP GC thread", __func__);
2276 /* NOTREACHED */
2277 }
2278 /* this must not fail */
2279 VERIFY(cfil_udp_gc_thread != NULL);
2280
2281 // Set UDP per-flow mbuf thresholds to 1/32 of platform max
2282 mbuf_limit = MAX(UDP_FLOW_GC_MBUF_CNT_MAX, (nmbclusters << MCLSHIFT) >> UDP_FLOW_GC_MBUF_SHIFT);
2283 cfil_udp_gc_mbuf_num_max = (mbuf_limit >> MCLSHIFT);
2284 cfil_udp_gc_mbuf_cnt_max = mbuf_limit;
2285 }
2286
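/*
 * cfil_info_alloc
 *
 * Allocates and initializes the per-flow cfil_info. For TCP (hash_entry ==
 * NULL) the cfil_info is attached to so->so_cfil; for UDP it is attached to
 * the per-socket hash entry. cfi_sock_id combines so_gencnt with the flow
 * hash so that it is not a kernel pointer.
 */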
2287 struct cfil_info *
2288 cfil_info_alloc(struct socket *so, struct cfil_hash_entry *hash_entry)
2289 {
2290 int kcunit;
2291 struct cfil_info *cfil_info = NULL;
2292 struct inpcb *inp = sotoinpcb(so);
2293
2294 CFIL_LOG(LOG_INFO, "");
2295
2296 socket_lock_assert_owned(so);
2297
2298 cfil_info = zalloc(cfil_info_zone);
2299 if (cfil_info == NULL) {
2300 goto done;
2301 }
2302 bzero(cfil_info, sizeof(struct cfil_info));
2303
2304 cfil_queue_init(&cfil_info->cfi_snd.cfi_inject_q);
2305 cfil_queue_init(&cfil_info->cfi_rcv.cfi_inject_q);
2306
2307 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
2308 struct cfil_entry *entry;
2309
2310 entry = &cfil_info->cfi_entries[kcunit - 1];
2311 entry->cfe_cfil_info = cfil_info;
2312
2313 /* Initialize the filter entry */
2314 entry->cfe_filter = NULL;
2315 entry->cfe_flags = 0;
2316 entry->cfe_necp_control_unit = 0;
2317 entry->cfe_snd.cfe_pass_offset = 0;
2318 entry->cfe_snd.cfe_peek_offset = 0;
2319 entry->cfe_snd.cfe_peeked = 0;
2320 entry->cfe_rcv.cfe_pass_offset = 0;
2321 entry->cfe_rcv.cfe_peek_offset = 0;
2322 entry->cfe_rcv.cfe_peeked = 0;
2323 /*
2327 2324 * Timestamp the last action to avoid prematurely
2325 * triggering garbage collection
2326 */
2327 microuptime(&entry->cfe_last_action);
2328
2329 cfil_queue_init(&entry->cfe_snd.cfe_pending_q);
2330 cfil_queue_init(&entry->cfe_rcv.cfe_pending_q);
2331 cfil_queue_init(&entry->cfe_snd.cfe_ctl_q);
2332 cfil_queue_init(&entry->cfe_rcv.cfe_ctl_q);
2333 }
2334
2335 cfil_rw_lock_exclusive(&cfil_lck_rw);
2336
2337 /*
2338 * Create a cfi_sock_id that's not the socket pointer!
2339 */
2340
2341 if (hash_entry == NULL) {
2342 // This is the TCP case, cfil_info is tracked per socket
2343 if (inp->inp_flowhash == 0) {
2344 inp->inp_flowhash = inp_calc_flowhash(inp);
2345 }
2346
2347 so->so_cfil = cfil_info;
2348 cfil_info->cfi_so = so;
2349 cfil_info->cfi_sock_id =
2350 ((so->so_gencnt << 32) | inp->inp_flowhash);
2351 } else {
2352 // This is the UDP case, cfil_info is tracked in per-socket hash
2353 cfil_info->cfi_so = so;
2354 hash_entry->cfentry_cfil = cfil_info;
2355 cfil_info->cfi_hash_entry = hash_entry;
2356 cfil_info->cfi_sock_id = ((so->so_gencnt << 32) | (hash_entry->cfentry_flowhash & 0xffffffff));
2357 CFIL_LOG(LOG_DEBUG, "CFIL: UDP inp_flowhash %x so_gencnt %llx entry flowhash %x sockID %llx",
2358 inp->inp_flowhash, so->so_gencnt, hash_entry->cfentry_flowhash, cfil_info->cfi_sock_id);
2359
2363 2360 // Wake up the gc thread if this is the first flow added
2361 if (cfil_sock_udp_attached_count == 0) {
2362 thread_wakeup((caddr_t)&cfil_sock_udp_attached_count);
2363 }
2364
2365 cfil_sock_udp_attached_count++;
2366 }
2367
2368 TAILQ_INSERT_TAIL(&cfil_sock_head, cfil_info, cfi_link);
2369 SLIST_INIT(&cfil_info->cfi_ordered_entries);
2370
2371 cfil_sock_attached_count++;
2372
2373 cfil_rw_unlock_exclusive(&cfil_lck_rw);
2374
2375 done:
2376 if (cfil_info != NULL) {
2377 OSIncrementAtomic(&cfil_stats.cfs_cfi_alloc_ok);
2378 } else {
2379 OSIncrementAtomic(&cfil_stats.cfs_cfi_alloc_fail);
2380 }
2381
2382 return cfil_info;
2383 }
2384
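/*
 * cfil_info_attach_unit
 *
 * Attaches the cfil_info to every active content filter whose NECP control
 * unit matches filter_control_unit, keeping cfi_ordered_entries sorted by
 * control unit. Returns non-zero if at least one filter was attached.
 */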
2385 int
2386 cfil_info_attach_unit(struct socket *so, uint32_t filter_control_unit, struct cfil_info *cfil_info)
2387 {
2388 int kcunit;
2389 int attached = 0;
2390
2391 CFIL_LOG(LOG_INFO, "");
2392
2393 socket_lock_assert_owned(so);
2394
2395 cfil_rw_lock_exclusive(&cfil_lck_rw);
2396
2397 for (kcunit = 1;
2398 content_filters != NULL && kcunit <= MAX_CONTENT_FILTER;
2399 kcunit++) {
2400 struct content_filter *cfc = content_filters[kcunit - 1];
2401 struct cfil_entry *entry;
2402 struct cfil_entry *iter_entry;
2403 struct cfil_entry *iter_prev;
2404
2405 if (cfc == NULL) {
2406 continue;
2407 }
2408 if (!(cfc->cf_necp_control_unit & filter_control_unit)) {
2409 continue;
2410 }
2411
2412 entry = &cfil_info->cfi_entries[kcunit - 1];
2413
2414 entry->cfe_filter = cfc;
2415 entry->cfe_necp_control_unit = cfc->cf_necp_control_unit;
2416 TAILQ_INSERT_TAIL(&cfc->cf_sock_entries, entry, cfe_link);
2417 cfc->cf_sock_count++;
2418
2419 /* Insert the entry into the list ordered by control unit */
2420 iter_prev = NULL;
2421 SLIST_FOREACH(iter_entry, &cfil_info->cfi_ordered_entries, cfe_order_link) {
2422 if (entry->cfe_necp_control_unit < iter_entry->cfe_necp_control_unit) {
2423 break;
2424 }
2425 iter_prev = iter_entry;
2426 }
2427
2428 if (iter_prev == NULL) {
2429 SLIST_INSERT_HEAD(&cfil_info->cfi_ordered_entries, entry, cfe_order_link);
2430 } else {
2431 SLIST_INSERT_AFTER(iter_prev, entry, cfe_order_link);
2432 }
2433
2434 verify_content_filter(cfc);
2435 attached = 1;
2436 entry->cfe_flags |= CFEF_CFIL_ATTACHED;
2437 }
2438
2439 cfil_rw_unlock_exclusive(&cfil_lck_rw);
2440
2441 return attached;
2442 }
2443
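/*
 * cfil_info_free
 *
 * Detaches the cfil_info from all content filters, drains its pending,
 * control and inject queues, and returns it to its zone.
 */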
2444 static void
2445 cfil_info_free(struct cfil_info *cfil_info)
2446 {
2447 int kcunit;
2448 uint64_t in_drain = 0;
2449 uint64_t out_drained = 0;
2450
2451 if (cfil_info == NULL) {
2452 return;
2453 }
2454
2455 CFIL_LOG(LOG_INFO, "");
2456
2457 cfil_rw_lock_exclusive(&cfil_lck_rw);
2458
2459 for (kcunit = 1;
2460 content_filters != NULL && kcunit <= MAX_CONTENT_FILTER;
2461 kcunit++) {
2462 struct cfil_entry *entry;
2463 struct content_filter *cfc;
2464
2465 entry = &cfil_info->cfi_entries[kcunit - 1];
2466
2467 /* Don't be silly and try to detach twice */
2468 if (entry->cfe_filter == NULL) {
2469 continue;
2470 }
2471
2472 cfc = content_filters[kcunit - 1];
2473
2474 VERIFY(cfc == entry->cfe_filter);
2475
2476 entry->cfe_filter = NULL;
2477 entry->cfe_necp_control_unit = 0;
2478 TAILQ_REMOVE(&cfc->cf_sock_entries, entry, cfe_link);
2479 cfc->cf_sock_count--;
2480
2481 verify_content_filter(cfc);
2482 }
2483 if (cfil_info->cfi_hash_entry != NULL) {
2484 cfil_sock_udp_attached_count--;
2485 }
2486 cfil_sock_attached_count--;
2487 TAILQ_REMOVE(&cfil_sock_head, cfil_info, cfi_link);
2488
2489 out_drained += cfil_queue_drain(&cfil_info->cfi_snd.cfi_inject_q);
2490 in_drain += cfil_queue_drain(&cfil_info->cfi_rcv.cfi_inject_q);
2491
2492 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
2493 struct cfil_entry *entry;
2494
2495 entry = &cfil_info->cfi_entries[kcunit - 1];
2496 out_drained += cfil_queue_drain(&entry->cfe_snd.cfe_pending_q);
2497 in_drain += cfil_queue_drain(&entry->cfe_rcv.cfe_pending_q);
2498 out_drained += cfil_queue_drain(&entry->cfe_snd.cfe_ctl_q);
2499 in_drain += cfil_queue_drain(&entry->cfe_rcv.cfe_ctl_q);
2500 }
2501 cfil_rw_unlock_exclusive(&cfil_lck_rw);
2502
2503 if (out_drained) {
2504 OSIncrementAtomic(&cfil_stats.cfs_flush_out_free);
2505 }
2506 if (in_drain) {
2507 OSIncrementAtomic(&cfil_stats.cfs_flush_in_free);
2508 }
2509
2510 zfree(cfil_info_zone, cfil_info);
2511 }
2512
2513 /*
2514 * Received a verdict from userspace for a socket.
2515 * Perform any delayed operation if needed.
2516 */
2517 static void
2518 cfil_sock_received_verdict(struct socket *so)
2519 {
2520 if (so == NULL || so->so_cfil == NULL) {
2521 return;
2522 }
2523
2524 so->so_cfil->cfi_flags |= CFIF_INITIAL_VERDICT;
2525
2526 /*
2530 2527 * If the socket has already been connected, trigger
2531 2528 * soisconnected() now.
2529 */
2530 if (so->so_cfil->cfi_flags & CFIF_SOCKET_CONNECTED) {
2531 so->so_cfil->cfi_flags &= ~CFIF_SOCKET_CONNECTED;
2532 soisconnected(so);
2533 return;
2534 }
2535 }
2536
2537 /*
2538 * Entry point from Sockets layer
2539 * The socket is locked.
2540 *
2544 2541 * Checks if a connected socket is subject to filtering and
2545 2542 * still pending the initial verdict.
2543 */
2544 boolean_t
2545 cfil_sock_connected_pending_verdict(struct socket *so)
2546 {
2547 if (so == NULL || so->so_cfil == NULL) {
2548 return false;
2549 }
2550
2551 if (so->so_cfil->cfi_flags & CFIF_INITIAL_VERDICT) {
2552 return false;
2553 } else {
2554 /*
2555 * Remember that this protocol is already connected, so
2556 * we will trigger soisconnected() upon receipt of
2560 2557 * the initial verdict later.
2558 */
2559 so->so_cfil->cfi_flags |= CFIF_SOCKET_CONNECTED;
2560 return true;
2561 }
2562 }
2563
2564 boolean_t
2565 cfil_filter_present(void)
2566 {
2567 return cfil_active_count > 0;
2568 }
2569
2570 /*
2571 * Entry point from Sockets layer
2572 * The socket is locked.
2573 */
2574 errno_t
2575 cfil_sock_attach(struct socket *so, struct sockaddr *local, struct sockaddr *remote, int dir)
2576 {
2577 errno_t error = 0;
2578 uint32_t filter_control_unit;
2579
2580 socket_lock_assert_owned(so);
2581
2585 2582 /* Limit ourselves to TCP sockets that are not MPTCP subflows */
2583 if ((so->so_proto->pr_domain->dom_family != PF_INET &&
2584 so->so_proto->pr_domain->dom_family != PF_INET6) ||
2585 so->so_proto->pr_type != SOCK_STREAM ||
2586 so->so_proto->pr_protocol != IPPROTO_TCP ||
2587 (so->so_flags & SOF_MP_SUBFLOW) != 0 ||
2588 (so->so_flags1 & SOF1_CONTENT_FILTER_SKIP) != 0) {
2589 goto done;
2590 }
2591
2592 filter_control_unit = necp_socket_get_content_filter_control_unit(so);
2593 if (filter_control_unit == 0) {
2594 goto done;
2595 }
2596
2597 if (filter_control_unit == NECP_FILTER_UNIT_NO_FILTER) {
2598 goto done;
2599 }
2600 if ((filter_control_unit & NECP_MASK_USERSPACE_ONLY) != 0) {
2601 OSIncrementAtomic(&cfil_stats.cfs_sock_userspace_only);
2602 goto done;
2603 }
2604 if (cfil_active_count == 0) {
2605 OSIncrementAtomic(&cfil_stats.cfs_sock_attach_in_vain);
2606 goto done;
2607 }
2608 if (so->so_cfil != NULL) {
2609 OSIncrementAtomic(&cfil_stats.cfs_sock_attach_already);
2610 CFIL_LOG(LOG_ERR, "already attached");
2611 } else {
2612 cfil_info_alloc(so, NULL);
2613 if (so->so_cfil == NULL) {
2614 error = ENOMEM;
2615 OSIncrementAtomic(&cfil_stats.cfs_sock_attach_no_mem);
2616 goto done;
2617 }
2618 so->so_cfil->cfi_dir = dir;
2619 }
2620 if (cfil_info_attach_unit(so, filter_control_unit, so->so_cfil) == 0) {
2621 CFIL_LOG(LOG_ERR, "cfil_info_attach_unit(%u) failed",
2622 filter_control_unit);
2623 OSIncrementAtomic(&cfil_stats.cfs_sock_attach_failed);
2624 goto done;
2625 }
2626 CFIL_LOG(LOG_INFO, "so %llx filter_control_unit %u sockID %llx",
2627 (uint64_t)VM_KERNEL_ADDRPERM(so),
2628 filter_control_unit, so->so_cfil->cfi_sock_id);
2629
2630 so->so_flags |= SOF_CONTENT_FILTER;
2631 OSIncrementAtomic(&cfil_stats.cfs_sock_attached);
2632
2633 /* Hold a reference on the socket */
2634 so->so_usecount++;
2635
2636 /*
2640 2637 * Save passed addresses for the attach event msg (in case a resend
2641 2638 * is needed).
2639 */
2640 if (remote != NULL) {
2641 memcpy(&so->so_cfil->cfi_so_attach_faddr, remote, remote->sa_len);
2642 }
2643 if (local != NULL) {
2644 memcpy(&so->so_cfil->cfi_so_attach_laddr, local, local->sa_len);
2645 }
2646
2647 error = cfil_dispatch_attach_event(so, so->so_cfil, 0, dir);
2648 /* We can recover from flow control or out of memory errors */
2649 if (error == ENOBUFS || error == ENOMEM) {
2650 error = 0;
2651 } else if (error != 0) {
2652 goto done;
2653 }
2654
2655 CFIL_INFO_VERIFY(so->so_cfil);
2656 done:
2657 return error;
2658 }
2659
2660 /*
2661 * Entry point from Sockets layer
2662 * The socket is locked.
2663 */
2664 errno_t
2665 cfil_sock_detach(struct socket *so)
2666 {
2667 if (IS_UDP(so)) {
2668 cfil_db_free(so);
2669 return 0;
2670 }
2671
2672 if (so->so_cfil) {
2673 if (so->so_flags & SOF_CONTENT_FILTER) {
2674 so->so_flags &= ~SOF_CONTENT_FILTER;
2675 VERIFY(so->so_usecount > 0);
2676 so->so_usecount--;
2677 }
2678 cfil_info_free(so->so_cfil);
2679 so->so_cfil = NULL;
2680 OSIncrementAtomic(&cfil_stats.cfs_sock_detached);
2681 }
2682 return 0;
2683 }
2684
2685 /*
2689 2686 * Fill in the address info of an event message from either
2690 2687 * the socket or the passed-in address info.
2688 */
2689 static void
2690 cfil_fill_event_msg_addresses(struct cfil_hash_entry *entry, struct inpcb *inp,
2691 union sockaddr_in_4_6 *sin_src, union sockaddr_in_4_6 *sin_dst,
2692 boolean_t isIPv4, boolean_t outgoing)
2693 {
2694 if (isIPv4) {
2695 struct in_addr laddr = {0}, faddr = {0};
2696 u_int16_t lport = 0, fport = 0;
2697
2698 cfil_get_flow_address(entry, inp, &laddr, &faddr, &lport, &fport);
2699
2700 if (outgoing) {
2701 fill_ip_sockaddr_4_6(sin_src, laddr, lport);
2702 fill_ip_sockaddr_4_6(sin_dst, faddr, fport);
2703 } else {
2704 fill_ip_sockaddr_4_6(sin_src, faddr, fport);
2705 fill_ip_sockaddr_4_6(sin_dst, laddr, lport);
2706 }
2707 } else {
2708 struct in6_addr *laddr = NULL, *faddr = NULL;
2709 u_int16_t lport = 0, fport = 0;
2710
2711 cfil_get_flow_address_v6(entry, inp, &laddr, &faddr, &lport, &fport);
2712 if (outgoing) {
2713 fill_ip6_sockaddr_4_6(sin_src, laddr, lport);
2714 fill_ip6_sockaddr_4_6(sin_dst, faddr, fport);
2715 } else {
2716 fill_ip6_sockaddr_4_6(sin_src, faddr, fport);
2717 fill_ip6_sockaddr_4_6(sin_dst, laddr, lport);
2718 }
2719 }
2720 }
2721
2722 static boolean_t
2723 cfil_dispatch_attach_event_sign(cfil_crypto_state_t crypto_state,
2724 struct cfil_info *cfil_info,
2725 struct cfil_msg_sock_attached *msg)
2726 {
2727 struct cfil_crypto_data data = {};
2728
2729 if (crypto_state == NULL || msg == NULL || cfil_info == NULL) {
2730 return false;
2731 }
2732
2733 data.sock_id = msg->cfs_msghdr.cfm_sock_id;
2734 data.direction = msg->cfs_conn_dir;
2735
2736 data.pid = msg->cfs_pid;
2737 data.effective_pid = msg->cfs_e_pid;
2738 uuid_copy(data.uuid, msg->cfs_uuid);
2739 uuid_copy(data.effective_uuid, msg->cfs_e_uuid);
2740 data.socketProtocol = msg->cfs_sock_protocol;
2741 if (data.direction == CFS_CONNECTION_DIR_OUT) {
2742 data.remote.sin6 = msg->cfs_dst.sin6;
2743 data.local.sin6 = msg->cfs_src.sin6;
2744 } else {
2745 data.remote.sin6 = msg->cfs_src.sin6;
2746 data.local.sin6 = msg->cfs_dst.sin6;
2747 }
2748
2749 // At attach, if local address is already present, no need to re-sign subsequent data messages.
2750 if (!NULLADDRESS(data.local)) {
2751 cfil_info->cfi_isSignatureLatest = true;
2752 }
2753
2754 msg->cfs_signature_length = sizeof(cfil_crypto_signature);
2755 if (cfil_crypto_sign_data(crypto_state, &data, msg->cfs_signature, &msg->cfs_signature_length) != 0) {
2756 msg->cfs_signature_length = 0;
2757 CFIL_LOG(LOG_ERR, "CFIL: Failed to sign attached msg <sockID %llu>",
2758 msg->cfs_msghdr.cfm_sock_id);
2759 return false;
2760 }
2761
2762 return true;
2763 }
2764
2765 static boolean_t
2766 cfil_dispatch_data_event_sign(cfil_crypto_state_t crypto_state,
2767 struct socket *so, struct cfil_info *cfil_info,
2768 struct cfil_msg_data_event *msg)
2769 {
2770 struct cfil_crypto_data data = {};
2771
2772 if (crypto_state == NULL || msg == NULL ||
2773 so == NULL || cfil_info == NULL) {
2774 return false;
2775 }
2776
2777 data.sock_id = cfil_info->cfi_sock_id;
2778 data.direction = cfil_info->cfi_dir;
2779 data.pid = so->last_pid;
2780 memcpy(data.uuid, so->last_uuid, sizeof(uuid_t));
2781 if (so->so_flags & SOF_DELEGATED) {
2782 data.effective_pid = so->e_pid;
2783 memcpy(data.effective_uuid, so->e_uuid, sizeof(uuid_t));
2784 } else {
2785 data.effective_pid = so->last_pid;
2786 memcpy(data.effective_uuid, so->last_uuid, sizeof(uuid_t));
2787 }
2788 data.socketProtocol = so->so_proto->pr_protocol;
2789
2790 if (data.direction == CFS_CONNECTION_DIR_OUT) {
2791 data.remote.sin6 = msg->cfc_dst.sin6;
2792 data.local.sin6 = msg->cfc_src.sin6;
2793 } else {
2794 data.remote.sin6 = msg->cfc_src.sin6;
2795 data.local.sin6 = msg->cfc_dst.sin6;
2796 }
2797
2801 2798 // At the first data event, the local address may show up for the first time; update the
2802 2799 // address cache so subsequent data messages no longer need to be re-signed.
2800 if (!NULLADDRESS(data.local)) {
2801 memcpy(&cfil_info->cfi_so_attach_laddr, &data.local, data.local.sa.sa_len);
2802 cfil_info->cfi_isSignatureLatest = true;
2803 }
2804
2805 msg->cfd_signature_length = sizeof(cfil_crypto_signature);
2806 if (cfil_crypto_sign_data(crypto_state, &data, msg->cfd_signature, &msg->cfd_signature_length) != 0) {
2807 msg->cfd_signature_length = 0;
2808 CFIL_LOG(LOG_ERR, "CFIL: Failed to sign data msg <sockID %llu>",
2809 msg->cfd_msghdr.cfm_sock_id);
2810 return false;
2811 }
2812
2813 return true;
2814 }
2815
2816 static boolean_t
2817 cfil_dispatch_closed_event_sign(cfil_crypto_state_t crypto_state,
2818 struct socket *so, struct cfil_info *cfil_info,
2819 struct cfil_msg_sock_closed *msg)
2820 {
2821 struct cfil_crypto_data data = {};
2822 struct cfil_hash_entry hash_entry = {};
2823 struct cfil_hash_entry *hash_entry_ptr = NULL;
2824 struct inpcb *inp = (struct inpcb *)so->so_pcb;
2825
2826 if (crypto_state == NULL || msg == NULL ||
2827 so == NULL || inp == NULL || cfil_info == NULL) {
2828 return false;
2829 }
2830
2831 data.sock_id = cfil_info->cfi_sock_id;
2832 data.direction = cfil_info->cfi_dir;
2833
2834 data.pid = so->last_pid;
2835 memcpy(data.uuid, so->last_uuid, sizeof(uuid_t));
2836 if (so->so_flags & SOF_DELEGATED) {
2837 data.effective_pid = so->e_pid;
2838 memcpy(data.effective_uuid, so->e_uuid, sizeof(uuid_t));
2839 } else {
2840 data.effective_pid = so->last_pid;
2841 memcpy(data.effective_uuid, so->last_uuid, sizeof(uuid_t));
2842 }
2843 data.socketProtocol = so->so_proto->pr_protocol;
2844
2845 /*
2846 * Fill in address info:
2847 * For UDP, use the cfil_info hash entry directly.
2851 2848 * For TCP, compose a hash entry with the saved addresses.
2849 */
2850 if (cfil_info->cfi_hash_entry != NULL) {
2851 hash_entry_ptr = cfil_info->cfi_hash_entry;
2852 } else if (cfil_info->cfi_so_attach_faddr.sa.sa_len > 0 ||
2853 cfil_info->cfi_so_attach_laddr.sa.sa_len > 0) {
2854 fill_cfil_hash_entry_from_address(&hash_entry, TRUE, &cfil_info->cfi_so_attach_laddr.sa);
2855 fill_cfil_hash_entry_from_address(&hash_entry, FALSE, &cfil_info->cfi_so_attach_faddr.sa);
2856 hash_entry_ptr = &hash_entry;
2857 }
2858 if (hash_entry_ptr != NULL) {
2859 boolean_t outgoing = (cfil_info->cfi_dir == CFS_CONNECTION_DIR_OUT);
2860 union sockaddr_in_4_6 *src = outgoing ? &data.local : &data.remote;
2861 union sockaddr_in_4_6 *dst = outgoing ? &data.remote : &data.local;
2862 cfil_fill_event_msg_addresses(hash_entry_ptr, inp, src, dst, inp->inp_vflag & INP_IPV4, outgoing);
2863 }
2864
2865 data.byte_count_in = cfil_info->cfi_byte_inbound_count;
2866 data.byte_count_out = cfil_info->cfi_byte_outbound_count;
2867
2868 msg->cfc_signature_length = sizeof(cfil_crypto_signature);
2869 if (cfil_crypto_sign_data(crypto_state, &data, msg->cfc_signature, &msg->cfc_signature_length) != 0) {
2870 msg->cfc_signature_length = 0;
2871 CFIL_LOG(LOG_ERR, "CFIL: Failed to sign closed msg <sockID %llu>",
2872 msg->cfc_msghdr.cfm_sock_id);
2873 return false;
2874 }
2875
2876 return true;
2877 }
2878
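/*
 * cfil_dispatch_attach_event
 *
 * Builds a CFM_OP_SOCKET_ATTACHED event (addresses, pids, UUIDs, audit
 * token and signature) and enqueues it on the filter agent's kernel control
 * socket. A kcunit of 0 selects the first entry in control-unit order.
 * ENOBUFS from ctl_enqueuedata() puts the filter in flow-controlled state.
 */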
2879 static int
2880 cfil_dispatch_attach_event(struct socket *so, struct cfil_info *cfil_info,
2881 uint32_t kcunit, int conn_dir)
2882 {
2883 errno_t error = 0;
2884 struct cfil_entry *entry = NULL;
2885 struct cfil_msg_sock_attached msg_attached;
2886 struct content_filter *cfc = NULL;
2887 struct inpcb *inp = (struct inpcb *)so->so_pcb;
2888 struct cfil_hash_entry *hash_entry_ptr = NULL;
2889 struct cfil_hash_entry hash_entry;
2890
2891 memset(&hash_entry, 0, sizeof(struct cfil_hash_entry));
2892 proc_t p = PROC_NULL;
2893 task_t t = TASK_NULL;
2894
2895 socket_lock_assert_owned(so);
2896
2897 cfil_rw_lock_shared(&cfil_lck_rw);
2898
2899 if (so->so_proto == NULL || so->so_proto->pr_domain == NULL) {
2900 error = EINVAL;
2901 goto done;
2902 }
2903
2904 if (kcunit == 0) {
2905 entry = SLIST_FIRST(&cfil_info->cfi_ordered_entries);
2906 } else {
2907 entry = &cfil_info->cfi_entries[kcunit - 1];
2908 }
2909
2910 if (entry == NULL) {
2911 goto done;
2912 }
2913
2914 cfc = entry->cfe_filter;
2915 if (cfc == NULL) {
2916 goto done;
2917 }
2918
2919 if ((entry->cfe_flags & CFEF_SENT_SOCK_ATTACHED)) {
2920 goto done;
2921 }
2922
2923 if (kcunit == 0) {
2924 kcunit = CFI_ENTRY_KCUNIT(cfil_info, entry);
2925 }
2926
2927 CFIL_LOG(LOG_INFO, "so %llx filter_control_unit %u kcunit %u",
2928 (uint64_t)VM_KERNEL_ADDRPERM(so), entry->cfe_necp_control_unit, kcunit);
2929
2930 /* Would be wasteful to try when flow controlled */
2931 if (cfc->cf_flags & CFF_FLOW_CONTROLLED) {
2932 error = ENOBUFS;
2933 goto done;
2934 }
2935
2936 bzero(&msg_attached, sizeof(struct cfil_msg_sock_attached));
2937 msg_attached.cfs_msghdr.cfm_len = sizeof(struct cfil_msg_sock_attached);
2938 msg_attached.cfs_msghdr.cfm_version = CFM_VERSION_CURRENT;
2939 msg_attached.cfs_msghdr.cfm_type = CFM_TYPE_EVENT;
2940 msg_attached.cfs_msghdr.cfm_op = CFM_OP_SOCKET_ATTACHED;
2941 msg_attached.cfs_msghdr.cfm_sock_id = entry->cfe_cfil_info->cfi_sock_id;
2942
2943 msg_attached.cfs_sock_family = so->so_proto->pr_domain->dom_family;
2944 msg_attached.cfs_sock_type = so->so_proto->pr_type;
2945 msg_attached.cfs_sock_protocol = so->so_proto->pr_protocol;
2946 msg_attached.cfs_pid = so->last_pid;
2947 memcpy(msg_attached.cfs_uuid, so->last_uuid, sizeof(uuid_t));
2948 if (so->so_flags & SOF_DELEGATED) {
2949 msg_attached.cfs_e_pid = so->e_pid;
2950 memcpy(msg_attached.cfs_e_uuid, so->e_uuid, sizeof(uuid_t));
2951 } else {
2952 msg_attached.cfs_e_pid = so->last_pid;
2953 memcpy(msg_attached.cfs_e_uuid, so->last_uuid, sizeof(uuid_t));
2954 }
2955
2956 /*
2957 * Fill in address info:
2958 * For UDP, use the cfil_info hash entry directly.
2962 2959 * For TCP, compose a hash entry with the saved addresses.
2960 */
2961 if (cfil_info->cfi_hash_entry != NULL) {
2962 hash_entry_ptr = cfil_info->cfi_hash_entry;
2963 } else if (cfil_info->cfi_so_attach_faddr.sa.sa_len > 0 ||
2964 cfil_info->cfi_so_attach_laddr.sa.sa_len > 0) {
2965 fill_cfil_hash_entry_from_address(&hash_entry, TRUE, &cfil_info->cfi_so_attach_laddr.sa);
2966 fill_cfil_hash_entry_from_address(&hash_entry, FALSE, &cfil_info->cfi_so_attach_faddr.sa);
2967 hash_entry_ptr = &hash_entry;
2968 }
2969 if (hash_entry_ptr != NULL) {
2970 cfil_fill_event_msg_addresses(hash_entry_ptr, inp,
2971 &msg_attached.cfs_src, &msg_attached.cfs_dst,
2972 inp->inp_vflag & INP_IPV4, conn_dir == CFS_CONNECTION_DIR_OUT);
2973 }
2974 msg_attached.cfs_conn_dir = conn_dir;
2975
2976 if (msg_attached.cfs_e_pid != 0) {
2977 p = proc_find(msg_attached.cfs_e_pid);
2978 if (p != PROC_NULL) {
2979 t = proc_task(p);
2980 if (t != TASK_NULL) {
2981 audit_token_t audit_token;
2982 mach_msg_type_number_t count = TASK_AUDIT_TOKEN_COUNT;
2983 if (task_info(t, TASK_AUDIT_TOKEN, (task_info_t)&audit_token, &count) == KERN_SUCCESS) {
2984 memcpy(&msg_attached.cfs_audit_token, &audit_token, sizeof(msg_attached.cfs_audit_token));
2985 } else {
2986 CFIL_LOG(LOG_ERR, "CFIL: Failed to get process audit token <sockID %llu> ",
2987 entry->cfe_cfil_info->cfi_sock_id);
2988 }
2989 }
2990 proc_rele(p);
2991 }
2992 }
2993
2994 cfil_dispatch_attach_event_sign(entry->cfe_filter->cf_crypto_state, cfil_info, &msg_attached);
2995
2996 #if LIFECYCLE_DEBUG
2997 CFIL_LOG(LOG_DEBUG, "CFIL: LIFECYCLE: SENDING ATTACH UP <sockID %llu> ",
2998 entry->cfe_cfil_info->cfi_sock_id);
2999 #endif
3000
3001 error = ctl_enqueuedata(entry->cfe_filter->cf_kcref,
3002 entry->cfe_filter->cf_kcunit,
3003 &msg_attached,
3004 sizeof(struct cfil_msg_sock_attached),
3005 CTL_DATA_EOR);
3006 if (error != 0) {
3007 CFIL_LOG(LOG_ERR, "ctl_enqueuedata() failed: %d", error);
3008 goto done;
3009 }
3010 microuptime(&entry->cfe_last_event);
3011 cfil_info->cfi_first_event.tv_sec = entry->cfe_last_event.tv_sec;
3012 cfil_info->cfi_first_event.tv_usec = entry->cfe_last_event.tv_usec;
3013
3014 entry->cfe_flags |= CFEF_SENT_SOCK_ATTACHED;
3015 OSIncrementAtomic(&cfil_stats.cfs_attach_event_ok);
3016 done:
3017
3018 /* We can recover from flow control */
3019 if (error == ENOBUFS) {
3020 entry->cfe_flags |= CFEF_FLOW_CONTROLLED;
3021 OSIncrementAtomic(&cfil_stats.cfs_attach_event_flow_control);
3022
3023 if (!cfil_rw_lock_shared_to_exclusive(&cfil_lck_rw)) {
3024 cfil_rw_lock_exclusive(&cfil_lck_rw);
3025 }
3026
3027 cfc->cf_flags |= CFF_FLOW_CONTROLLED;
3028
3029 cfil_rw_unlock_exclusive(&cfil_lck_rw);
3030 } else {
3031 if (error != 0) {
3032 OSIncrementAtomic(&cfil_stats.cfs_attach_event_fail);
3033 }
3034
3035 cfil_rw_unlock_shared(&cfil_lck_rw);
3036 }
3037 return error;
3038 }
3039
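/*
 * cfil_dispatch_disconnect_event
 *
 * Sends a single CFM_OP_DISCONNECT_OUT or CFM_OP_DISCONNECT_IN event per
 * direction to the filter agent. The outgoing event is deferred with EBUSY
 * while data is still queued for delivery to the filter.
 */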
3040 static int
3041 cfil_dispatch_disconnect_event(struct socket *so, struct cfil_info *cfil_info, uint32_t kcunit, int outgoing)
3042 {
3043 errno_t error = 0;
3044 struct mbuf *msg = NULL;
3045 struct cfil_entry *entry;
3046 struct cfe_buf *entrybuf;
3047 struct cfil_msg_hdr msg_disconnected;
3048 struct content_filter *cfc;
3049
3050 socket_lock_assert_owned(so);
3051
3052 cfil_rw_lock_shared(&cfil_lck_rw);
3053
3054 entry = &cfil_info->cfi_entries[kcunit - 1];
3055 if (outgoing) {
3056 entrybuf = &entry->cfe_snd;
3057 } else {
3058 entrybuf = &entry->cfe_rcv;
3059 }
3060
3061 cfc = entry->cfe_filter;
3062 if (cfc == NULL) {
3063 goto done;
3064 }
3065
3066 CFIL_LOG(LOG_INFO, "so %llx kcunit %u outgoing %d",
3067 (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit, outgoing);
3068
3069 /*
3070 * Send the disconnection event once
3071 */
3072 if ((outgoing && (entry->cfe_flags & CFEF_SENT_DISCONNECT_OUT)) ||
3073 (!outgoing && (entry->cfe_flags & CFEF_SENT_DISCONNECT_IN))) {
3074 CFIL_LOG(LOG_INFO, "so %llx disconnect already sent",
3075 (uint64_t)VM_KERNEL_ADDRPERM(so));
3076 goto done;
3077 }
3078
3079 /*
3080 * We're not disconnected as long as some data is waiting
3081 * to be delivered to the filter
3082 */
3083 if (outgoing && cfil_queue_empty(&entrybuf->cfe_ctl_q) == 0) {
3084 CFIL_LOG(LOG_INFO, "so %llx control queue not empty",
3085 (uint64_t)VM_KERNEL_ADDRPERM(so));
3086 error = EBUSY;
3087 goto done;
3088 }
3089 /* Would be wasteful to try when flow controlled */
3090 if (cfc->cf_flags & CFF_FLOW_CONTROLLED) {
3091 error = ENOBUFS;
3092 goto done;
3093 }
3094
3095 #if LIFECYCLE_DEBUG
3096 cfil_info_log(LOG_ERR, cfil_info, outgoing ?
3097 "CFIL: LIFECYCLE: OUT - SENDING DISCONNECT UP":
3098 "CFIL: LIFECYCLE: IN - SENDING DISCONNECT UP");
3099 #endif
3100
3101 bzero(&msg_disconnected, sizeof(struct cfil_msg_hdr));
3102 msg_disconnected.cfm_len = sizeof(struct cfil_msg_hdr);
3103 msg_disconnected.cfm_version = CFM_VERSION_CURRENT;
3104 msg_disconnected.cfm_type = CFM_TYPE_EVENT;
3105 msg_disconnected.cfm_op = outgoing ? CFM_OP_DISCONNECT_OUT :
3106 CFM_OP_DISCONNECT_IN;
3107 msg_disconnected.cfm_sock_id = entry->cfe_cfil_info->cfi_sock_id;
3108 error = ctl_enqueuedata(entry->cfe_filter->cf_kcref,
3109 entry->cfe_filter->cf_kcunit,
3110 &msg_disconnected,
3111 sizeof(struct cfil_msg_hdr),
3112 CTL_DATA_EOR);
3113 if (error != 0) {
3114 CFIL_LOG(LOG_ERR, "ctl_enqueuembuf() failed: %d", error);
3115 mbuf_freem(msg);
3116 goto done;
3117 }
3118 microuptime(&entry->cfe_last_event);
3119 CFI_ADD_TIME_LOG(cfil_info, &entry->cfe_last_event, &cfil_info->cfi_first_event, msg_disconnected.cfm_op);
3120
3121 /* Remember we have sent the disconnection message */
3122 if (outgoing) {
3123 entry->cfe_flags |= CFEF_SENT_DISCONNECT_OUT;
3124 OSIncrementAtomic(&cfil_stats.cfs_disconnect_out_event_ok);
3125 } else {
3126 entry->cfe_flags |= CFEF_SENT_DISCONNECT_IN;
3127 OSIncrementAtomic(&cfil_stats.cfs_disconnect_in_event_ok);
3128 }
3129 done:
3130 if (error == ENOBUFS) {
3131 entry->cfe_flags |= CFEF_FLOW_CONTROLLED;
3132 OSIncrementAtomic(
3133 &cfil_stats.cfs_disconnect_event_flow_control);
3134
3135 if (!cfil_rw_lock_shared_to_exclusive(&cfil_lck_rw)) {
3136 cfil_rw_lock_exclusive(&cfil_lck_rw);
3137 }
3138
3139 cfc->cf_flags |= CFF_FLOW_CONTROLLED;
3140
3141 cfil_rw_unlock_exclusive(&cfil_lck_rw);
3142 } else {
3143 if (error != 0) {
3144 OSIncrementAtomic(
3145 &cfil_stats.cfs_disconnect_event_fail);
3146 }
3147
3148 cfil_rw_unlock_shared(&cfil_lck_rw);
3149 }
3150 return error;
3151 }
3152
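/*
 * cfil_dispatch_closed_event
 *
 * Sends a single CFM_OP_SOCKET_CLOSED event per filter, carrying the
 * per-flow byte counts, the operation time log and a signature, once the
 * attach event has been sent.
 */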
3153 int
3154 cfil_dispatch_closed_event(struct socket *so, struct cfil_info *cfil_info, int kcunit)
3155 {
3156 struct cfil_entry *entry;
3157 struct cfil_msg_sock_closed msg_closed;
3158 errno_t error = 0;
3159 struct content_filter *cfc;
3160
3161 socket_lock_assert_owned(so);
3162
3163 cfil_rw_lock_shared(&cfil_lck_rw);
3164
3165 entry = &cfil_info->cfi_entries[kcunit - 1];
3166 cfc = entry->cfe_filter;
3167 if (cfc == NULL) {
3168 goto done;
3169 }
3170
3171 CFIL_LOG(LOG_INFO, "so %llx kcunit %d",
3172 (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit);
3173
3174 /* Would be wasteful to try when flow controlled */
3175 if (cfc->cf_flags & CFF_FLOW_CONTROLLED) {
3176 error = ENOBUFS;
3177 goto done;
3178 }
3179 /*
3180 * Send a single closed message per filter
3181 */
3182 if ((entry->cfe_flags & CFEF_SENT_SOCK_CLOSED) != 0) {
3183 goto done;
3184 }
3185 if ((entry->cfe_flags & CFEF_SENT_SOCK_ATTACHED) == 0) {
3186 goto done;
3187 }
3188
3189 microuptime(&entry->cfe_last_event);
3190 CFI_ADD_TIME_LOG(cfil_info, &entry->cfe_last_event, &cfil_info->cfi_first_event, CFM_OP_SOCKET_CLOSED);
3191
3192 bzero(&msg_closed, sizeof(struct cfil_msg_sock_closed));
3193 msg_closed.cfc_msghdr.cfm_len = sizeof(struct cfil_msg_sock_closed);
3194 msg_closed.cfc_msghdr.cfm_version = CFM_VERSION_CURRENT;
3195 msg_closed.cfc_msghdr.cfm_type = CFM_TYPE_EVENT;
3196 msg_closed.cfc_msghdr.cfm_op = CFM_OP_SOCKET_CLOSED;
3197 msg_closed.cfc_msghdr.cfm_sock_id = entry->cfe_cfil_info->cfi_sock_id;
3198 msg_closed.cfc_first_event.tv_sec = cfil_info->cfi_first_event.tv_sec;
3199 msg_closed.cfc_first_event.tv_usec = cfil_info->cfi_first_event.tv_usec;
3200 memcpy(msg_closed.cfc_op_time, cfil_info->cfi_op_time, sizeof(uint32_t) * CFI_MAX_TIME_LOG_ENTRY);
3201 memcpy(msg_closed.cfc_op_list, cfil_info->cfi_op_list, sizeof(unsigned char) * CFI_MAX_TIME_LOG_ENTRY);
3202 msg_closed.cfc_op_list_ctr = cfil_info->cfi_op_list_ctr;
3203 msg_closed.cfc_byte_inbound_count = cfil_info->cfi_byte_inbound_count;
3204 msg_closed.cfc_byte_outbound_count = cfil_info->cfi_byte_outbound_count;
3205
3206 cfil_dispatch_closed_event_sign(entry->cfe_filter->cf_crypto_state, so, cfil_info, &msg_closed);
3207
3208 #if LIFECYCLE_DEBUG
3209 CFIL_LOG(LOG_ERR, "CFIL: LIFECYCLE: SENDING CLOSED UP: <sock id %llu> op ctr %d, start time %llu.%llu", msg_closed.cfc_msghdr.cfm_sock_id, cfil_info->cfi_op_list_ctr, cfil_info->cfi_first_event.tv_sec, cfil_info->cfi_first_event.tv_usec);
3210 #endif
3211 /* for debugging
3212 * if (msg_closed.cfc_op_list_ctr > CFI_MAX_TIME_LOG_ENTRY) {
3213 * msg_closed.cfc_op_list_ctr = CFI_MAX_TIME_LOG_ENTRY; // just in case
3214 * }
3215 * for (unsigned int i = 0; i < msg_closed.cfc_op_list_ctr ; i++) {
3216 * CFIL_LOG(LOG_ERR, "MD: socket %llu event %2u, time + %u msec", msg_closed.cfc_msghdr.cfm_sock_id, (unsigned short)msg_closed.cfc_op_list[i], msg_closed.cfc_op_time[i]);
3217 * }
3218 */
3219
3220 error = ctl_enqueuedata(entry->cfe_filter->cf_kcref,
3221 entry->cfe_filter->cf_kcunit,
3222 &msg_closed,
3223 sizeof(struct cfil_msg_sock_closed),
3224 CTL_DATA_EOR);
3225 if (error != 0) {
3226 CFIL_LOG(LOG_ERR, "ctl_enqueuedata() failed: %d",
3227 error);
3228 goto done;
3229 }
3230
3231 entry->cfe_flags |= CFEF_SENT_SOCK_CLOSED;
3232 OSIncrementAtomic(&cfil_stats.cfs_closed_event_ok);
3233 done:
3234 /* We can recover from flow control */
3235 if (error == ENOBUFS) {
3236 entry->cfe_flags |= CFEF_FLOW_CONTROLLED;
3237 OSIncrementAtomic(&cfil_stats.cfs_closed_event_flow_control);
3238
3239 if (!cfil_rw_lock_shared_to_exclusive(&cfil_lck_rw)) {
3240 cfil_rw_lock_exclusive(&cfil_lck_rw);
3241 }
3242
3243 cfc->cf_flags |= CFF_FLOW_CONTROLLED;
3244
3245 cfil_rw_unlock_exclusive(&cfil_lck_rw);
3246 } else {
3247 if (error != 0) {
3248 OSIncrementAtomic(&cfil_stats.cfs_closed_event_fail);
3249 }
3250
3251 cfil_rw_unlock_shared(&cfil_lck_rw);
3252 }
3253
3254 return error;
3255 }
3256
3257 static void
3258 fill_ip6_sockaddr_4_6(union sockaddr_in_4_6 *sin46,
3259 struct in6_addr *ip6, u_int16_t port)
3260 {
3261 struct sockaddr_in6 *sin6 = &sin46->sin6;
3262
3263 sin6->sin6_family = AF_INET6;
3264 sin6->sin6_len = sizeof(*sin6);
3265 sin6->sin6_port = port;
3266 sin6->sin6_addr = *ip6;
3267 if (IN6_IS_SCOPE_EMBED(&sin6->sin6_addr)) {
3268 sin6->sin6_scope_id = ntohs(sin6->sin6_addr.s6_addr16[1]);
3269 sin6->sin6_addr.s6_addr16[1] = 0;
3270 }
3271 }
3272
3273 static void
3274 fill_ip_sockaddr_4_6(union sockaddr_in_4_6 *sin46,
3275 struct in_addr ip, u_int16_t port)
3276 {
3277 struct sockaddr_in *sin = &sin46->sin;
3278
3279 sin->sin_family = AF_INET;
3280 sin->sin_len = sizeof(*sin);
3281 sin->sin_port = port;
3282 sin->sin_addr.s_addr = ip.s_addr;
3283 }
3284
3285 static void
3286 cfil_get_flow_address_v6(struct cfil_hash_entry *entry, struct inpcb *inp,
3287 struct in6_addr **laddr, struct in6_addr **faddr,
3288 u_int16_t *lport, u_int16_t *fport)
3289 {
3290 if (entry != NULL) {
3291 *laddr = &entry->cfentry_laddr.addr6;
3292 *faddr = &entry->cfentry_faddr.addr6;
3293 *lport = entry->cfentry_lport;
3294 *fport = entry->cfentry_fport;
3295 } else {
3296 *laddr = &inp->in6p_laddr;
3297 *faddr = &inp->in6p_faddr;
3298 *lport = inp->inp_lport;
3299 *fport = inp->inp_fport;
3300 }
3301 }
3302
3303 static void
3304 cfil_get_flow_address(struct cfil_hash_entry *entry, struct inpcb *inp,
3305 struct in_addr *laddr, struct in_addr *faddr,
3306 u_int16_t *lport, u_int16_t *fport)
3307 {
3308 if (entry != NULL) {
3309 *laddr = entry->cfentry_laddr.addr46.ia46_addr4;
3310 *faddr = entry->cfentry_faddr.addr46.ia46_addr4;
3311 *lport = entry->cfentry_lport;
3312 *fport = entry->cfentry_fport;
3313 } else {
3314 *laddr = inp->inp_laddr;
3315 *faddr = inp->inp_faddr;
3316 *lport = inp->inp_lport;
3317 *fport = inp->inp_fport;
3318 }
3319 }
3320
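/*
 * cfil_dispatch_data_event
 *
 * Copies [copyoffset, copyoffset + copylen) of the mbuf chain, prepends a
 * CFM_OP_DATA_OUT/CFM_OP_DATA_IN header with the peek offsets and address
 * info, and enqueues the message on the filter agent's kernel control
 * socket. ENOBUFS puts the filter in flow-controlled state.
 */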
3321 static int
3322 cfil_dispatch_data_event(struct socket *so, struct cfil_info *cfil_info, uint32_t kcunit, int outgoing,
3323 struct mbuf *data, unsigned int copyoffset, unsigned int copylen)
3324 {
3325 errno_t error = 0;
3326 struct mbuf *copy = NULL;
3327 struct mbuf *msg = NULL;
3328 unsigned int one = 1;
3329 struct cfil_msg_data_event *data_req;
3330 size_t hdrsize;
3331 struct inpcb *inp = (struct inpcb *)so->so_pcb;
3332 struct cfil_entry *entry;
3333 struct cfe_buf *entrybuf;
3334 struct content_filter *cfc;
3335 struct timeval tv;
3336
3337 cfil_rw_lock_shared(&cfil_lck_rw);
3338
3339 entry = &cfil_info->cfi_entries[kcunit - 1];
3340 if (outgoing) {
3341 entrybuf = &entry->cfe_snd;
3342 } else {
3343 entrybuf = &entry->cfe_rcv;
3344 }
3345
3346 cfc = entry->cfe_filter;
3347 if (cfc == NULL) {
3348 goto done;
3349 }
3350
3351 data = cfil_data_start(data);
3352 if (data == NULL || (data->m_flags & M_PKTHDR) == 0) {
3353 CFIL_LOG(LOG_ERR, "NOT PKTHDR");
3354 goto done;
3355 }
3356
3357 CFIL_LOG(LOG_INFO, "so %llx kcunit %u outgoing %d",
3358 (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit, outgoing);
3359
3360 socket_lock_assert_owned(so);
3361
3365 3362 /* Would be wasteful to try when flow controlled */
3363 if (cfc->cf_flags & CFF_FLOW_CONTROLLED) {
3364 error = ENOBUFS;
3365 goto done;
3366 }
3367
3368 /* Make a copy of the data to pass to kernel control socket */
3369 copy = m_copym_mode(data, copyoffset, copylen, M_DONTWAIT,
3370 M_COPYM_NOOP_HDR);
3371 if (copy == NULL) {
3372 CFIL_LOG(LOG_ERR, "m_copym_mode() failed");
3373 error = ENOMEM;
3374 goto done;
3375 }
3376
3377 /* We need an mbuf packet for the message header */
3378 hdrsize = sizeof(struct cfil_msg_data_event);
3379 error = mbuf_allocpacket(MBUF_DONTWAIT, hdrsize, &one, &msg);
3380 if (error != 0) {
3381 CFIL_LOG(LOG_ERR, "mbuf_allocpacket() failed");
3382 m_freem(copy);
3383 /*
3387 3384 * ENOBUFS is reserved to indicate flow control, so return ENOMEM here
3385 */
3386 error = ENOMEM;
3387 goto done;
3388 }
3389 mbuf_setlen(msg, hdrsize);
3390 mbuf_pkthdr_setlen(msg, hdrsize + copylen);
3391 msg->m_next = copy;
3392 data_req = (struct cfil_msg_data_event *)mbuf_data(msg);
3393 bzero(data_req, hdrsize);
3394 data_req->cfd_msghdr.cfm_len = hdrsize + copylen;
3395 data_req->cfd_msghdr.cfm_version = 1;
3396 data_req->cfd_msghdr.cfm_type = CFM_TYPE_EVENT;
3397 data_req->cfd_msghdr.cfm_op =
3398 outgoing ? CFM_OP_DATA_OUT : CFM_OP_DATA_IN;
3399 data_req->cfd_msghdr.cfm_sock_id =
3400 entry->cfe_cfil_info->cfi_sock_id;
3401 data_req->cfd_start_offset = entrybuf->cfe_peeked;
3402 data_req->cfd_end_offset = entrybuf->cfe_peeked + copylen;
3403
3404 /*
3405 * Copy address/port into event msg.
3409 3406 * For non-connected sockets the addresses need to be copied from the
3410 3407 * passed parameters.
3408 */
3409 cfil_fill_event_msg_addresses(cfil_info->cfi_hash_entry, inp,
3410 &data_req->cfc_src, &data_req->cfc_dst,
3411 inp->inp_vflag & INP_IPV4, outgoing);
3412
3413 if (cfil_info->cfi_isSignatureLatest == false) {
3414 cfil_dispatch_data_event_sign(entry->cfe_filter->cf_crypto_state, so, cfil_info, data_req);
3415 }
3416
3417 microuptime(&tv);
3418 CFI_ADD_TIME_LOG(cfil_info, &tv, &cfil_info->cfi_first_event, data_req->cfd_msghdr.cfm_op);
3419
3420 /* Pass the message to the content filter */
3421 error = ctl_enqueuembuf(entry->cfe_filter->cf_kcref,
3422 entry->cfe_filter->cf_kcunit,
3423 msg, CTL_DATA_EOR);
3424 if (error != 0) {
3425 CFIL_LOG(LOG_ERR, "ctl_enqueuembuf() failed: %d", error);
3426 mbuf_freem(msg);
3427 goto done;
3428 }
3429 entry->cfe_flags &= ~CFEF_FLOW_CONTROLLED;
3430 OSIncrementAtomic(&cfil_stats.cfs_data_event_ok);
3431
3432 #if VERDICT_DEBUG
3433 CFIL_LOG(LOG_ERR, "CFIL: VERDICT ACTION: so %llx sockID %llu outgoing %d: mbuf %llx copyoffset %u copylen %u",
3434 (uint64_t)VM_KERNEL_ADDRPERM(so), cfil_info->cfi_sock_id, outgoing, (uint64_t)VM_KERNEL_ADDRPERM(data), copyoffset, copylen);
3435 #endif
3436
3437 done:
3438 if (error == ENOBUFS) {
3439 entry->cfe_flags |= CFEF_FLOW_CONTROLLED;
3440 OSIncrementAtomic(
3441 &cfil_stats.cfs_data_event_flow_control);
3442
3443 if (!cfil_rw_lock_shared_to_exclusive(&cfil_lck_rw)) {
3444 cfil_rw_lock_exclusive(&cfil_lck_rw);
3445 }
3446
3447 cfc->cf_flags |= CFF_FLOW_CONTROLLED;
3448
3449 cfil_rw_unlock_exclusive(&cfil_lck_rw);
3450 } else {
3451 if (error != 0) {
3452 OSIncrementAtomic(&cfil_stats.cfs_data_event_fail);
3453 }
3454
3455 cfil_rw_unlock_shared(&cfil_lck_rw);
3456 }
3457 return error;
3458 }
3459
3460 /*
3461 * Process the queue of data waiting to be delivered to content filter
3462 */
3463 static int
3464 cfil_data_service_ctl_q(struct socket *so, struct cfil_info *cfil_info, uint32_t kcunit, int outgoing)
3465 {
3466 errno_t error = 0;
3467 struct mbuf *data, *tmp = NULL;
3468 unsigned int datalen = 0, copylen = 0, copyoffset = 0;
3469 struct cfil_entry *entry;
3470 struct cfe_buf *entrybuf;
3471 uint64_t currentoffset = 0;
3472
3473 if (cfil_info == NULL) {
3474 return 0;
3475 }
3476
3477 CFIL_LOG(LOG_INFO, "so %llx kcunit %u outgoing %d",
3478 (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit, outgoing);
3479
3480 socket_lock_assert_owned(so);
3481
3482 entry = &cfil_info->cfi_entries[kcunit - 1];
3483 if (outgoing) {
3484 entrybuf = &entry->cfe_snd;
3485 } else {
3486 entrybuf = &entry->cfe_rcv;
3487 }
3488
3489 /* Send the attach event if not yet done */
3490 if ((entry->cfe_flags & CFEF_SENT_SOCK_ATTACHED) == 0) {
3491 error = cfil_dispatch_attach_event(so, cfil_info, CFI_ENTRY_KCUNIT(cfil_info, entry),
3492 outgoing ? CFS_CONNECTION_DIR_OUT : CFS_CONNECTION_DIR_IN);
3493 if (error != 0) {
3494 /* We can recover from flow control */
3495 if (error == ENOBUFS || error == ENOMEM) {
3496 error = 0;
3497 }
3498 goto done;
3499 }
3500 } else if ((entry->cfe_flags & CFEF_DATA_START) == 0) {
3501 OSIncrementAtomic(&cfil_stats.cfs_ctl_q_not_started);
3502 goto done;
3503 }
3504
3505 #if DATA_DEBUG
3506 CFIL_LOG(LOG_DEBUG, "CFIL: SERVICE CTL-Q: pass_offset %llu peeked %llu peek_offset %llu",
3507 entrybuf->cfe_pass_offset,
3508 entrybuf->cfe_peeked,
3509 entrybuf->cfe_peek_offset);
3510 #endif
3511
3512 /* Move all data that can pass */
3513 while ((data = cfil_queue_first(&entrybuf->cfe_ctl_q)) != NULL &&
3514 entrybuf->cfe_ctl_q.q_start < entrybuf->cfe_pass_offset) {
3515 datalen = cfil_data_length(data, NULL, NULL);
3516 tmp = data;
3517
3518 if (entrybuf->cfe_ctl_q.q_start + datalen <=
3519 entrybuf->cfe_pass_offset) {
3520 /*
3521 * The first mbuf can fully pass
3522 */
3523 copylen = datalen;
3524 } else {
3525 /*
3526 * The first mbuf can partially pass
3527 */
3528 copylen = entrybuf->cfe_pass_offset -
3529 entrybuf->cfe_ctl_q.q_start;
3530 }
3531 VERIFY(copylen <= datalen);
3532
3533 #if DATA_DEBUG
3534 CFIL_LOG(LOG_DEBUG,
3535 "CFIL: SERVICE CTL-Q PASSING: %llx first %llu peeked %llu pass %llu peek %llu"
3536 "datalen %u copylen %u",
3537 (uint64_t)VM_KERNEL_ADDRPERM(tmp),
3538 entrybuf->cfe_ctl_q.q_start,
3539 entrybuf->cfe_peeked,
3540 entrybuf->cfe_pass_offset,
3541 entrybuf->cfe_peek_offset,
3542 datalen, copylen);
3543 #endif
3544
3545 /*
3546 * Data that passes has been peeked at explicitly or
3547 * implicitly
3548 */
3549 if (entrybuf->cfe_ctl_q.q_start + copylen >
3550 entrybuf->cfe_peeked) {
3551 entrybuf->cfe_peeked =
3552 entrybuf->cfe_ctl_q.q_start + copylen;
3553 }
3554 /*
3555 * Stop on partial pass
3556 */
3557 if (copylen < datalen) {
3558 break;
3559 }
3560
3561 /* All good, move full data from ctl queue to pending queue */
3562 cfil_queue_remove(&entrybuf->cfe_ctl_q, data, datalen);
3563
3564 cfil_queue_enqueue(&entrybuf->cfe_pending_q, data, datalen);
3565 if (outgoing) {
3566 OSAddAtomic64(datalen,
3567 &cfil_stats.cfs_pending_q_out_enqueued);
3568 } else {
3569 OSAddAtomic64(datalen,
3570 &cfil_stats.cfs_pending_q_in_enqueued);
3571 }
3572 }
3573 CFIL_INFO_VERIFY(cfil_info);
3574 if (tmp != NULL) {
3575 CFIL_LOG(LOG_DEBUG,
3576 "%llx first %llu peeked %llu pass %llu peek %llu"
3577 "datalen %u copylen %u",
3578 (uint64_t)VM_KERNEL_ADDRPERM(tmp),
3579 entrybuf->cfe_ctl_q.q_start,
3580 entrybuf->cfe_peeked,
3581 entrybuf->cfe_pass_offset,
3582 entrybuf->cfe_peek_offset,
3583 datalen, copylen);
3584 }
3585 tmp = NULL;
3586
3587 /* Now deal with remaining data the filter wants to peek at */
3588 for (data = cfil_queue_first(&entrybuf->cfe_ctl_q),
3589 currentoffset = entrybuf->cfe_ctl_q.q_start;
3590 data != NULL && currentoffset < entrybuf->cfe_peek_offset;
3591 data = cfil_queue_next(&entrybuf->cfe_ctl_q, data),
3592 currentoffset += datalen) {
3593 datalen = cfil_data_length(data, NULL, NULL);
3594 tmp = data;
3595
3596 /* We've already peeked at this mbuf */
3597 if (currentoffset + datalen <= entrybuf->cfe_peeked) {
3598 continue;
3599 }
3600 /*
3601 * The data in the first mbuf may have been
3602 * partially peeked at
3603 */
3604 copyoffset = entrybuf->cfe_peeked - currentoffset;
3605 VERIFY(copyoffset < datalen);
3606 copylen = datalen - copyoffset;
3607 VERIFY(copylen <= datalen);
3608 /*
3609 * Do not copy more than needed
3610 */
3611 if (currentoffset + copyoffset + copylen >
3612 entrybuf->cfe_peek_offset) {
3613 copylen = entrybuf->cfe_peek_offset -
3614 (currentoffset + copyoffset);
3615 }
3616
3617 #if DATA_DEBUG
3618 CFIL_LOG(LOG_DEBUG,
3619 "CFIL: SERVICE CTL-Q PEEKING: %llx current %llu peeked %llu pass %llu peek %llu "
3620 "datalen %u copylen %u copyoffset %u",
3621 (uint64_t)VM_KERNEL_ADDRPERM(tmp),
3622 currentoffset,
3623 entrybuf->cfe_peeked,
3624 entrybuf->cfe_pass_offset,
3625 entrybuf->cfe_peek_offset,
3626 datalen, copylen, copyoffset);
3627 #endif
3628
3629 /*
3630 * Stop if there is nothing more to peek at
3631 */
3632 if (copylen == 0) {
3633 break;
3634 }
3635 /*
3636 * Let the filter get a peek at this span of data
3637 */
3638 error = cfil_dispatch_data_event(so, cfil_info, kcunit,
3639 outgoing, data, copyoffset, copylen);
3640 if (error != 0) {
3641 /* On error, leave data in ctl_q */
3642 break;
3643 }
3644 entrybuf->cfe_peeked += copylen;
3645 if (outgoing) {
3646 OSAddAtomic64(copylen,
3647 &cfil_stats.cfs_ctl_q_out_peeked);
3648 } else {
3649 OSAddAtomic64(copylen,
3650 &cfil_stats.cfs_ctl_q_in_peeked);
3651 }
3652
3653 /* Stop when data could not be fully peeked at */
3654 if (copylen + copyoffset < datalen) {
3655 break;
3656 }
3657 }
3658 CFIL_INFO_VERIFY(cfil_info);
3659 if (tmp != NULL) {
3660 CFIL_LOG(LOG_DEBUG,
3661 "%llx first %llu peeked %llu pass %llu peek %llu"
3662 "datalen %u copylen %u copyoffset %u",
3663 (uint64_t)VM_KERNEL_ADDRPERM(tmp),
3664 currentoffset,
3665 entrybuf->cfe_peeked,
3666 entrybuf->cfe_pass_offset,
3667 entrybuf->cfe_peek_offset,
3668 datalen, copylen, copyoffset);
3669 }
3670
3671 /*
3672 * Process data that has passed the filter
3673 */
3674 error = cfil_service_pending_queue(so, cfil_info, kcunit, outgoing);
3675 if (error != 0) {
3676 CFIL_LOG(LOG_ERR, "cfil_service_pending_queue() error %d",
3677 error);
3678 goto done;
3679 }
3680
3681 /*
3682 * Dispatch disconnect events that could not be sent
3683 */
3684 if (cfil_info == NULL) {
3685 goto done;
3686 } else if (outgoing) {
3687 if ((cfil_info->cfi_flags & CFIF_SHUT_WR) &&
3688 !(entry->cfe_flags & CFEF_SENT_DISCONNECT_OUT)) {
3689 cfil_dispatch_disconnect_event(so, cfil_info, kcunit, 1);
3690 }
3691 } else {
3692 if ((cfil_info->cfi_flags & CFIF_SHUT_RD) &&
3693 !(entry->cfe_flags & CFEF_SENT_DISCONNECT_IN)) {
3694 cfil_dispatch_disconnect_event(so, cfil_info, kcunit, 0);
3695 }
3696 }
3697
3698 done:
3699 CFIL_LOG(LOG_DEBUG,
3700 "first %llu peeked %llu pass %llu peek %llu",
3701 entrybuf->cfe_ctl_q.q_start,
3702 entrybuf->cfe_peeked,
3703 entrybuf->cfe_pass_offset,
3704 entrybuf->cfe_peek_offset);
3705
3706 CFIL_INFO_VERIFY(cfil_info);
3707 return error;
3708 }
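/*
 * Worked example of the offset handling above, with illustrative numbers:
 * assume cfe_ctl_q.q_start = 0, cfe_pass_offset = 100,
 * cfe_peek_offset = 300 and a single 250-byte mbuf chain on the control
 * queue.
 *	- Bytes [0, 100) fall under the pass offset: cfe_peeked advances to
 *	  100, but since only part of the chain may pass, the first loop
 *	  stops and the chain stays on cfe_ctl_q.
 *	- Bytes [100, 250) fall inside the peek window: the second loop
 *	  sends them to the filter with cfil_dispatch_data_event() and
 *	  cfe_peeked becomes 250.
 *	- Nothing moves to cfe_pending_q until the agent raises
 *	  cfe_pass_offset to at least 250 via cfil_update_data_offsets().
 */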
3709
3710 /*
3711 * cfil_data_filter()
3712 *
3713 * Process data for a content filter installed on a socket
3714 */
3715 int
3716 cfil_data_filter(struct socket *so, struct cfil_info *cfil_info, uint32_t kcunit, int outgoing,
3717 struct mbuf *data, uint64_t datalen)
3718 {
3719 errno_t error = 0;
3720 struct cfil_entry *entry;
3721 struct cfe_buf *entrybuf;
3722
3723 CFIL_LOG(LOG_INFO, "so %llx kcunit %u outgoing %d",
3724 (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit, outgoing);
3725
3726 socket_lock_assert_owned(so);
3727
3728 entry = &cfil_info->cfi_entries[kcunit - 1];
3729 if (outgoing) {
3730 entrybuf = &entry->cfe_snd;
3731 } else {
3732 entrybuf = &entry->cfe_rcv;
3733 }
3734
3735 /* Are we attached to the filter? */
3736 if (entry->cfe_filter == NULL) {
3737 error = 0;
3738 goto done;
3739 }
3740
3741 /* Dispatch to filters */
3742 cfil_queue_enqueue(&entrybuf->cfe_ctl_q, data, datalen);
3743 if (outgoing) {
3744 OSAddAtomic64(datalen,
3745 &cfil_stats.cfs_ctl_q_out_enqueued);
3746 } else {
3747 OSAddAtomic64(datalen,
3748 &cfil_stats.cfs_ctl_q_in_enqueued);
3749 }
3750
3751 error = cfil_data_service_ctl_q(so, cfil_info, kcunit, outgoing);
3752 if (error != 0) {
3753 CFIL_LOG(LOG_ERR, "cfil_data_service_ctl_q() error %d",
3754 error);
3755 }
3756 /*
3757 * We have to return EJUSTRETURN in all cases to avoid a double free
3758 * by the socket layer
3759 */
3760 error = EJUSTRETURN;
3761 done:
3762 CFIL_INFO_VERIFY(cfil_info);
3763
3764 CFIL_LOG(LOG_INFO, "return %d", error);
3765 return error;
3766 }
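/*
 * Note on the EJUSTRETURN convention above: once the mbuf chain is on
 * cfe_ctl_q the content filter owns it, so the socket layer must not
 * free it or append it a second time. A caller of the cfil data hooks
 * therefore behaves roughly like this (sketch):
 *
 *	error = cfil_sock_data_out(so, to, data, control, flags);
 *	if (error == EJUSTRETURN)
 *		error = 0;	// mbufs are now held by the filter queues;
 *				// do not free or transmit them here
 */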
3767
3768 /*
3769 * cfil_service_inject_queue() re-injects data that passed the
3770 * content filters
3771 */
3772 static int
3773 cfil_service_inject_queue(struct socket *so, struct cfil_info *cfil_info, int outgoing)
3774 {
3775 mbuf_t data;
3776 unsigned int datalen;
3777 int mbcnt = 0;
3778 int mbnum = 0;
3779 errno_t error = 0;
3780 struct cfi_buf *cfi_buf;
3781 struct cfil_queue *inject_q;
3782 int need_rwakeup = 0;
3783 int count = 0;
3784
3785 if (cfil_info == NULL) {
3786 return 0;
3787 }
3788
3789 socket_lock_assert_owned(so);
3790
3791 if (outgoing) {
3792 cfi_buf = &cfil_info->cfi_snd;
3793 cfil_info->cfi_flags &= ~CFIF_RETRY_INJECT_OUT;
3794 } else {
3795 cfi_buf = &cfil_info->cfi_rcv;
3796 cfil_info->cfi_flags &= ~CFIF_RETRY_INJECT_IN;
3797 }
3798 inject_q = &cfi_buf->cfi_inject_q;
3799
3800 if (cfil_queue_empty(inject_q)) {
3801 return 0;
3802 }
3803
3804 #if DATA_DEBUG | VERDICT_DEBUG
3805 CFIL_LOG(LOG_ERR, "CFIL: SERVICE INJECT-Q: <so %llx> outgoing %d queue len %llu",
3806 (uint64_t)VM_KERNEL_ADDRPERM(so), outgoing, cfil_queue_len(inject_q));
3807 #endif
3808
3809 while ((data = cfil_queue_first(inject_q)) != NULL) {
3810 datalen = cfil_data_length(data, &mbcnt, &mbnum);
3811
3812 #if DATA_DEBUG
3813 CFIL_LOG(LOG_DEBUG, "CFIL: SERVICE INJECT-Q: <%s>: <so %llx> data %llx datalen %u (mbcnt %u)",
3814 remote_addr_ptr ? "UNCONNECTED" : "CONNECTED",
3815 (uint64_t)VM_KERNEL_ADDRPERM(so), (uint64_t)VM_KERNEL_ADDRPERM(data), datalen, mbcnt);
3816 #endif
3817
3818 /* Remove data from queue and adjust stats */
3819 cfil_queue_remove(inject_q, data, datalen);
3820 cfi_buf->cfi_pending_first += datalen;
3821 cfi_buf->cfi_pending_mbcnt -= mbcnt;
3822 cfi_buf->cfi_pending_mbnum -= mbnum;
3823 cfil_info_buf_verify(cfi_buf);
3824
3825 if (outgoing) {
3826 error = sosend_reinject(so, NULL, data, NULL, 0);
3827 if (error != 0) {
3828 #if DATA_DEBUG
3829 cfil_info_log(LOG_ERR, cfil_info, "CFIL: Error: sosend_reinject() failed");
3830 CFIL_LOG(LOG_ERR, "### sosend() failed %d", error);
3831 #endif
3832 break;
3833 }
3834 // At least one injection succeeded, need to wake up pending threads.
3835 need_rwakeup = 1;
3836 } else {
3837 data->m_flags |= M_SKIPCFIL;
3838
3839 /*
3840 * NOTE: We currently only support TCP and UDP.
3841 * For RAWIP, MPTCP and message TCP we'll
3842 * need to call the appropriate sbappendxxx()
3843 * or fix sock_inject_data_in()
3844 */
3845 if (IS_UDP(so) == TRUE) {
3846 if (sbappendchain(&so->so_rcv, data, 0)) {
3847 need_rwakeup = 1;
3848 }
3849 } else {
3850 if (sbappendstream(&so->so_rcv, data)) {
3851 need_rwakeup = 1;
3852 }
3853 }
3854 }
3855
3856 if (outgoing) {
3857 OSAddAtomic64(datalen,
3858 &cfil_stats.cfs_inject_q_out_passed);
3859 } else {
3860 OSAddAtomic64(datalen,
3861 &cfil_stats.cfs_inject_q_in_passed);
3862 }
3863
3864 count++;
3865 }
3866
3867 #if DATA_DEBUG | VERDICT_DEBUG
3868 CFIL_LOG(LOG_ERR, "CFIL: SERVICE INJECT-Q: <so %llx> injected %d",
3869 (uint64_t)VM_KERNEL_ADDRPERM(so), count);
3870 #endif
3871
3872 /* A single wakeup for several packets is more efficient */
3873 if (need_rwakeup) {
3874 if (outgoing == TRUE) {
3875 sowwakeup(so);
3876 } else {
3877 sorwakeup(so);
3878 }
3879 }
3880
3881 if (error != 0 && cfil_info) {
3882 if (error == ENOBUFS) {
3883 OSIncrementAtomic(&cfil_stats.cfs_inject_q_nobufs);
3884 }
3885 if (error == ENOMEM) {
3886 OSIncrementAtomic(&cfil_stats.cfs_inject_q_nomem);
3887 }
3888
3889 if (outgoing) {
3890 cfil_info->cfi_flags |= CFIF_RETRY_INJECT_OUT;
3891 OSIncrementAtomic(&cfil_stats.cfs_inject_q_out_fail);
3892 } else {
3893 cfil_info->cfi_flags |= CFIF_RETRY_INJECT_IN;
3894 OSIncrementAtomic(&cfil_stats.cfs_inject_q_in_fail);
3895 }
3896 }
3897
3898 /*
3899 * Notify
3900 */
3901 if (cfil_info && (cfil_info->cfi_flags & CFIF_SHUT_WR)) {
3902 cfil_sock_notify_shutdown(so, SHUT_WR);
3903 if (cfil_sock_data_pending(&so->so_snd) == 0) {
3904 soshutdownlock_final(so, SHUT_WR);
3905 }
3906 }
3907 if (cfil_info && (cfil_info->cfi_flags & CFIF_CLOSE_WAIT)) {
3908 if (cfil_filters_attached(so) == 0) {
3909 CFIL_LOG(LOG_INFO, "so %llx waking",
3910 (uint64_t)VM_KERNEL_ADDRPERM(so));
3911 wakeup((caddr_t)cfil_info);
3912 }
3913 }
3914
3915 CFIL_INFO_VERIFY(cfil_info);
3916
3917 return error;
3918 }
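/*
 * The two re-injection paths above, in short:
 *	- outgoing data passed by all filters goes back down the stack with
 *	  sosend_reinject(), continuing toward the network as if it had
 *	  never been held;
 *	- incoming data is appended to so_rcv (sbappendchain() for UDP,
 *	  sbappendstream() for TCP) with M_SKIPCFIL set so the append path
 *	  does not hand the same bytes to the content filter a second time.
 */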
3919
3920 static int
3921 cfil_service_pending_queue(struct socket *so, struct cfil_info *cfil_info, uint32_t kcunit, int outgoing)
3922 {
3923 uint64_t passlen, curlen;
3924 mbuf_t data;
3925 unsigned int datalen;
3926 errno_t error = 0;
3927 struct cfil_entry *entry;
3928 struct cfe_buf *entrybuf;
3929 struct cfil_queue *pending_q;
3930
3931 CFIL_LOG(LOG_INFO, "so %llx kcunit %u outgoing %d",
3932 (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit, outgoing);
3933
3934 socket_lock_assert_owned(so);
3935
3936 entry = &cfil_info->cfi_entries[kcunit - 1];
3937 if (outgoing) {
3938 entrybuf = &entry->cfe_snd;
3939 } else {
3940 entrybuf = &entry->cfe_rcv;
3941 }
3942
3943 pending_q = &entrybuf->cfe_pending_q;
3944
3945 passlen = entrybuf->cfe_pass_offset - pending_q->q_start;
3946
3947 /*
3948 * Locate the chunks of data that we can pass to the next filter
3949 * A data chunk must be on mbuf boundaries
3950 */
3951 curlen = 0;
3952 while ((data = cfil_queue_first(pending_q)) != NULL) {
3953 struct cfil_entry *iter_entry;
3954 datalen = cfil_data_length(data, NULL, NULL);
3955
3956 #if DATA_DEBUG
3957 CFIL_LOG(LOG_DEBUG,
3958 "CFIL: SERVICE PENDING-Q: data %llx datalen %u passlen %llu curlen %llu",
3959 (uint64_t)VM_KERNEL_ADDRPERM(data), datalen,
3960 passlen, curlen);
3961 #endif
3962
3963 if (curlen + datalen > passlen) {
3964 break;
3965 }
3966
3967 cfil_queue_remove(pending_q, data, datalen);
3968
3969 curlen += datalen;
3970
3971 for (iter_entry = SLIST_NEXT(entry, cfe_order_link);
3972 iter_entry != NULL;
3973 iter_entry = SLIST_NEXT(iter_entry, cfe_order_link)) {
3974 error = cfil_data_filter(so, cfil_info, CFI_ENTRY_KCUNIT(cfil_info, iter_entry), outgoing,
3975 data, datalen);
3976 /* 0 means passed so we can continue */
3977 if (error != 0) {
3978 break;
3979 }
3980 }
3981 /* When data has passed all filters, re-inject */
3982 if (error == 0) {
3983 if (outgoing) {
3984 cfil_queue_enqueue(
3985 &cfil_info->cfi_snd.cfi_inject_q,
3986 data, datalen);
3987 OSAddAtomic64(datalen,
3988 &cfil_stats.cfs_inject_q_out_enqueued);
3989 } else {
3990 cfil_queue_enqueue(
3991 &cfil_info->cfi_rcv.cfi_inject_q,
3992 data, datalen);
3993 OSAddAtomic64(datalen,
3994 &cfil_stats.cfs_inject_q_in_enqueued);
3995 }
3996 }
3997 }
3998
3999 CFIL_INFO_VERIFY(cfil_info);
4000
4001 return error;
4002 }
4003
4004 int
4005 cfil_update_data_offsets(struct socket *so, struct cfil_info *cfil_info, uint32_t kcunit, int outgoing,
4006 uint64_t pass_offset, uint64_t peek_offset)
4007 {
4008 errno_t error = 0;
4009 struct cfil_entry *entry = NULL;
4010 struct cfe_buf *entrybuf;
4011 int updated = 0;
4012
4013 CFIL_LOG(LOG_INFO, "pass %llu peek %llu", pass_offset, peek_offset);
4014
4015 socket_lock_assert_owned(so);
4016
4017 if (cfil_info == NULL) {
4018 CFIL_LOG(LOG_ERR, "so %llx cfil detached",
4019 (uint64_t)VM_KERNEL_ADDRPERM(so));
4020 error = 0;
4021 goto done;
4022 } else if (cfil_info->cfi_flags & CFIF_DROP) {
4023 CFIL_LOG(LOG_ERR, "so %llx drop set",
4024 (uint64_t)VM_KERNEL_ADDRPERM(so));
4025 error = EPIPE;
4026 goto done;
4027 }
4028
4029 entry = &cfil_info->cfi_entries[kcunit - 1];
4030 if (outgoing) {
4031 entrybuf = &entry->cfe_snd;
4032 } else {
4033 entrybuf = &entry->cfe_rcv;
4034 }
4035
4036 /* Record updated offsets for this content filter */
4037 if (pass_offset > entrybuf->cfe_pass_offset) {
4038 entrybuf->cfe_pass_offset = pass_offset;
4039
4040 if (entrybuf->cfe_peek_offset < entrybuf->cfe_pass_offset) {
4041 entrybuf->cfe_peek_offset = entrybuf->cfe_pass_offset;
4042 }
4043 updated = 1;
4044 } else {
4045 CFIL_LOG(LOG_INFO, "pass_offset %llu <= cfe_pass_offset %llu",
4046 pass_offset, entrybuf->cfe_pass_offset);
4047 }
4048 /* Filter does not want or need to see data that's allowed to pass */
4049 if (peek_offset > entrybuf->cfe_pass_offset &&
4050 peek_offset > entrybuf->cfe_peek_offset) {
4051 entrybuf->cfe_peek_offset = peek_offset;
4052 updated = 1;
4053 }
4054 /* Nothing to do */
4055 if (updated == 0) {
4056 goto done;
4057 }
4058
4059 /* Move data held in control queue to pending queue if needed */
4060 error = cfil_data_service_ctl_q(so, cfil_info, kcunit, outgoing);
4061 if (error != 0) {
4062 CFIL_LOG(LOG_ERR, "cfil_data_service_ctl_q() error %d",
4063 error);
4064 goto done;
4065 }
4066 error = EJUSTRETURN;
4067
4068 done:
4069 /*
4070 * The filter is effectively detached when told to pass all from both sides
4071 * or when the socket is closed and no more data is waiting
4072 * to be delivered to the filter
4073 */
4074 if (entry != NULL &&
4075 ((entry->cfe_snd.cfe_pass_offset == CFM_MAX_OFFSET &&
4076 entry->cfe_rcv.cfe_pass_offset == CFM_MAX_OFFSET) ||
4077 ((cfil_info->cfi_flags & CFIF_CLOSE_WAIT) &&
4078 cfil_queue_empty(&entry->cfe_snd.cfe_ctl_q) &&
4079 cfil_queue_empty(&entry->cfe_rcv.cfe_ctl_q)))) {
4080 entry->cfe_flags |= CFEF_CFIL_DETACHED;
4081 #if LIFECYCLE_DEBUG
4082 cfil_info_log(LOG_ERR, cfil_info, outgoing ?
4083 "CFIL: LIFECYCLE: OUT - PASSED ALL - DETACH":
4084 "CFIL: LIFECYCLE: IN - PASSED ALL - DETACH");
4085 #endif
4086 CFIL_LOG(LOG_INFO, "so %llx detached %u",
4087 (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit);
4088 if ((cfil_info->cfi_flags & CFIF_CLOSE_WAIT) &&
4089 cfil_filters_attached(so) == 0) {
4090 #if LIFECYCLE_DEBUG
4091 cfil_info_log(LOG_ERR, cfil_info, "CFIL: LIFECYCLE: WAKING");
4092 #endif
4093 CFIL_LOG(LOG_INFO, "so %llx waking",
4094 (uint64_t)VM_KERNEL_ADDRPERM(so));
4095 wakeup((caddr_t)cfil_info);
4096 }
4097 }
4098 CFIL_INFO_VERIFY(cfil_info);
4099 CFIL_LOG(LOG_INFO, "return %d", error);
4100 return error;
4101 }
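/*
 * The pass/peek offsets above normally come from a CFM_OP_DATA_UPDATE
 * action sent by the user space agent. A minimal sketch of such a
 * message, assuming the cfil_msg_action layout from the content filter
 * header (offset values are illustrative):
 *
 *	struct cfil_msg_action action = {
 *		.cfa_msghdr = {
 *			.cfm_len     = sizeof(struct cfil_msg_action),
 *			.cfm_version = 1,
 *			.cfm_type    = CFM_TYPE_ACTION,
 *			.cfm_op      = CFM_OP_DATA_UPDATE,
 *			.cfm_sock_id = sock_id,	// from the attach event
 *		},
 *		.cfa_out_pass_offset = 100,	// let the first 100 outgoing bytes go
 *		.cfa_out_peek_offset = 300,	// keep peeking up to offset 300
 *	};
 *	// written to the agent's kernel control socket with send(2)
 */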
4102
4103 /*
4104 * Update pass offset for socket when no data is pending
4105 */
4106 static int
4107 cfil_set_socket_pass_offset(struct socket *so, struct cfil_info *cfil_info, int outgoing)
4108 {
4109 struct cfi_buf *cfi_buf;
4110 struct cfil_entry *entry;
4111 struct cfe_buf *entrybuf;
4112 uint32_t kcunit;
4113 uint64_t pass_offset = 0;
4114
4115 if (cfil_info == NULL) {
4116 return 0;
4117 }
4118
4119 CFIL_LOG(LOG_INFO, "so %llx outgoing %d",
4120 (uint64_t)VM_KERNEL_ADDRPERM(so), outgoing);
4121
4122 socket_lock_assert_owned(so);
4123
4124 if (outgoing) {
4125 cfi_buf = &cfil_info->cfi_snd;
4126 } else {
4127 cfi_buf = &cfil_info->cfi_rcv;
4128 }
4129
4130 CFIL_LOG(LOG_DEBUG, "CFIL: <so %llx, sockID %llu> outgoing %d cfi_pending_first %llu cfi_pending_last %llu",
4131 (uint64_t)VM_KERNEL_ADDRPERM(so), cfil_info->cfi_sock_id, outgoing,
4132 cfi_buf->cfi_pending_first, cfi_buf->cfi_pending_last);
4133
4134 if (cfi_buf->cfi_pending_last - cfi_buf->cfi_pending_first == 0) {
4135 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
4136 entry = &cfil_info->cfi_entries[kcunit - 1];
4137
4138 /* Are we attached to a filter? */
4139 if (entry->cfe_filter == NULL) {
4140 continue;
4141 }
4142
4143 if (outgoing) {
4144 entrybuf = &entry->cfe_snd;
4145 } else {
4146 entrybuf = &entry->cfe_rcv;
4147 }
4148
4149 if (pass_offset == 0 ||
4150 entrybuf->cfe_pass_offset < pass_offset) {
4151 pass_offset = entrybuf->cfe_pass_offset;
4152 }
4153 }
4154 cfi_buf->cfi_pass_offset = pass_offset;
4155 }
4156
4157 CFIL_LOG(LOG_DEBUG, "CFIL: <so %llx, sockID %llu>, cfi_pass_offset %llu",
4158 (uint64_t)VM_KERNEL_ADDRPERM(so), cfil_info->cfi_sock_id, cfi_buf->cfi_pass_offset);
4159
4160 return 0;
4161 }
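/*
 * Example with two attached filters (illustrative numbers): if filter 1
 * reports cfe_pass_offset = 500 and filter 2 reports
 * cfe_pass_offset = 200, cfi_pass_offset becomes 200. The most
 * restrictive filter therefore governs how far the fast path in
 * cfil_data_common() may let data through without re-queueing it.
 */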
4162
4163 int
4164 cfil_action_data_pass(struct socket *so, struct cfil_info *cfil_info, uint32_t kcunit, int outgoing,
4165 uint64_t pass_offset, uint64_t peek_offset)
4166 {
4167 errno_t error = 0;
4168
4169 CFIL_LOG(LOG_INFO, "");
4170
4171 socket_lock_assert_owned(so);
4172
4173 error = cfil_acquire_sockbuf(so, cfil_info, outgoing);
4174 if (error != 0) {
4175 CFIL_LOG(LOG_INFO, "so %llx %s dropped",
4176 (uint64_t)VM_KERNEL_ADDRPERM(so),
4177 outgoing ? "out" : "in");
4178 goto release;
4179 }
4180
4181 error = cfil_update_data_offsets(so, cfil_info, kcunit, outgoing,
4182 pass_offset, peek_offset);
4183
4184 cfil_service_inject_queue(so, cfil_info, outgoing);
4185
4186 cfil_set_socket_pass_offset(so, cfil_info, outgoing);
4187 release:
4188 CFIL_INFO_VERIFY(cfil_info);
4189 cfil_release_sockbuf(so, outgoing);
4190
4191 return error;
4192 }
4193
4194
4195 static void
4196 cfil_flush_queues(struct socket *so, struct cfil_info *cfil_info)
4197 {
4198 struct cfil_entry *entry;
4199 int kcunit;
4200 uint64_t drained;
4201
4202 if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || cfil_info == NULL) {
4203 goto done;
4204 }
4205
4206 socket_lock_assert_owned(so);
4207
4208 /*
4209 * Flush the output queues and ignore errors as long as
4210 * we are attached
4211 */
4212 (void) cfil_acquire_sockbuf(so, cfil_info, 1);
4213 if (cfil_info != NULL) {
4214 drained = 0;
4215 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
4216 entry = &cfil_info->cfi_entries[kcunit - 1];
4217
4218 drained += cfil_queue_drain(&entry->cfe_snd.cfe_ctl_q);
4219 drained += cfil_queue_drain(&entry->cfe_snd.cfe_pending_q);
4220 }
4221 drained += cfil_queue_drain(&cfil_info->cfi_snd.cfi_inject_q);
4222
4223 if (drained) {
4224 if (cfil_info->cfi_flags & CFIF_DROP) {
4225 OSIncrementAtomic(
4226 &cfil_stats.cfs_flush_out_drop);
4227 } else {
4228 OSIncrementAtomic(
4229 &cfil_stats.cfs_flush_out_close);
4230 }
4231 }
4232 }
4233 cfil_release_sockbuf(so, 1);
4234
4235 /*
4236 * Flush the input queues
4237 */
4238 (void) cfil_acquire_sockbuf(so, cfil_info, 0);
4239 if (cfil_info != NULL) {
4240 drained = 0;
4241 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
4242 entry = &cfil_info->cfi_entries[kcunit - 1];
4243
4244 drained += cfil_queue_drain(
4245 &entry->cfe_rcv.cfe_ctl_q);
4246 drained += cfil_queue_drain(
4247 &entry->cfe_rcv.cfe_pending_q);
4248 }
4249 drained += cfil_queue_drain(&cfil_info->cfi_rcv.cfi_inject_q);
4250
4251 if (drained) {
4252 if (cfil_info->cfi_flags & CFIF_DROP) {
4253 OSIncrementAtomic(
4254 &cfil_stats.cfs_flush_in_drop);
4255 } else {
4256 OSIncrementAtomic(
4257 &cfil_stats.cfs_flush_in_close);
4258 }
4259 }
4260 }
4261 cfil_release_sockbuf(so, 0);
4262 done:
4263 CFIL_INFO_VERIFY(cfil_info);
4264 }
4265
4266 int
4267 cfil_action_drop(struct socket *so, struct cfil_info *cfil_info, uint32_t kcunit)
4268 {
4269 errno_t error = 0;
4270 struct cfil_entry *entry;
4271 struct proc *p;
4272
4273 if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || cfil_info == NULL) {
4274 goto done;
4275 }
4276
4277 socket_lock_assert_owned(so);
4278
4279 entry = &cfil_info->cfi_entries[kcunit - 1];
4280
4281 /* Are we attached to the filter? */
4282 if (entry->cfe_filter == NULL) {
4283 goto done;
4284 }
4285
4286 cfil_info->cfi_flags |= CFIF_DROP;
4287
4288 p = current_proc();
4289
4290 /*
4291 * Force the socket to be marked defunct
4292 * (forcing fixed along with rdar://19391339)
4293 */
4294 if (so->so_cfil_db == NULL) {
4295 error = sosetdefunct(p, so,
4296 SHUTDOWN_SOCKET_LEVEL_CONTENT_FILTER | SHUTDOWN_SOCKET_LEVEL_DISCONNECT_ALL,
4297 FALSE);
4298
4299 /* Flush the socket buffer and disconnect */
4300 if (error == 0) {
4301 error = sodefunct(p, so,
4302 SHUTDOWN_SOCKET_LEVEL_CONTENT_FILTER | SHUTDOWN_SOCKET_LEVEL_DISCONNECT_ALL);
4303 }
4304 }
4305
4306 /* The filter is done, mark as detached */
4307 entry->cfe_flags |= CFEF_CFIL_DETACHED;
4308 #if LIFECYCLE_DEBUG
4309 cfil_info_log(LOG_ERR, cfil_info, "CFIL: LIFECYCLE: DROP - DETACH");
4310 #endif
4311 CFIL_LOG(LOG_INFO, "so %llx detached %u",
4312 (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit);
4313
4314 /* Pending data needs to go */
4315 cfil_flush_queues(so, cfil_info);
4316
4317 if (cfil_info && (cfil_info->cfi_flags & CFIF_CLOSE_WAIT)) {
4318 if (cfil_filters_attached(so) == 0) {
4319 CFIL_LOG(LOG_INFO, "so %llx waking",
4320 (uint64_t)VM_KERNEL_ADDRPERM(so));
4321 wakeup((caddr_t)cfil_info);
4322 }
4323 }
4324 done:
4325 return error;
4326 }
4327
4328 int
4329 cfil_action_bless_client(uint32_t kcunit, struct cfil_msg_hdr *msghdr)
4330 {
4331 errno_t error = 0;
4332 struct cfil_info *cfil_info = NULL;
4333
4334 bool cfil_attached = false;
4335 struct cfil_msg_bless_client *blessmsg = (struct cfil_msg_bless_client *)msghdr;
4336
4337 // Search and lock socket
4338 struct socket *so = cfil_socket_from_client_uuid(blessmsg->cfb_client_uuid, &cfil_attached);
4339 if (so == NULL) {
4340 error = ENOENT;
4341 } else {
4342 // The client gets a pass automatically
4343 cfil_info = (so->so_cfil_db != NULL) ?
4344 cfil_db_get_cfil_info(so->so_cfil_db, msghdr->cfm_sock_id) : so->so_cfil;
4345
4346 if (cfil_attached) {
4347 #if VERDICT_DEBUG
4348 if (cfil_info != NULL) {
4349 CFIL_LOG(LOG_ERR, "CFIL: VERDICT RECEIVED: BLESS %s <so %llx sockID %llu>",
4350 cfil_info->cfi_hash_entry ? "UDP" : "TCP",
4351 (uint64_t)VM_KERNEL_ADDRPERM(so),
4352 cfil_info->cfi_sock_id);
4353 }
4354 #endif
4355 cfil_sock_received_verdict(so);
4356 (void)cfil_action_data_pass(so, cfil_info, kcunit, 1, CFM_MAX_OFFSET, CFM_MAX_OFFSET);
4357 (void)cfil_action_data_pass(so, cfil_info, kcunit, 0, CFM_MAX_OFFSET, CFM_MAX_OFFSET);
4358 } else {
4359 so->so_flags1 |= SOF1_CONTENT_FILTER_SKIP;
4360 }
4361 socket_unlock(so, 1);
4362 }
4363
4364 return error;
4365 }
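/*
 * Blessing lets an agent exempt one of its own connections, looked up by
 * client UUID rather than by NECP rule match: if a filter is already
 * attached, passing CFM_MAX_OFFSET in both directions amounts to a
 * permanent pass verdict; otherwise SOF1_CONTENT_FILTER_SKIP marks the
 * socket so content filtering is skipped for it altogether.
 */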
4366
4367 int
4368 cfil_action_set_crypto_key(uint32_t kcunit, struct cfil_msg_hdr *msghdr)
4369 {
4370 struct content_filter *cfc = NULL;
4371 cfil_crypto_state_t crypto_state = NULL;
4372 struct cfil_msg_set_crypto_key *keymsg = (struct cfil_msg_set_crypto_key *)msghdr;
4373
4374 CFIL_LOG(LOG_NOTICE, "");
4375
4376 if (content_filters == NULL) {
4377 CFIL_LOG(LOG_ERR, "no content filter");
4378 return EINVAL;
4379 }
4380 if (kcunit > MAX_CONTENT_FILTER) {
4381 CFIL_LOG(LOG_ERR, "kcunit %u > MAX_CONTENT_FILTER (%d)",
4382 kcunit, MAX_CONTENT_FILTER);
4383 return EINVAL;
4384 }
4385 crypto_state = cfil_crypto_init_client((uint8_t *)keymsg->crypto_key);
4386 if (crypto_state == NULL) {
4387 CFIL_LOG(LOG_ERR, "failed to initialize crypto state for unit %u)",
4388 kcunit);
4389 return EINVAL;
4390 }
4391
4392 cfil_rw_lock_exclusive(&cfil_lck_rw);
4393
4394 cfc = content_filters[kcunit - 1];
4395 if (cfc == NULL || cfc->cf_kcunit != kcunit) {
4396 CFIL_LOG(LOG_ERR, "bad unit info %u)",
4397 kcunit);
4398 cfil_rw_unlock_exclusive(&cfil_lck_rw);
4399 cfil_crypto_cleanup_state(crypto_state);
4400 return EINVAL;
4401 }
4402 if (cfc->cf_crypto_state != NULL) {
4403 cfil_crypto_cleanup_state(cfc->cf_crypto_state);
4404 cfc->cf_crypto_state = NULL;
4405 }
4406 cfc->cf_crypto_state = crypto_state;
4407
4408 cfil_rw_unlock_exclusive(&cfil_lck_rw);
4409 return 0;
4410 }
4411
4412 static int
4413 cfil_update_entry_offsets(struct socket *so, struct cfil_info *cfil_info, int outgoing, unsigned int datalen)
4414 {
4415 struct cfil_entry *entry;
4416 struct cfe_buf *entrybuf;
4417 uint32_t kcunit;
4418
4419 CFIL_LOG(LOG_INFO, "so %llx outgoing %d datalen %u",
4420 (uint64_t)VM_KERNEL_ADDRPERM(so), outgoing, datalen);
4421
4422 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
4423 entry = &cfil_info->cfi_entries[kcunit - 1];
4424
4425 /* Are we attached to the filter? */
4426 if (entry->cfe_filter == NULL) {
4427 continue;
4428 }
4429
4430 if (outgoing) {
4431 entrybuf = &entry->cfe_snd;
4432 } else {
4433 entrybuf = &entry->cfe_rcv;
4434 }
4435
4436 entrybuf->cfe_ctl_q.q_start += datalen;
4437 entrybuf->cfe_pass_offset = entrybuf->cfe_ctl_q.q_start;
4438 entrybuf->cfe_peeked = entrybuf->cfe_ctl_q.q_start;
4439 if (entrybuf->cfe_peek_offset < entrybuf->cfe_pass_offset) {
4440 entrybuf->cfe_peek_offset = entrybuf->cfe_pass_offset;
4441 }
4442
4443 entrybuf->cfe_ctl_q.q_end += datalen;
4444
4445 entrybuf->cfe_pending_q.q_start += datalen;
4446 entrybuf->cfe_pending_q.q_end += datalen;
4447 }
4448 CFIL_INFO_VERIFY(cfil_info);
4449 return 0;
4450 }
4451
4452 int
4453 cfil_data_common(struct socket *so, struct cfil_info *cfil_info, int outgoing, struct sockaddr *to,
4454 struct mbuf *data, struct mbuf *control, uint32_t flags)
4455 {
4456 #pragma unused(to, control, flags)
4457 errno_t error = 0;
4458 unsigned int datalen;
4459 int mbcnt = 0;
4460 int mbnum = 0;
4461 int kcunit;
4462 struct cfi_buf *cfi_buf;
4463 struct mbuf *chain = NULL;
4464
4465 if (cfil_info == NULL) {
4466 CFIL_LOG(LOG_ERR, "so %llx cfil detached",
4467 (uint64_t)VM_KERNEL_ADDRPERM(so));
4468 error = 0;
4469 goto done;
4470 } else if (cfil_info->cfi_flags & CFIF_DROP) {
4471 CFIL_LOG(LOG_ERR, "so %llx drop set",
4472 (uint64_t)VM_KERNEL_ADDRPERM(so));
4473 error = EPIPE;
4474 goto done;
4475 }
4476
4477 datalen = cfil_data_length(data, &mbcnt, &mbnum);
4478
4479 if (outgoing) {
4480 cfi_buf = &cfil_info->cfi_snd;
4481 cfil_info->cfi_byte_outbound_count += datalen;
4482 } else {
4483 cfi_buf = &cfil_info->cfi_rcv;
4484 cfil_info->cfi_byte_inbound_count += datalen;
4485 }
4486
4487 cfi_buf->cfi_pending_last += datalen;
4488 cfi_buf->cfi_pending_mbcnt += mbcnt;
4489 cfi_buf->cfi_pending_mbnum += mbnum;
4490
4491 if (IS_UDP(so)) {
4492 if (cfi_buf->cfi_pending_mbnum > cfil_udp_gc_mbuf_num_max ||
4493 cfi_buf->cfi_pending_mbcnt > cfil_udp_gc_mbuf_cnt_max) {
4494 cfi_buf->cfi_tail_drop_cnt++;
4495 cfi_buf->cfi_pending_mbcnt -= mbcnt;
4496 cfi_buf->cfi_pending_mbnum -= mbnum;
4497 return EPIPE;
4498 }
4499 }
4500
4501 cfil_info_buf_verify(cfi_buf);
4502
4503 #if DATA_DEBUG
4504 CFIL_LOG(LOG_DEBUG, "CFIL: QUEUEING DATA: <so %llx> %s: data %llx len %u flags 0x%x nextpkt %llx - cfi_pending_last %llu cfi_pending_mbcnt %u cfi_pass_offset %llu",
4505 (uint64_t)VM_KERNEL_ADDRPERM(so),
4506 outgoing ? "OUT" : "IN",
4507 (uint64_t)VM_KERNEL_ADDRPERM(data), datalen, data->m_flags,
4508 (uint64_t)VM_KERNEL_ADDRPERM(data->m_nextpkt),
4509 cfi_buf->cfi_pending_last,
4510 cfi_buf->cfi_pending_mbcnt,
4511 cfi_buf->cfi_pass_offset);
4512 #endif
4513
4514 /* Fast path when below pass offset */
4515 if (cfi_buf->cfi_pending_last <= cfi_buf->cfi_pass_offset) {
4516 cfil_update_entry_offsets(so, cfil_info, outgoing, datalen);
4517 #if DATA_DEBUG
4518 CFIL_LOG(LOG_DEBUG, "CFIL: QUEUEING DATA: FAST PATH");
4519 #endif
4520 } else {
4521 struct cfil_entry *iter_entry;
4522 SLIST_FOREACH(iter_entry, &cfil_info->cfi_ordered_entries, cfe_order_link) {
4523 // Is cfil attached to this filter?
4524 kcunit = CFI_ENTRY_KCUNIT(cfil_info, iter_entry);
4525 if (IS_ENTRY_ATTACHED(cfil_info, kcunit)) {
4526 if (IS_UDP(so) && chain == NULL) {
4527 /* UDP only:
4528 * Chain addr (incoming only, TBD), control (optional) and data into one chain.
4529 * This full chain will be reinjected into the socket after receiving the verdict.
4530 */
4531 (void) cfil_udp_save_socket_state(cfil_info, data);
4532 chain = sbconcat_mbufs(NULL, outgoing ? NULL : to, data, control);
4533 if (chain == NULL) {
4534 return ENOBUFS;
4535 }
4536 data = chain;
4537 }
4538 error = cfil_data_filter(so, cfil_info, kcunit, outgoing, data,
4539 datalen);
4540 }
4541 /* 0 means passed so continue with next filter */
4542 if (error != 0) {
4543 break;
4544 }
4545 }
4546 }
4547
4548 /* Move cursor if no filter claimed the data */
4549 if (error == 0) {
4550 cfi_buf->cfi_pending_first += datalen;
4551 cfi_buf->cfi_pending_mbcnt -= mbcnt;
4552 cfi_buf->cfi_pending_mbnum -= mbnum;
4553 cfil_info_buf_verify(cfi_buf);
4554 }
4555 done:
4556 CFIL_INFO_VERIFY(cfil_info);
4557
4558 return error;
4559 }
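/*
 * Control flow of cfil_data_common() in short (sketch):
 *
 *	if (cfi_pending_last <= cfi_pass_offset) {
 *		// fast path: all attached filters already passed this far,
 *		// so only the per-entry offsets are advanced
 *	} else {
 *		// slow path: the data is offered to each attached filter in
 *		// cfi_ordered_entries order; an EJUSTRETURN from
 *		// cfil_data_filter() ends the walk because that filter now
 *		// holds the mbufs on its control queue
 *	}
 */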
4560
4561 /*
4562 * Callback from socket layer sosendxxx()
4563 */
4564 int
4565 cfil_sock_data_out(struct socket *so, struct sockaddr *to,
4566 struct mbuf *data, struct mbuf *control, uint32_t flags)
4567 {
4568 int error = 0;
4569
4570 if (IS_UDP(so)) {
4571 return cfil_sock_udp_handle_data(TRUE, so, NULL, to, data, control, flags);
4572 }
4573
4574 if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL) {
4575 return 0;
4576 }
4577
4578 /*
4579 * Pass initial data for TFO.
4580 */
4581 if (IS_INITIAL_TFO_DATA(so)) {
4582 return 0;
4583 }
4584
4585 socket_lock_assert_owned(so);
4586
4587 if (so->so_cfil->cfi_flags & CFIF_DROP) {
4588 CFIL_LOG(LOG_ERR, "so %llx drop set",
4589 (uint64_t)VM_KERNEL_ADDRPERM(so));
4590 return EPIPE;
4591 }
4592 if (control != NULL) {
4593 CFIL_LOG(LOG_ERR, "so %llx control",
4594 (uint64_t)VM_KERNEL_ADDRPERM(so));
4595 OSIncrementAtomic(&cfil_stats.cfs_data_out_control);
4596 }
4597 if ((flags & MSG_OOB)) {
4598 CFIL_LOG(LOG_ERR, "so %llx MSG_OOB",
4599 (uint64_t)VM_KERNEL_ADDRPERM(so));
4600 OSIncrementAtomic(&cfil_stats.cfs_data_out_oob);
4601 }
4602 if ((so->so_snd.sb_flags & SB_LOCK) == 0) {
4603 panic("so %p SB_LOCK not set", so);
4604 }
4605
4606 if (so->so_snd.sb_cfil_thread != NULL) {
4607 panic("%s sb_cfil_thread %p not NULL", __func__,
4608 so->so_snd.sb_cfil_thread);
4609 }
4610
4611 error = cfil_data_common(so, so->so_cfil, 1, to, data, control, flags);
4612
4613 return error;
4614 }
4615
4616 /*
4617 * Callback from socket layer sbappendxxx()
4618 */
4619 int
4620 cfil_sock_data_in(struct socket *so, struct sockaddr *from,
4621 struct mbuf *data, struct mbuf *control, uint32_t flags)
4622 {
4623 int error = 0;
4624
4625 if (IS_UDP(so)) {
4626 return cfil_sock_udp_handle_data(FALSE, so, NULL, from, data, control, flags);
4627 }
4628
4629 if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL) {
4630 return 0;
4631 }
4632
4633 /*
4634 * Pass initial data for TFO.
4635 */
4636 if (IS_INITIAL_TFO_DATA(so)) {
4637 return 0;
4638 }
4639
4640 socket_lock_assert_owned(so);
4641
4642 if (so->so_cfil->cfi_flags & CFIF_DROP) {
4643 CFIL_LOG(LOG_ERR, "so %llx drop set",
4644 (uint64_t)VM_KERNEL_ADDRPERM(so));
4645 return EPIPE;
4646 }
4647 if (control != NULL) {
4648 CFIL_LOG(LOG_ERR, "so %llx control",
4649 (uint64_t)VM_KERNEL_ADDRPERM(so));
4650 OSIncrementAtomic(&cfil_stats.cfs_data_in_control);
4651 }
4652 if (data->m_type == MT_OOBDATA) {
4653 CFIL_LOG(LOG_ERR, "so %llx MSG_OOB",
4654 (uint64_t)VM_KERNEL_ADDRPERM(so));
4655 OSIncrementAtomic(&cfil_stats.cfs_data_in_oob);
4656 }
4657 error = cfil_data_common(so, so->so_cfil, 0, from, data, control, flags);
4658
4659 return error;
4660 }
4661
4662 /*
4663 * Callback from socket layer soshutdownxxx()
4664 *
4665 * We may delay the shutdown write if outgoing data is still being processed by the filters.
4666 *
4667 * There is no point in delaying the shutdown read because the process
4668 * has indicated that it does not want to read any more data.
4669 */
4670 int
4671 cfil_sock_shutdown(struct socket *so, int *how)
4672 {
4673 int error = 0;
4674
4675 if (IS_UDP(so)) {
4676 return cfil_sock_udp_shutdown(so, how);
4677 }
4678
4679 if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL) {
4680 goto done;
4681 }
4682
4683 socket_lock_assert_owned(so);
4684
4685 CFIL_LOG(LOG_INFO, "so %llx how %d",
4686 (uint64_t)VM_KERNEL_ADDRPERM(so), *how);
4687
4688 /*
4689 * Check the state of the socket before the content filter
4690 */
4691 if (*how != SHUT_WR && (so->so_state & SS_CANTRCVMORE) != 0) {
4692 /* read already shut down */
4693 error = ENOTCONN;
4694 goto done;
4695 }
4696 if (*how != SHUT_RD && (so->so_state & SS_CANTSENDMORE) != 0) {
4697 /* write already shut down */
4698 error = ENOTCONN;
4699 goto done;
4700 }
4701
4702 if ((so->so_cfil->cfi_flags & CFIF_DROP) != 0) {
4703 CFIL_LOG(LOG_ERR, "so %llx drop set",
4704 (uint64_t)VM_KERNEL_ADDRPERM(so));
4705 goto done;
4706 }
4707
4708 /*
4709 * shutdown read: SHUT_RD or SHUT_RDWR
4710 */
4711 if (*how != SHUT_WR) {
4712 if (so->so_cfil->cfi_flags & CFIF_SHUT_RD) {
4713 error = ENOTCONN;
4714 goto done;
4715 }
4716 so->so_cfil->cfi_flags |= CFIF_SHUT_RD;
4717 cfil_sock_notify_shutdown(so, SHUT_RD);
4718 }
4719 /*
4720 * shutdown write: SHUT_WR or SHUT_RDWR
4721 */
4722 if (*how != SHUT_RD) {
4723 if (so->so_cfil->cfi_flags & CFIF_SHUT_WR) {
4724 error = ENOTCONN;
4725 goto done;
4726 }
4727 so->so_cfil->cfi_flags |= CFIF_SHUT_WR;
4728 cfil_sock_notify_shutdown(so, SHUT_WR);
4729 /*
4730 * When outgoing data is pending, we delay the shutdown at the
4731 * protocol level until the content filters give the final
4732 * verdict on the pending data.
4733 */
4734 if (cfil_sock_data_pending(&so->so_snd) != 0) {
4735 /*
4736 * When shutting down the read and write sides at once
4737 * we can proceed to the final shutdown of the read
4738 * side. Otherwise, we just return.
4739 */
4740 if (*how == SHUT_WR) {
4741 error = EJUSTRETURN;
4742 } else if (*how == SHUT_RDWR) {
4743 *how = SHUT_RD;
4744 }
4745 }
4746 }
4747 done:
4748 return error;
4749 }
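/*
 * Example of the downgrade above: for shutdown(s, SHUT_RDWR) while
 * outgoing data still awaits a verdict, *how is rewritten to SHUT_RD so
 * the read side shuts down now; the write side is finished later by
 * cfil_service_inject_queue() calling soshutdownlock_final() once
 * cfil_sock_data_pending() reaches zero. A plain SHUT_WR in the same
 * situation returns EJUSTRETURN, deferring the protocol-level shutdown
 * entirely.
 */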
4750
4751 /*
4752 * This is called when the socket is closed and there is no more
4753 * opportunity for filtering
4754 */
4755 void
4756 cfil_sock_is_closed(struct socket *so)
4757 {
4758 errno_t error = 0;
4759 int kcunit;
4760
4761 if (IS_UDP(so)) {
4762 cfil_sock_udp_is_closed(so);
4763 return;
4764 }
4765
4766 if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL) {
4767 return;
4768 }
4769
4770 CFIL_LOG(LOG_INFO, "so %llx", (uint64_t)VM_KERNEL_ADDRPERM(so));
4771
4772 socket_lock_assert_owned(so);
4773
4774 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
4775 /* Let the filters know of the closing */
4776 error = cfil_dispatch_closed_event(so, so->so_cfil, kcunit);
4777 }
4778
4779 /* Last chance to push passed data out */
4780 error = cfil_acquire_sockbuf(so, so->so_cfil, 1);
4781 if (error == 0) {
4782 cfil_service_inject_queue(so, so->so_cfil, 1);
4783 }
4784 cfil_release_sockbuf(so, 1);
4785
4786 so->so_cfil->cfi_flags |= CFIF_SOCK_CLOSED;
4787
4788 /* Pending data needs to go */
4789 cfil_flush_queues(so, so->so_cfil);
4790
4791 CFIL_INFO_VERIFY(so->so_cfil);
4792 }
4793
4794 /*
4795 * This is called when the socket is disconnected so let the filters
4796 * know about the disconnection and that no more data will come
4797 *
4798 * The how parameter has the same values as soshutdown()
4799 */
4800 void
4801 cfil_sock_notify_shutdown(struct socket *so, int how)
4802 {
4803 errno_t error = 0;
4804 int kcunit;
4805
4806 if (IS_UDP(so)) {
4807 cfil_sock_udp_notify_shutdown(so, how, 0, 0);
4808 return;
4809 }
4810
4811 if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL) {
4812 return;
4813 }
4814
4815 CFIL_LOG(LOG_INFO, "so %llx how %d",
4816 (uint64_t)VM_KERNEL_ADDRPERM(so), how);
4817
4818 socket_lock_assert_owned(so);
4819
4820 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
4821 /* Disconnect incoming side */
4822 if (how != SHUT_WR) {
4823 error = cfil_dispatch_disconnect_event(so, so->so_cfil, kcunit, 0);
4824 }
4825 /* Disconnect outgoing side */
4826 if (how != SHUT_RD) {
4827 error = cfil_dispatch_disconnect_event(so, so->so_cfil, kcunit, 1);
4828 }
4829 }
4830 }
4831
4832 static int
4833 cfil_filters_attached(struct socket *so)
4834 {
4835 struct cfil_entry *entry;
4836 uint32_t kcunit;
4837 int attached = 0;
4838
4839 if (IS_UDP(so)) {
4840 return cfil_filters_udp_attached(so, FALSE);
4841 }
4842
4843 if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL) {
4844 return 0;
4845 }
4846
4847 socket_lock_assert_owned(so);
4848
4849 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
4850 entry = &so->so_cfil->cfi_entries[kcunit - 1];
4851
4852 /* Are we attached to the filter? */
4853 if (entry->cfe_filter == NULL) {
4854 continue;
4855 }
4856 if ((entry->cfe_flags & CFEF_SENT_SOCK_ATTACHED) == 0) {
4857 continue;
4858 }
4859 if ((entry->cfe_flags & CFEF_CFIL_DETACHED) != 0) {
4860 continue;
4861 }
4862 attached = 1;
4863 break;
4864 }
4865
4866 return attached;
4867 }
4868
4869 /*
4870 * This is called when the socket is closed and we are waiting for
4871 * the filters to give the final pass or drop
4872 */
4873 void
4874 cfil_sock_close_wait(struct socket *so)
4875 {
4876 lck_mtx_t *mutex_held;
4877 struct timespec ts;
4878 int error;
4879
4880 if (IS_UDP(so)) {
4881 cfil_sock_udp_close_wait(so);
4882 return;
4883 }
4884
4885 if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL) {
4886 return;
4887 }
4888
4889 CFIL_LOG(LOG_INFO, "so %llx", (uint64_t)VM_KERNEL_ADDRPERM(so));
4890
4891 if (so->so_proto->pr_getlock != NULL) {
4892 mutex_held = (*so->so_proto->pr_getlock)(so, PR_F_WILLUNLOCK);
4893 } else {
4894 mutex_held = so->so_proto->pr_domain->dom_mtx;
4895 }
4896 LCK_MTX_ASSERT(mutex_held, LCK_MTX_ASSERT_OWNED);
4897
4898 while (cfil_filters_attached(so)) {
4899 /*
4900 * Notify the filters we are going away so they can detach
4901 */
4902 cfil_sock_notify_shutdown(so, SHUT_RDWR);
4903
4904 /*
4905 * Check whether we still need to wait after the filters have been
4906 * notified of the disconnection
4907 */
4908 if (cfil_filters_attached(so) == 0) {
4909 break;
4910 }
4911
4912 CFIL_LOG(LOG_INFO, "so %llx waiting",
4913 (uint64_t)VM_KERNEL_ADDRPERM(so));
4914
4915 ts.tv_sec = cfil_close_wait_timeout / 1000;
4916 ts.tv_nsec = (cfil_close_wait_timeout % 1000) *
4917 NSEC_PER_USEC * 1000;
4918
4919 OSIncrementAtomic(&cfil_stats.cfs_close_wait);
4920 so->so_cfil->cfi_flags |= CFIF_CLOSE_WAIT;
4921 error = msleep((caddr_t)so->so_cfil, mutex_held,
4922 PSOCK | PCATCH, "cfil_sock_close_wait", &ts);
4923 so->so_cfil->cfi_flags &= ~CFIF_CLOSE_WAIT;
4924
4925 CFIL_LOG(LOG_NOTICE, "so %llx timed out %d",
4926 (uint64_t)VM_KERNEL_ADDRPERM(so), (error != 0));
4927
4928 /*
4929 * Force close in case of timeout
4930 */
4931 if (error != 0) {
4932 OSIncrementAtomic(&cfil_stats.cfs_close_wait_timeout);
4933 break;
4934 }
4935 }
4936 }
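/*
 * The msleep() timeout above treats cfil_close_wait_timeout as
 * milliseconds: a value of 1000 yields ts = { 1, 0 }, and 1500 yields
 * ts = { 1, 500000000 } since (1500 % 1000) * NSEC_PER_USEC * 1000
 * equals 500 * 1000 * 1000 nanoseconds.
 */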
4937
4938 /*
4939 * Returns the size of the data currently held by the content filters
4940 */
4941 int32_t
4942 cfil_sock_data_pending(struct sockbuf *sb)
4943 {
4944 struct socket *so = sb->sb_so;
4945 uint64_t pending = 0;
4946
4947 if (IS_UDP(so)) {
4948 return cfil_sock_udp_data_pending(sb, FALSE);
4949 }
4950
4951 if ((so->so_flags & SOF_CONTENT_FILTER) != 0 && so->so_cfil != NULL) {
4952 struct cfi_buf *cfi_buf;
4953
4954 socket_lock_assert_owned(so);
4955
4956 if ((sb->sb_flags & SB_RECV) == 0) {
4957 cfi_buf = &so->so_cfil->cfi_snd;
4958 } else {
4959 cfi_buf = &so->so_cfil->cfi_rcv;
4960 }
4961
4962 pending = cfi_buf->cfi_pending_last -
4963 cfi_buf->cfi_pending_first;
4964
4965 /*
4966 * If we are limited by the "chars of mbufs used" roughly
4967 * adjust so we won't overcommit
4968 */
4969 if (pending > (uint64_t)cfi_buf->cfi_pending_mbcnt) {
4970 pending = cfi_buf->cfi_pending_mbcnt;
4971 }
4972 }
4973
4974 VERIFY(pending < INT32_MAX);
4975
4976 return (int32_t)(pending);
4977 }
4978
4979 /*
4980 * Return the socket buffer space used by data being held by content filters
4981 * so processes won't clog the socket buffer
4982 */
4983 int32_t
4984 cfil_sock_data_space(struct sockbuf *sb)
4985 {
4986 struct socket *so = sb->sb_so;
4987 uint64_t pending = 0;
4988
4989 if (IS_UDP(so)) {
4990 return cfil_sock_udp_data_pending(sb, TRUE);
4991 }
4992
4993 if ((so->so_flags & SOF_CONTENT_FILTER) != 0 && so->so_cfil != NULL &&
4994 so->so_snd.sb_cfil_thread != current_thread()) {
4995 struct cfi_buf *cfi_buf;
4996
4997 socket_lock_assert_owned(so);
4998
4999 if ((sb->sb_flags & SB_RECV) == 0) {
5000 cfi_buf = &so->so_cfil->cfi_snd;
5001 } else {
5002 cfi_buf = &so->so_cfil->cfi_rcv;
5003 }
5004
5005 pending = cfi_buf->cfi_pending_last -
5006 cfi_buf->cfi_pending_first;
5007
5008 /*
5009 * If we are limited by the "chars of mbufs used" roughly
5010 * adjust so we won't overcommit
5011 */
5012 if ((uint64_t)cfi_buf->cfi_pending_mbcnt > pending) {
5013 pending = cfi_buf->cfi_pending_mbcnt;
5014 }
5015 }
5016
5017 VERIFY(pending < INT32_MAX);
5018
5019 return (int32_t)(pending);
5020 }
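/*
 * cfil_sock_data_pending() and cfil_sock_data_space() report the same
 * quantity, except that the space variant ignores the thread currently
 * re-injecting on the send buffer (sb_cfil_thread): that thread should
 * not be throttled by the very data it is in the process of pushing
 * back into the socket.
 */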
5021
5022 /*
5023 * A callback from the socket and protocol layer when data becomes
5024 * available in the socket buffer, giving the content filter a chance
5025 * to re-inject data that was held back
5026 */
5027 void
5028 cfil_sock_buf_update(struct sockbuf *sb)
5029 {
5030 int outgoing;
5031 int error;
5032 struct socket *so = sb->sb_so;
5033
5034 if (IS_UDP(so)) {
5035 cfil_sock_udp_buf_update(sb);
5036 return;
5037 }
5038
5039 if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL) {
5040 return;
5041 }
5042
5043 if (!cfil_sbtrim) {
5044 return;
5045 }
5046
5047 socket_lock_assert_owned(so);
5048
5049 if ((sb->sb_flags & SB_RECV) == 0) {
5050 if ((so->so_cfil->cfi_flags & CFIF_RETRY_INJECT_OUT) == 0) {
5051 return;
5052 }
5053 outgoing = 1;
5054 OSIncrementAtomic(&cfil_stats.cfs_inject_q_out_retry);
5055 } else {
5056 if ((so->so_cfil->cfi_flags & CFIF_RETRY_INJECT_IN) == 0) {
5057 return;
5058 }
5059 outgoing = 0;
5060 OSIncrementAtomic(&cfil_stats.cfs_inject_q_in_retry);
5061 }
5062
5063 CFIL_LOG(LOG_NOTICE, "so %llx outgoing %d",
5064 (uint64_t)VM_KERNEL_ADDRPERM(so), outgoing);
5065
5066 error = cfil_acquire_sockbuf(so, so->so_cfil, outgoing);
5067 if (error == 0) {
5068 cfil_service_inject_queue(so, so->so_cfil, outgoing);
5069 }
5070 cfil_release_sockbuf(so, outgoing);
5071 }
5072
5073 int
5074 sysctl_cfil_filter_list(struct sysctl_oid *oidp, void *arg1, int arg2,
5075 struct sysctl_req *req)
5076 {
5077 #pragma unused(oidp, arg1, arg2)
5078 int error = 0;
5079 size_t len = 0;
5080 u_int32_t i;
5081
5082 /* Read only */
5083 if (req->newptr != USER_ADDR_NULL) {
5084 return EPERM;
5085 }
5086
5087 cfil_rw_lock_shared(&cfil_lck_rw);
5088
5089 for (i = 0; content_filters != NULL && i < MAX_CONTENT_FILTER; i++) {
5090 struct cfil_filter_stat filter_stat;
5091 struct content_filter *cfc = content_filters[i];
5092
5093 if (cfc == NULL) {
5094 continue;
5095 }
5096
5097 /* If just asking for the size */
5098 if (req->oldptr == USER_ADDR_NULL) {
5099 len += sizeof(struct cfil_filter_stat);
5100 continue;
5101 }
5102
5103 bzero(&filter_stat, sizeof(struct cfil_filter_stat));
5104 filter_stat.cfs_len = sizeof(struct cfil_filter_stat);
5105 filter_stat.cfs_filter_id = cfc->cf_kcunit;
5106 filter_stat.cfs_flags = cfc->cf_flags;
5107 filter_stat.cfs_sock_count = cfc->cf_sock_count;
5108 filter_stat.cfs_necp_control_unit = cfc->cf_necp_control_unit;
5109
5110 error = SYSCTL_OUT(req, &filter_stat,
5111 sizeof(struct cfil_filter_stat));
5112 if (error != 0) {
5113 break;
5114 }
5115 }
5116 /* If just asking for the size */
5117 if (req->oldptr == USER_ADDR_NULL) {
5118 req->oldidx = len;
5119 }
5120
5121 cfil_rw_unlock_shared(&cfil_lck_rw);
5122
5123 #if SHOW_DEBUG
5124 if (req->oldptr != USER_ADDR_NULL) {
5125 for (i = 1; content_filters != NULL && i <= MAX_CONTENT_FILTER; i++) {
5126 cfil_filter_show(i);
5127 }
5128 }
5129 #endif
5130
5131 return error;
5132 }
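/*
 * A monitoring tool would read this with the usual two-step sysctl
 * dance. Minimal sketch; "oid_name" stands for whatever MIB name the
 * SYSCTL_PROC declaration elsewhere in this file registers for this
 * handler:
 *
 *	size_t len = 0;
 *	sysctlbyname(oid_name, NULL, &len, NULL, 0);	// size probe
 *	struct cfil_filter_stat *stats = malloc(len);
 *	sysctlbyname(oid_name, stats, &len, NULL, 0);	// fetch
 *	// len / sizeof(struct cfil_filter_stat) entries are returned
 */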
5133
5134 static int
5135 sysctl_cfil_sock_list(struct sysctl_oid *oidp, void *arg1, int arg2,
5136 struct sysctl_req *req)
5137 {
5138 #pragma unused(oidp, arg1, arg2)
5139 int error = 0;
5140 u_int32_t i;
5141 struct cfil_info *cfi;
5142
5143 /* Read only */
5144 if (req->newptr != USER_ADDR_NULL) {
5145 return EPERM;
5146 }
5147
5148 cfil_rw_lock_shared(&cfil_lck_rw);
5149
5150 /*
5151 * If just asking for the size
5152 */
5153 if (req->oldptr == USER_ADDR_NULL) {
5154 req->oldidx = cfil_sock_attached_count *
5155 sizeof(struct cfil_sock_stat);
5156 /* Bump the length in case new sockets get attached */
5157 req->oldidx += req->oldidx >> 3;
5158 goto done;
5159 }
5160
5161 TAILQ_FOREACH(cfi, &cfil_sock_head, cfi_link) {
5162 struct cfil_entry *entry;
5163 struct cfil_sock_stat stat;
5164 struct socket *so = cfi->cfi_so;
5165
5166 bzero(&stat, sizeof(struct cfil_sock_stat));
5167 stat.cfs_len = sizeof(struct cfil_sock_stat);
5168 stat.cfs_sock_id = cfi->cfi_sock_id;
5169 stat.cfs_flags = cfi->cfi_flags;
5170
5171 if (so != NULL) {
5172 stat.cfs_pid = so->last_pid;
5173 memcpy(stat.cfs_uuid, so->last_uuid,
5174 sizeof(uuid_t));
5175 if (so->so_flags & SOF_DELEGATED) {
5176 stat.cfs_e_pid = so->e_pid;
5177 memcpy(stat.cfs_e_uuid, so->e_uuid,
5178 sizeof(uuid_t));
5179 } else {
5180 stat.cfs_e_pid = so->last_pid;
5181 memcpy(stat.cfs_e_uuid, so->last_uuid,
5182 sizeof(uuid_t));
5183 }
5184
5185 stat.cfs_sock_family = so->so_proto->pr_domain->dom_family;
5186 stat.cfs_sock_type = so->so_proto->pr_type;
5187 stat.cfs_sock_protocol = so->so_proto->pr_protocol;
5188 }
5189
5190 stat.cfs_snd.cbs_pending_first =
5191 cfi->cfi_snd.cfi_pending_first;
5192 stat.cfs_snd.cbs_pending_last =
5193 cfi->cfi_snd.cfi_pending_last;
5194 stat.cfs_snd.cbs_inject_q_len =
5195 cfil_queue_len(&cfi->cfi_snd.cfi_inject_q);
5196 stat.cfs_snd.cbs_pass_offset =
5197 cfi->cfi_snd.cfi_pass_offset;
5198
5199 stat.cfs_rcv.cbs_pending_first =
5200 cfi->cfi_rcv.cfi_pending_first;
5201 stat.cfs_rcv.cbs_pending_last =
5202 cfi->cfi_rcv.cfi_pending_last;
5203 stat.cfs_rcv.cbs_inject_q_len =
5204 cfil_queue_len(&cfi->cfi_rcv.cfi_inject_q);
5205 stat.cfs_rcv.cbs_pass_offset =
5206 cfi->cfi_rcv.cfi_pass_offset;
5207
5208 for (i = 0; i < MAX_CONTENT_FILTER; i++) {
5209 struct cfil_entry_stat *estat;
5210 struct cfe_buf *ebuf;
5211 struct cfe_buf_stat *sbuf;
5212
5213 entry = &cfi->cfi_entries[i];
5214
5215 estat = &stat.ces_entries[i];
5216
5217 estat->ces_len = sizeof(struct cfil_entry_stat);
5218 estat->ces_filter_id = entry->cfe_filter ?
5219 entry->cfe_filter->cf_kcunit : 0;
5220 estat->ces_flags = entry->cfe_flags;
5221 estat->ces_necp_control_unit =
5222 entry->cfe_necp_control_unit;
5223
5224 estat->ces_last_event.tv_sec =
5225 (int64_t)entry->cfe_last_event.tv_sec;
5226 estat->ces_last_event.tv_usec =
5227 (int64_t)entry->cfe_last_event.tv_usec;
5228
5229 estat->ces_last_action.tv_sec =
5230 (int64_t)entry->cfe_last_action.tv_sec;
5231 estat->ces_last_action.tv_usec =
5232 (int64_t)entry->cfe_last_action.tv_usec;
5233
5234 ebuf = &entry->cfe_snd;
5235 sbuf = &estat->ces_snd;
5236 sbuf->cbs_pending_first =
5237 cfil_queue_offset_first(&ebuf->cfe_pending_q);
5238 sbuf->cbs_pending_last =
5239 cfil_queue_offset_last(&ebuf->cfe_pending_q);
5240 sbuf->cbs_ctl_first =
5241 cfil_queue_offset_first(&ebuf->cfe_ctl_q);
5242 sbuf->cbs_ctl_last =
5243 cfil_queue_offset_last(&ebuf->cfe_ctl_q);
5244 sbuf->cbs_pass_offset = ebuf->cfe_pass_offset;
5245 sbuf->cbs_peek_offset = ebuf->cfe_peek_offset;
5246 sbuf->cbs_peeked = ebuf->cfe_peeked;
5247
5248 ebuf = &entry->cfe_rcv;
5249 sbuf = &estat->ces_rcv;
5250 sbuf->cbs_pending_first =
5251 cfil_queue_offset_first(&ebuf->cfe_pending_q);
5252 sbuf->cbs_pending_last =
5253 cfil_queue_offset_last(&ebuf->cfe_pending_q);
5254 sbuf->cbs_ctl_first =
5255 cfil_queue_offset_first(&ebuf->cfe_ctl_q);
5256 sbuf->cbs_ctl_last =
5257 cfil_queue_offset_last(&ebuf->cfe_ctl_q);
5258 sbuf->cbs_pass_offset = ebuf->cfe_pass_offset;
5259 sbuf->cbs_peek_offset = ebuf->cfe_peek_offset;
5260 sbuf->cbs_peeked = ebuf->cfe_peeked;
5261 }
5262 error = SYSCTL_OUT(req, &stat,
5263 sizeof(struct cfil_sock_stat));
5264 if (error != 0) {
5265 break;
5266 }
5267 }
5268 done:
5269 cfil_rw_unlock_shared(&cfil_lck_rw);
5270
5271 #if SHOW_DEBUG
5272 if (req->oldptr != USER_ADDR_NULL) {
5273 cfil_info_show();
5274 }
5275 #endif
5276
5277 return error;
5278 }
5279
5280 /*
5281 * UDP Socket Support
5282 */
5283 static void
5284 cfil_hash_entry_log(int level, struct socket *so, struct cfil_hash_entry *entry, uint64_t sockId, const char* msg)
5285 {
5286 char local[MAX_IPv6_STR_LEN + 6];
5287 char remote[MAX_IPv6_STR_LEN + 6];
5288 const void *addr;
5289
5290 // No sock or not UDP, no-op
5291 if (so == NULL || entry == NULL) {
5292 return;
5293 }
5294
5295 local[0] = remote[0] = 0x0;
5296
5297 switch (entry->cfentry_family) {
5298 case AF_INET6:
5299 addr = &entry->cfentry_laddr.addr6;
5300 inet_ntop(AF_INET6, addr, local, sizeof(local));
5301 addr = &entry->cfentry_faddr.addr6;
5302 inet_ntop(AF_INET6, addr, remote, sizeof(remote));
5303 break;
5304 case AF_INET:
5305 addr = &entry->cfentry_laddr.addr46.ia46_addr4.s_addr;
5306 inet_ntop(AF_INET, addr, local, sizeof(local));
5307 addr = &entry->cfentry_faddr.addr46.ia46_addr4.s_addr;
5308 inet_ntop(AF_INET, addr, remote, sizeof(remote));
5309 break;
5310 default:
5311 return;
5312 }
5313
5314 CFIL_LOG(level, "<%s>: <UDP so %llx, entry %p, sockID %llu> lport %d fport %d laddr %s faddr %s",
5315 msg,
5316 (uint64_t)VM_KERNEL_ADDRPERM(so), entry, sockId,
5317 ntohs(entry->cfentry_lport), ntohs(entry->cfentry_fport), local, remote);
5318 }
5319
5320 static void
5321 cfil_inp_log(int level, struct socket *so, const char* msg)
5322 {
5323 struct inpcb *inp = NULL;
5324 char local[MAX_IPv6_STR_LEN + 6];
5325 char remote[MAX_IPv6_STR_LEN + 6];
5326 const void *addr;
5327
5328 if (so == NULL) {
5329 return;
5330 }
5331
5332 inp = sotoinpcb(so);
5333 if (inp == NULL) {
5334 return;
5335 }
5336
5337 local[0] = remote[0] = 0x0;
5338
5339 #if INET6
5340 if (inp->inp_vflag & INP_IPV6) {
5341 addr = &inp->in6p_laddr.s6_addr32;
5342 inet_ntop(AF_INET6, addr, local, sizeof(local));
5343 addr = &inp->in6p_faddr.s6_addr32;
5344 inet_ntop(AF_INET6, addr, remote, sizeof(remote));
5345 } else
5346 #endif /* INET6 */
5347 {
5348 addr = &inp->inp_laddr.s_addr;
5349 inet_ntop(AF_INET, addr, local, sizeof(local));
5350 addr = &inp->inp_faddr.s_addr;
5351 inet_ntop(AF_INET, addr, remote, sizeof(remote));
5352 }
5353
5354 if (so->so_cfil != NULL) {
5355 CFIL_LOG(level, "<%s>: <%s so %llx - flags 0x%x 0x%x, sockID %llu> lport %d fport %d laddr %s faddr %s",
5356 msg, IS_UDP(so) ? "UDP" : "TCP",
5357 (uint64_t)VM_KERNEL_ADDRPERM(so), inp->inp_flags, inp->inp_socket->so_flags, so->so_cfil->cfi_sock_id,
5358 ntohs(inp->inp_lport), ntohs(inp->inp_fport), local, remote);
5359 } else {
5360 CFIL_LOG(level, "<%s>: <%s so %llx - flags 0x%x 0x%x> lport %d fport %d laddr %s faddr %s",
5361 msg, IS_UDP(so) ? "UDP" : "TCP",
5362 (uint64_t)VM_KERNEL_ADDRPERM(so), inp->inp_flags, inp->inp_socket->so_flags,
5363 ntohs(inp->inp_lport), ntohs(inp->inp_fport), local, remote);
5364 }
5365 }
5366
5367 static void
5368 cfil_info_log(int level, struct cfil_info *cfil_info, const char* msg)
5369 {
5370 if (cfil_info == NULL) {
5371 return;
5372 }
5373
5374 if (cfil_info->cfi_hash_entry != NULL) {
5375 cfil_hash_entry_log(level, cfil_info->cfi_so, cfil_info->cfi_hash_entry, cfil_info->cfi_sock_id, msg);
5376 } else {
5377 cfil_inp_log(level, cfil_info->cfi_so, msg);
5378 }
5379 }
5380
5381 errno_t
5382 cfil_db_init(struct socket *so)
5383 {
5384 errno_t error = 0;
5385 struct cfil_db *db = NULL;
5386
5387 CFIL_LOG(LOG_INFO, "");
5388
5389 db = zalloc(cfil_db_zone);
5390 if (db == NULL) {
5391 error = ENOMEM;
5392 goto done;
5393 }
5394 bzero(db, sizeof(struct cfil_db));
5395 db->cfdb_so = so;
5396 db->cfdb_hashbase = hashinit(CFILHASHSIZE, M_CFIL, &db->cfdb_hashmask);
5397 if (db->cfdb_hashbase == NULL) {
5398 zfree(cfil_db_zone, db);
5399 db = NULL;
5400 error = ENOMEM;
5401 goto done;
5402 }
5403
5404 so->so_cfil_db = db;
5405
5406 done:
5407 return error;
5408 }
5409
5410 void
5411 cfil_db_free(struct socket *so)
5412 {
5413 struct cfil_hash_entry *entry = NULL;
5414 struct cfil_hash_entry *temp_entry = NULL;
5415 struct cfilhashhead *cfilhash = NULL;
5416 struct cfil_db *db = NULL;
5417
5418 CFIL_LOG(LOG_INFO, "");
5419
5420 if (so == NULL || so->so_cfil_db == NULL) {
5421 return;
5422 }
5423 db = so->so_cfil_db;
5424
5425 #if LIFECYCLE_DEBUG
5426 CFIL_LOG(LOG_ERR, "CFIL: LIFECYCLE: <so %llx, db %p> freeing db (count == %d)",
5427 (uint64_t)VM_KERNEL_ADDRPERM(so), db, db->cfdb_count);
5428 #endif
5429
5430 for (int i = 0; i < CFILHASHSIZE; i++) {
5431 cfilhash = &db->cfdb_hashbase[i];
5432 LIST_FOREACH_SAFE(entry, cfilhash, cfentry_link, temp_entry) {
5433 if (entry->cfentry_cfil != NULL) {
5434 #if LIFECYCLE_DEBUG
5435 cfil_info_log(LOG_ERR, entry->cfentry_cfil, "CFIL: LIFECYCLE: DB FREE CLEAN UP");
5436 #endif
5437 cfil_info_free(entry->cfentry_cfil);
5438 OSIncrementAtomic(&cfil_stats.cfs_sock_detached);
5439 entry->cfentry_cfil = NULL;
5440 }
5441
5442 cfil_db_delete_entry(db, entry);
5443 if (so->so_flags & SOF_CONTENT_FILTER) {
5444 if (db->cfdb_count == 0) {
5445 so->so_flags &= ~SOF_CONTENT_FILTER;
5446 }
5447 VERIFY(so->so_usecount > 0);
5448 so->so_usecount--;
5449 }
5450 }
5451 }
5452
5453 // Make sure all entries are cleaned up!
5454 VERIFY(db->cfdb_count == 0);
5455 #if LIFECYCLE_DEBUG
5456 CFIL_LOG(LOG_ERR, "CFIL: LIFECYCLE: so usecount %d", so->so_usecount);
5457 #endif
5458
5459 FREE(db->cfdb_hashbase, M_CFIL);
5460 zfree(cfil_db_zone, db);
5461 so->so_cfil_db = NULL;
5462 }
5463
5464 static bool
5465 fill_cfil_hash_entry_from_address(struct cfil_hash_entry *entry, bool isLocal, struct sockaddr *addr)
5466 {
5467 struct sockaddr_in *sin = NULL;
5468 struct sockaddr_in6 *sin6 = NULL;
5469
5470 if (entry == NULL || addr == NULL) {
5471 return FALSE;
5472 }
5473
5474 switch (addr->sa_family) {
5475 case AF_INET:
5476 sin = satosin(addr);
5477 if (sin->sin_len != sizeof(*sin)) {
5478 return FALSE;
5479 }
5480 if (isLocal == TRUE) {
5481 entry->cfentry_lport = sin->sin_port;
5482 entry->cfentry_laddr.addr46.ia46_addr4.s_addr = sin->sin_addr.s_addr;
5483 } else {
5484 entry->cfentry_fport = sin->sin_port;
5485 entry->cfentry_faddr.addr46.ia46_addr4.s_addr = sin->sin_addr.s_addr;
5486 }
5487 entry->cfentry_family = AF_INET;
5488 return TRUE;
5489 case AF_INET6:
5490 sin6 = satosin6(addr);
5491 if (sin6->sin6_len != sizeof(*sin6)) {
5492 return FALSE;
5493 }
5494 if (isLocal == TRUE) {
5495 entry->cfentry_lport = sin6->sin6_port;
5496 entry->cfentry_laddr.addr6 = sin6->sin6_addr;
5497 } else {
5498 entry->cfentry_fport = sin6->sin6_port;
5499 entry->cfentry_faddr.addr6 = sin6->sin6_addr;
5500 }
5501 entry->cfentry_family = AF_INET6;
5502 return TRUE;
5503 default:
5504 return FALSE;
5505 }
5506 }
5507
5508 static bool
5509 fill_cfil_hash_entry_from_inp(struct cfil_hash_entry *entry, bool isLocal, struct inpcb *inp)
5510 {
5511 if (entry == NULL || inp == NULL) {
5512 return FALSE;
5513 }
5514
5515 if (inp->inp_vflag & INP_IPV4) {
5516 if (isLocal == TRUE) {
5517 entry->cfentry_lport = inp->inp_lport;
5518 entry->cfentry_laddr.addr46.ia46_addr4.s_addr = inp->inp_laddr.s_addr;
5519 } else {
5520 entry->cfentry_fport = inp->inp_fport;
5521 entry->cfentry_faddr.addr46.ia46_addr4.s_addr = inp->inp_faddr.s_addr;
5522 }
5523 entry->cfentry_family = AF_INET;
5524 return TRUE;
5525 } else if (inp->inp_vflag & INP_IPV6) {
5526 if (isLocal == TRUE) {
5527 entry->cfentry_lport = inp->inp_lport;
5528 entry->cfentry_laddr.addr6 = inp->in6p_laddr;
5529 } else {
5530 entry->cfentry_fport = inp->inp_fport;
5531 entry->cfentry_faddr.addr6 = inp->in6p_faddr;
5532 }
5533 entry->cfentry_family = AF_INET6;
5534 return TRUE;
5535 }
5536 return FALSE;
5537 }
5538
5539 bool
5540 check_port(struct sockaddr *addr, u_short port)
5541 {
5542 struct sockaddr_in *sin = NULL;
5543 struct sockaddr_in6 *sin6 = NULL;
5544
5545 if (addr == NULL || port == 0) {
5546 return FALSE;
5547 }
5548
5549 switch (addr->sa_family) {
5550 case AF_INET:
5551 sin = satosin(addr);
5552 if (sin->sin_len != sizeof(*sin)) {
5553 return FALSE;
5554 }
5555 if (port == ntohs(sin->sin_port)) {
5556 return TRUE;
5557 }
5558 break;
5559 case AF_INET6:
5560 sin6 = satosin6(addr);
5561 if (sin6->sin6_len != sizeof(*sin6)) {
5562 return FALSE;
5563 }
5564 if (port == ntohs(sin6->sin6_port)) {
5565 return TRUE;
5566 }
5567 break;
5568 default:
5569 break;
5570 }
5571 return FALSE;
5572 }
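/*
 * check_port() compares a port given in host byte order against the port stored
 * in a sockaddr (network byte order). An illustrative (hypothetical) use:
 *
 *	if (check_port(remote, 53)) {
 *		// skip filtering for this well-known destination port
 *	}
 */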
5573
5574 struct cfil_hash_entry *
5575 cfil_db_lookup_entry_with_sockid(struct cfil_db *db, u_int64_t sock_id)
5576 {
5577 struct cfilhashhead *cfilhash = NULL;
5578 u_int32_t flowhash = (u_int32_t)(sock_id & 0x0ffffffff);
5579 struct cfil_hash_entry *nextentry;
5580
5581 if (db == NULL || db->cfdb_hashbase == NULL || sock_id == 0) {
5582 return NULL;
5583 }
5584
5585 flowhash &= db->cfdb_hashmask;
5586 cfilhash = &db->cfdb_hashbase[flowhash];
5587
5588 LIST_FOREACH(nextentry, cfilhash, cfentry_link) {
5589 if (nextentry->cfentry_cfil != NULL &&
5590 nextentry->cfentry_cfil->cfi_sock_id == sock_id) {
5591 CFIL_LOG(LOG_DEBUG, "CFIL: UDP <so %llx> matched <id %llu, hash %u>",
5592 (uint64_t)VM_KERNEL_ADDRPERM(db->cfdb_so), nextentry->cfentry_cfil->cfi_sock_id, flowhash);
5593 cfil_hash_entry_log(LOG_DEBUG, db->cfdb_so, nextentry, 0, "CFIL: UDP found entry");
5594 return nextentry;
5595 }
5596 }
5597
5598 CFIL_LOG(LOG_DEBUG, "CFIL: UDP <so %llx> NOT matched <id %llu, hash %u>",
5599 (uint64_t)VM_KERNEL_ADDRPERM(db->cfdb_so), sock_id, flowhash);
5600 return NULL;
5601 }
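/*
 * This lookup relies on how UDP flow sock ids are constructed: the low 32 bits
 * of cfi_sock_id carry the flow hash, so masking the id with cfdb_hashmask
 * selects the bucket directly and only that chain needs to be walked.
 */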
5602
5603 struct cfil_hash_entry *
5604 cfil_db_lookup_entry(struct cfil_db *db, struct sockaddr *local, struct sockaddr *remote)
5605 {
5606 struct cfil_hash_entry matchentry;
5607 struct cfil_hash_entry *nextentry = NULL;
5608 struct inpcb *inp = sotoinpcb(db->cfdb_so);
5609 u_int32_t hashkey_faddr = 0, hashkey_laddr = 0;
5610 int inp_hash_element = 0;
5611 struct cfilhashhead *cfilhash = NULL;
5612
5613 CFIL_LOG(LOG_INFO, "");
5614
5615 if (inp == NULL) {
5616 goto done;
5617 }
5618
5619 if (local != NULL) {
5620 fill_cfil_hash_entry_from_address(&matchentry, TRUE, local);
5621 } else {
5622 fill_cfil_hash_entry_from_inp(&matchentry, TRUE, inp);
5623 }
5624 if (remote != NULL) {
5625 fill_cfil_hash_entry_from_address(&matchentry, FALSE, remote);
5626 } else {
5627 fill_cfil_hash_entry_from_inp(&matchentry, FALSE, inp);
5628 }
5629
5630 #if INET6
5631 if (inp->inp_vflag & INP_IPV6) {
5632 hashkey_faddr = matchentry.cfentry_faddr.addr6.s6_addr32[3];
5633 hashkey_laddr = matchentry.cfentry_laddr.addr6.s6_addr32[3];
5634 } else
5635 #endif /* INET6 */
5636 {
5637 hashkey_faddr = matchentry.cfentry_faddr.addr46.ia46_addr4.s_addr;
5638 hashkey_laddr = matchentry.cfentry_laddr.addr46.ia46_addr4.s_addr;
5639 }
5640
5641 inp_hash_element = CFIL_HASH(hashkey_laddr, hashkey_faddr,
5642 matchentry.cfentry_lport, matchentry.cfentry_fport);
5643 inp_hash_element &= db->cfdb_hashmask;
5644
5645 cfilhash = &db->cfdb_hashbase[inp_hash_element];
5646
5647 LIST_FOREACH(nextentry, cfilhash, cfentry_link) {
5648 #if INET6
5649 if ((inp->inp_vflag & INP_IPV6) &&
5650 nextentry->cfentry_lport == matchentry.cfentry_lport &&
5651 nextentry->cfentry_fport == matchentry.cfentry_fport &&
5652 IN6_ARE_ADDR_EQUAL(&nextentry->cfentry_laddr.addr6, &matchentry.cfentry_laddr.addr6) &&
5653 IN6_ARE_ADDR_EQUAL(&nextentry->cfentry_faddr.addr6, &matchentry.cfentry_faddr.addr6)) {
5654 #if DATA_DEBUG
5655 cfil_hash_entry_log(LOG_DEBUG, db->cfdb_so, &matchentry, 0, "CFIL LOOKUP ENTRY: UDP V6 found entry");
5656 #endif
5657 return nextentry;
5658 } else
5659 #endif /* INET6 */
5660 if (nextentry->cfentry_lport == matchentry.cfentry_lport &&
5661 nextentry->cfentry_fport == matchentry.cfentry_fport &&
5662 nextentry->cfentry_laddr.addr46.ia46_addr4.s_addr == matchentry.cfentry_laddr.addr46.ia46_addr4.s_addr &&
5663 nextentry->cfentry_faddr.addr46.ia46_addr4.s_addr == matchentry.cfentry_faddr.addr46.ia46_addr4.s_addr) {
5664 #if DATA_DEBUG
5665 cfil_hash_entry_log(LOG_DEBUG, db->cfdb_so, &matchentry, 0, "CFIL LOOKUP ENTRY: UDP V4 found entry");
5666 #endif
5667 return nextentry;
5668 }
5669 }
5670
5671 done:
5672 #if DATA_DEBUG
5673 cfil_hash_entry_log(LOG_DEBUG, db->cfdb_so, &matchentry, 0, "CFIL LOOKUP ENTRY: UDP no entry found");
5674 #endif
5675 return NULL;
5676 }
5677
5678 void
5679 cfil_db_delete_entry(struct cfil_db *db, struct cfil_hash_entry *hash_entry)
5680 {
5681 if (hash_entry == NULL) {
5682 return;
5683 }
5684 if (db == NULL || db->cfdb_count == 0) {
5685 return;
5686 }
5687 db->cfdb_count--;
5688 if (db->cfdb_only_entry == hash_entry) {
5689 db->cfdb_only_entry = NULL;
5690 }
5691 LIST_REMOVE(hash_entry, cfentry_link);
5692 zfree(cfil_hash_entry_zone, hash_entry);
5693 }
5694
5695 struct cfil_hash_entry *
5696 cfil_db_add_entry(struct cfil_db *db, struct sockaddr *local, struct sockaddr *remote)
5697 {
5698 struct cfil_hash_entry *entry = NULL;
5699 struct inpcb *inp = sotoinpcb(db->cfdb_so);
5700 u_int32_t hashkey_faddr = 0, hashkey_laddr = 0;
5701 int inp_hash_element = 0;
5702 struct cfilhashhead *cfilhash = NULL;
5703
5704 CFIL_LOG(LOG_INFO, "");
5705
5706 if (inp == NULL) {
5707 goto done;
5708 }
5709
5710 entry = zalloc(cfil_hash_entry_zone);
5711 if (entry == NULL) {
5712 goto done;
5713 }
5714 bzero(entry, sizeof(struct cfil_hash_entry));
5715
5716 if (local != NULL) {
5717 fill_cfil_hash_entry_from_address(entry, TRUE, local);
5718 } else {
5719 fill_cfil_hash_entry_from_inp(entry, TRUE, inp);
5720 }
5721 if (remote != NULL) {
5722 fill_cfil_hash_entry_from_address(entry, FALSE, remote);
5723 } else {
5724 fill_cfil_hash_entry_from_inp(entry, FALSE, inp);
5725 }
5726 entry->cfentry_lastused = net_uptime();
5727
5728 #if INET6
5729 if (inp->inp_vflag & INP_IPV6) {
5730 hashkey_faddr = entry->cfentry_faddr.addr6.s6_addr32[3];
5731 hashkey_laddr = entry->cfentry_laddr.addr6.s6_addr32[3];
5732 } else
5733 #endif /* INET6 */
5734 {
5735 hashkey_faddr = entry->cfentry_faddr.addr46.ia46_addr4.s_addr;
5736 hashkey_laddr = entry->cfentry_laddr.addr46.ia46_addr4.s_addr;
5737 }
5738 entry->cfentry_flowhash = CFIL_HASH(hashkey_laddr, hashkey_faddr,
5739 entry->cfentry_lport, entry->cfentry_fport);
5740 inp_hash_element = entry->cfentry_flowhash & db->cfdb_hashmask;
5741
5742 cfilhash = &db->cfdb_hashbase[inp_hash_element];
5743
5744 LIST_INSERT_HEAD(cfilhash, entry, cfentry_link);
5745 db->cfdb_count++;
5746 db->cfdb_only_entry = entry;
5747 cfil_hash_entry_log(LOG_DEBUG, db->cfdb_so, entry, 0, "CFIL: cfil_db_add_entry: ADDED");
5748
5749 done:
5750 CFIL_LOG(LOG_DEBUG, "CFIL: UDP <so %llx> total count %d", (uint64_t)VM_KERNEL_ADDRPERM(db->cfdb_so), db->cfdb_count);
5751 return entry;
5752 }
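/*
 * cfdb_only_entry caches the most recently added flow so that, when the
 * database holds exactly one flow (the common case for a connected UDP
 * socket), cfil_db_get_cfil_info() can return it without a hash lookup.
 */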
5753
5754 struct cfil_info *
5755 cfil_db_get_cfil_info(struct cfil_db *db, cfil_sock_id_t id)
5756 {
5757 struct cfil_hash_entry *hash_entry = NULL;
5758
5759 CFIL_LOG(LOG_INFO, "");
5760
5761 if (db == NULL || id == 0) {
5762 CFIL_LOG(LOG_DEBUG, "CFIL: UDP <so %llx> NULL DB <id %llu>",
5763 db ? (uint64_t)VM_KERNEL_ADDRPERM(db->cfdb_so) : 0, id);
5764 return NULL;
5765 }
5766
5767 // This is an optimization for a connected UDP socket, which has only one flow:
5768 // no need to do the hash lookup.
5769 if (db->cfdb_count == 1) {
5770 if (db->cfdb_only_entry && db->cfdb_only_entry->cfentry_cfil &&
5771 db->cfdb_only_entry->cfentry_cfil->cfi_sock_id == id) {
5772 return db->cfdb_only_entry->cfentry_cfil;
5773 }
5774 }
5775
5776 hash_entry = cfil_db_lookup_entry_with_sockid(db, id);
5777 return hash_entry != NULL ? hash_entry->cfentry_cfil : NULL;
5778 }
5779
5780 struct cfil_hash_entry *
5781 cfil_sock_udp_get_flow(struct socket *so, uint32_t filter_control_unit, bool outgoing, struct sockaddr *local, struct sockaddr *remote)
5782 {
5783 struct cfil_hash_entry *hash_entry = NULL;
5784
5785 errno_t error = 0;
5786 socket_lock_assert_owned(so);
5787
5788 // If new socket, allocate cfil db
5789 if (so->so_cfil_db == NULL) {
5790 if (cfil_db_init(so) != 0) {
5791 return NULL;
5792 }
5793 }
5794
5795 // See if flow already exists.
5796 hash_entry = cfil_db_lookup_entry(so->so_cfil_db, local, remote);
5797 if (hash_entry != NULL) {
5798 return hash_entry;
5799 }
5800
5801 hash_entry = cfil_db_add_entry(so->so_cfil_db, local, remote);
5802 if (hash_entry == NULL) {
5803 OSIncrementAtomic(&cfil_stats.cfs_sock_attach_no_mem);
5804 CFIL_LOG(LOG_ERR, "CFIL: UDP failed to add entry");
5805 return NULL;
5806 }
5807
5808 if (cfil_info_alloc(so, hash_entry) == NULL ||
5809 hash_entry->cfentry_cfil == NULL) {
5810 cfil_db_delete_entry(so->so_cfil_db, hash_entry);
5811 CFIL_LOG(LOG_ERR, "CFIL: UDP failed to alloc cfil_info");
5812 OSIncrementAtomic(&cfil_stats.cfs_sock_attach_no_mem);
5813 return NULL;
5814 }
5815 hash_entry->cfentry_cfil->cfi_dir = outgoing ? CFS_CONNECTION_DIR_OUT : CFS_CONNECTION_DIR_IN;
5816
5817 #if LIFECYCLE_DEBUG
5818 cfil_info_log(LOG_ERR, hash_entry->cfentry_cfil, "CFIL: LIFECYCLE: ADDED");
5819 #endif
5820
5821 if (cfil_info_attach_unit(so, filter_control_unit, hash_entry->cfentry_cfil) == 0) {
5822 cfil_info_free(hash_entry->cfentry_cfil);
5823 cfil_db_delete_entry(so->so_cfil_db, hash_entry);
5824 CFIL_LOG(LOG_ERR, "CFIL: UDP cfil_info_attach_unit(%u) failed",
5825 filter_control_unit);
5826 OSIncrementAtomic(&cfil_stats.cfs_sock_attach_failed);
5827 return NULL;
5828 }
5829 CFIL_LOG(LOG_DEBUG, "CFIL: UDP <so %llx> filter_control_unit %u sockID %llu attached",
5830 (uint64_t)VM_KERNEL_ADDRPERM(so),
5831 filter_control_unit, hash_entry->cfentry_cfil->cfi_sock_id);
5832
5833 so->so_flags |= SOF_CONTENT_FILTER;
5834 OSIncrementAtomic(&cfil_stats.cfs_sock_attached);
5835
5836 /* Hold a reference on the socket for each flow */
5837 so->so_usecount++;
5838
5839 error = cfil_dispatch_attach_event(so, hash_entry->cfentry_cfil, 0,
5840 outgoing ? CFS_CONNECTION_DIR_OUT : CFS_CONNECTION_DIR_IN);
5841 /* We can recover from flow control or out of memory errors */
5842 if (error != 0 && error != ENOBUFS && error != ENOMEM) {
5843 return NULL;
5844 }
5845
5846 CFIL_INFO_VERIFY(hash_entry->cfentry_cfil);
5847 return hash_entry;
5848 }
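/*
 * Attach path for a UDP flow, in order: look up or create the hash entry for
 * the address tuple, allocate the per-flow cfil_info, attach it to the filter
 * control unit, mark the socket SOF_CONTENT_FILTER, take a socket reference
 * for the flow, then dispatch the attach event to the user space agent.
 * ENOBUFS/ENOMEM from the dispatch are tolerated since those conditions are
 * recoverable.
 */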
5849
5850 errno_t
5851 cfil_sock_udp_handle_data(bool outgoing, struct socket *so,
5852 struct sockaddr *local, struct sockaddr *remote,
5853 struct mbuf *data, struct mbuf *control, uint32_t flags)
5854 {
5855 #pragma unused(outgoing, so, local, remote, data, control, flags)
5856 errno_t error = 0;
5857 uint32_t filter_control_unit;
5858 struct cfil_hash_entry *hash_entry = NULL;
5859 struct cfil_info *cfil_info = NULL;
5860
5861 socket_lock_assert_owned(so);
5862
5863 if (cfil_active_count == 0) {
5864 CFIL_LOG(LOG_DEBUG, "CFIL: UDP no active filter");
5865 OSIncrementAtomic(&cfil_stats.cfs_sock_attach_in_vain);
5866 return error;
5867 }
5868
5869 // Socket has been blessed to skip content filtering
5870 if ((so->so_flags1 & SOF1_CONTENT_FILTER_SKIP) != 0) {
5871 return error;
5872 }
5873
5874 filter_control_unit = necp_socket_get_content_filter_control_unit(so);
5875 if (filter_control_unit == 0) {
5876 CFIL_LOG(LOG_DEBUG, "CFIL: UDP failed to get control unit");
5877 return error;
5878 }
5879
5880 if (filter_control_unit == NECP_FILTER_UNIT_NO_FILTER) {
5881 return error;
5882 }
5883
5884 if ((filter_control_unit & NECP_MASK_USERSPACE_ONLY) != 0) {
5885 CFIL_LOG(LOG_DEBUG, "CFIL: UDP user space only");
5886 OSIncrementAtomic(&cfil_stats.cfs_sock_userspace_only);
5887 return error;
5888 }
5889
5890 hash_entry = cfil_sock_udp_get_flow(so, filter_control_unit, outgoing, local, remote);
5891 if (hash_entry == NULL || hash_entry->cfentry_cfil == NULL) {
5892 CFIL_LOG(LOG_ERR, "CFIL: Failed to create UDP flow");
5893 return EPIPE;
5894 }
5895 // Update the last-used timestamp; this drives the flow idle timeout (GC)
5896 hash_entry->cfentry_lastused = net_uptime();
5897 cfil_info = hash_entry->cfentry_cfil;
5898
5899 if (cfil_info->cfi_flags & CFIF_DROP) {
5900 #if DATA_DEBUG
5901 cfil_hash_entry_log(LOG_DEBUG, so, hash_entry, 0, "CFIL: UDP DROP");
5902 #endif
5903 return EPIPE;
5904 }
5905 if (control != NULL) {
5906 OSIncrementAtomic(&cfil_stats.cfs_data_in_control);
5907 }
5908 if (data->m_type == MT_OOBDATA) {
5909 CFIL_LOG(LOG_ERR, "so %llx MSG_OOB",
5910 (uint64_t)VM_KERNEL_ADDRPERM(so));
5911 OSIncrementAtomic(&cfil_stats.cfs_data_in_oob);
5912 }
5913
5914 error = cfil_data_common(so, cfil_info, outgoing, remote, data, control, flags);
5915
5916 return error;
5917 }
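/*
 * cfil_sock_udp_handle_data() is the per-datagram entry point for UDP content
 * filtering. It returns early when no filter is active, when the socket is
 * flagged to skip filtering, or when NECP reports no applicable filter control
 * unit; otherwise it resolves the flow for the datagram's address tuple and
 * hands the data to cfil_data_common(). A flow already marked CFIF_DROP makes
 * it return EPIPE.
 */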
5918
5919 /*
5920 * Go through all UDP flows for the specified socket and return TRUE if
5921 * any flow is still attached. If need_wait is TRUE, wait on the first
5922 * attached flow.
5923 */
5924 static int
5925 cfil_filters_udp_attached(struct socket *so, bool need_wait)
5926 {
5927 struct timespec ts;
5928 lck_mtx_t *mutex_held;
5929 struct cfilhashhead *cfilhash = NULL;
5930 struct cfil_db *db = NULL;
5931 struct cfil_hash_entry *hash_entry = NULL;
5932 struct cfil_hash_entry *temp_hash_entry = NULL;
5933 struct cfil_info *cfil_info = NULL;
5934 struct cfil_entry *entry = NULL;
5935 errno_t error = 0;
5936 int kcunit;
5937 int attached = 0;
5938 uint64_t sock_flow_id = 0;
5939
5940 socket_lock_assert_owned(so);
5941
5942 if ((so->so_flags & SOF_CONTENT_FILTER) != 0 && so->so_cfil_db != NULL) {
5943 if (so->so_proto->pr_getlock != NULL) {
5944 mutex_held = (*so->so_proto->pr_getlock)(so, PR_F_WILLUNLOCK);
5945 } else {
5946 mutex_held = so->so_proto->pr_domain->dom_mtx;
5947 }
5948 LCK_MTX_ASSERT(mutex_held, LCK_MTX_ASSERT_OWNED);
5949
5950 db = so->so_cfil_db;
5951
5952 for (int i = 0; i < CFILHASHSIZE; i++) {
5953 cfilhash = &db->cfdb_hashbase[i];
5954
5955 LIST_FOREACH_SAFE(hash_entry, cfilhash, cfentry_link, temp_hash_entry) {
5956 if (hash_entry->cfentry_cfil != NULL) {
5957 cfil_info = hash_entry->cfentry_cfil;
5958 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
5959 entry = &cfil_info->cfi_entries[kcunit - 1];
5960
5961 /* Are we attached to the filter? */
5962 if (entry->cfe_filter == NULL) {
5963 continue;
5964 }
5965
5966 if ((entry->cfe_flags & CFEF_SENT_SOCK_ATTACHED) == 0) {
5967 continue;
5968 }
5969 if ((entry->cfe_flags & CFEF_CFIL_DETACHED) != 0) {
5970 continue;
5971 }
5972
5973 attached = 1;
5974
5975 if (need_wait == TRUE) {
5976 #if LIFECYCLE_DEBUG
5977 cfil_info_log(LOG_ERR, cfil_info, "CFIL: LIFECYCLE: WAIT FOR FLOW TO FINISH");
5978 #endif
5979
5980 ts.tv_sec = cfil_close_wait_timeout / 1000;
5981 ts.tv_nsec = (cfil_close_wait_timeout % 1000) *
5982 NSEC_PER_USEC * 1000;
5983
5984 OSIncrementAtomic(&cfil_stats.cfs_close_wait);
5985 cfil_info->cfi_flags |= CFIF_CLOSE_WAIT;
5986 sock_flow_id = cfil_info->cfi_sock_id;
5987
5988 error = msleep((caddr_t)cfil_info, mutex_held,
5989 PSOCK | PCATCH, "cfil_filters_udp_attached", &ts);
5990
5991 // Woke up from sleep; check that cfil_info is still valid
5992 if (so->so_cfil_db == NULL ||
5993 (cfil_info != cfil_db_get_cfil_info(so->so_cfil_db, sock_flow_id))) {
5994 // cfil_info is not valid, do not continue
5995 goto done;
5996 }
5997
5998 cfil_info->cfi_flags &= ~CFIF_CLOSE_WAIT;
5999
6000 #if LIFECYCLE_DEBUG
6001 cfil_info_log(LOG_ERR, cfil_info, "CFIL: LIFECYCLE: WAIT FOR FLOW DONE");
6002 #endif
6003
6004 /*
6005 * Force close in case of timeout
6006 */
6007 if (error != 0) {
6008 OSIncrementAtomic(&cfil_stats.cfs_close_wait_timeout);
6009 #if LIFECYCLE_DEBUG
6010 cfil_info_log(LOG_ERR, cfil_info, "CFIL: LIFECYCLE: WAIT FOR FLOW TIMED OUT, FORCE DETACH");
6011 #endif
6012 entry->cfe_flags |= CFEF_CFIL_DETACHED;
6013 }
6014 }
6015 goto done;
6016 }
6017 }
6018 }
6019 }
6020 }
6021
6022 done:
6023 return attached;
6024 }
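/*
 * When need_wait is TRUE, the msleep() above is bounded by
 * cfil_close_wait_timeout (in milliseconds); on timeout the entry is
 * force-marked CFEF_CFIL_DETACHED so socket teardown can make progress even
 * if the user space agent never responds.
 */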
6025
6026 int32_t
6027 cfil_sock_udp_data_pending(struct sockbuf *sb, bool check_thread)
6028 {
6029 struct socket *so = sb->sb_so;
6030 struct cfi_buf *cfi_buf;
6031 uint64_t pending = 0;
6032 uint64_t total_pending = 0;
6033 struct cfilhashhead *cfilhash = NULL;
6034 struct cfil_db *db = NULL;
6035 struct cfil_hash_entry *hash_entry = NULL;
6036 struct cfil_hash_entry *temp_hash_entry = NULL;
6037
6038 socket_lock_assert_owned(so);
6039
6040 if ((so->so_flags & SOF_CONTENT_FILTER) != 0 && so->so_cfil_db != NULL &&
6041 (check_thread == FALSE || so->so_snd.sb_cfil_thread != current_thread())) {
6042 db = so->so_cfil_db;
6043
6044 for (int i = 0; i < CFILHASHSIZE; i++) {
6045 cfilhash = &db->cfdb_hashbase[i];
6046
6047 LIST_FOREACH_SAFE(hash_entry, cfilhash, cfentry_link, temp_hash_entry) {
6048 if (hash_entry->cfentry_cfil != NULL) {
6049 if ((sb->sb_flags & SB_RECV) == 0) {
6050 cfi_buf = &hash_entry->cfentry_cfil->cfi_snd;
6051 } else {
6052 cfi_buf = &hash_entry->cfentry_cfil->cfi_rcv;
6053 }
6054
6055 pending = cfi_buf->cfi_pending_last - cfi_buf->cfi_pending_first;
6056 /*
6057 * If the "chars of mbufs used" count is the larger figure, use it as
6058 * the pending estimate so we won't overcommit
6059 */
6060 if ((uint64_t)cfi_buf->cfi_pending_mbcnt > pending) {
6061 pending = cfi_buf->cfi_pending_mbcnt;
6062 }
6063
6064 total_pending += pending;
6065 }
6066 }
6067 }
6068
6069 VERIFY(total_pending < INT32_MAX);
6070 #if DATA_DEBUG
6071 CFIL_LOG(LOG_DEBUG, "CFIL: <so %llx> total pending %llu <check_thread %d>",
6072 (uint64_t)VM_KERNEL_ADDRPERM(so),
6073 total_pending, check_thread);
6074 #endif
6075 }
6076
6077 return (int32_t)(total_pending);
6078 }
6079
6080 int
6081 cfil_sock_udp_notify_shutdown(struct socket *so, int how, int drop_flag, int shut_flag)
6082 {
6083 struct cfil_info *cfil_info = NULL;
6084 struct cfilhashhead *cfilhash = NULL;
6085 struct cfil_db *db = NULL;
6086 struct cfil_hash_entry *hash_entry = NULL;
6087 struct cfil_hash_entry *temp_hash_entry = NULL;
6088 errno_t error = 0;
6089 int done_count = 0;
6090 int kcunit;
6091
6092 socket_lock_assert_owned(so);
6093
6094 if ((so->so_flags & SOF_CONTENT_FILTER) != 0 && so->so_cfil_db != NULL) {
6095 db = so->so_cfil_db;
6096
6097 for (int i = 0; i < CFILHASHSIZE; i++) {
6098 cfilhash = &db->cfdb_hashbase[i];
6099
6100 LIST_FOREACH_SAFE(hash_entry, cfilhash, cfentry_link, temp_hash_entry) {
6101 if (hash_entry->cfentry_cfil != NULL) {
6102 cfil_info = hash_entry->cfentry_cfil;
6103
6104 // This flow is marked as DROP
6105 if (cfil_info->cfi_flags & drop_flag) {
6106 done_count++;
6107 continue;
6108 }
6109
6110 // This flow has been shut already, skip
6111 if (cfil_info->cfi_flags & shut_flag) {
6112 continue;
6113 }
6114 // Mark flow as shut
6115 cfil_info->cfi_flags |= shut_flag;
6116 done_count++;
6117
6118 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
6119 /* Disconnect incoming side */
6120 if (how != SHUT_WR) {
6121 error = cfil_dispatch_disconnect_event(so, cfil_info, kcunit, 0);
6122 }
6123 /* Disconnect outgoing side */
6124 if (how != SHUT_RD) {
6125 error = cfil_dispatch_disconnect_event(so, cfil_info, kcunit, 1);
6126 }
6127 }
6128 }
6129 }
6130 }
6131 }
6132
6133 if (done_count == 0) {
6134 error = ENOTCONN;
6135 }
6136 return error;
6137 }
6138
6139 int
6140 cfil_sock_udp_shutdown(struct socket *so, int *how)
6141 {
6142 int error = 0;
6143
6144 if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || (so->so_cfil_db == NULL)) {
6145 goto done;
6146 }
6147
6148 socket_lock_assert_owned(so);
6149
6150 CFIL_LOG(LOG_INFO, "so %llx how %d",
6151 (uint64_t)VM_KERNEL_ADDRPERM(so), *how);
6152
6153 /*
6154 * Check the state of the socket before the content filter
6155 */
6156 if (*how != SHUT_WR && (so->so_state & SS_CANTRCVMORE) != 0) {
6157 /* read already shut down */
6158 error = ENOTCONN;
6159 goto done;
6160 }
6161 if (*how != SHUT_RD && (so->so_state & SS_CANTSENDMORE) != 0) {
6162 /* write already shut down */
6163 error = ENOTCONN;
6164 goto done;
6165 }
6166
6167 /*
6168 * shutdown read: SHUT_RD or SHUT_RDWR
6169 */
6170 if (*how != SHUT_WR) {
6171 error = cfil_sock_udp_notify_shutdown(so, SHUT_RD, CFIF_DROP, CFIF_SHUT_RD);
6172 if (error != 0) {
6173 goto done;
6174 }
6175 }
6176 /*
6177 * shutdown write: SHUT_WR or SHUT_RDWR
6178 */
6179 if (*how != SHUT_RD) {
6180 error = cfil_sock_udp_notify_shutdown(so, SHUT_WR, CFIF_DROP, CFIF_SHUT_WR);
6181 if (error != 0) {
6182 goto done;
6183 }
6184
6185 /*
6186 * When outgoing data is pending, we delay the shutdown at the
6187 * protocol level until the content filters give the final
6188 * verdict on the pending data.
6189 */
6190 if (cfil_sock_data_pending(&so->so_snd) != 0) {
6191 /*
6192 * When shutting down the read and write sides at once
6193 * we can proceed to the final shutdown of the read
6194 * side. Otherwise, we just return.
6195 */
6196 if (*how == SHUT_WR) {
6197 error = EJUSTRETURN;
6198 } else if (*how == SHUT_RDWR) {
6199 *how = SHUT_RD;
6200 }
6201 }
6202 }
6203 done:
6204 return error;
6205 }
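/*
 * Concretely: for shutdown(SHUT_RDWR) with outgoing data still awaiting a
 * filter verdict, *how is downgraded to SHUT_RD so only the read side is shut
 * down now; for shutdown(SHUT_WR) alone, EJUSTRETURN defers the protocol-level
 * shutdown until the pending data has been resolved.
 */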
6206
6207 void
6208 cfil_sock_udp_close_wait(struct socket *so)
6209 {
6210 socket_lock_assert_owned(so);
6211
6212 while (cfil_filters_udp_attached(so, FALSE)) {
6213 /*
6214 * Notify the filters we are going away so they can detach
6215 */
6216 cfil_sock_udp_notify_shutdown(so, SHUT_RDWR, 0, 0);
6217
6218 /*
6219 * Make sure we still need to wait after the filters are notified
6220 * of the disconnection
6221 */
6222 if (cfil_filters_udp_attached(so, TRUE) == 0) {
6223 break;
6224 }
6225 }
6226 }
6227
6228 void
6229 cfil_sock_udp_is_closed(struct socket *so)
6230 {
6231 struct cfil_info *cfil_info = NULL;
6232 struct cfilhashhead *cfilhash = NULL;
6233 struct cfil_db *db = NULL;
6234 struct cfil_hash_entry *hash_entry = NULL;
6235 struct cfil_hash_entry *temp_hash_entry = NULL;
6236 errno_t error = 0;
6237 int kcunit;
6238
6239 socket_lock_assert_owned(so);
6240
6241 if ((so->so_flags & SOF_CONTENT_FILTER) != 0 && so->so_cfil_db != NULL) {
6242 db = so->so_cfil_db;
6243
6244 for (int i = 0; i < CFILHASHSIZE; i++) {
6245 cfilhash = &db->cfdb_hashbase[i];
6246
6247 LIST_FOREACH_SAFE(hash_entry, cfilhash, cfentry_link, temp_hash_entry) {
6248 if (hash_entry->cfentry_cfil != NULL) {
6249 cfil_info = hash_entry->cfentry_cfil;
6250
6251 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
6252 /* Let the filters know of the closing */
6253 error = cfil_dispatch_closed_event(so, cfil_info, kcunit);
6254 }
6255
6256 /* Last chance to push passed data out */
6257 error = cfil_acquire_sockbuf(so, cfil_info, 1);
6258 if (error == 0) {
6259 cfil_service_inject_queue(so, cfil_info, 1);
6260 }
6261 cfil_release_sockbuf(so, 1);
6262
6263 cfil_info->cfi_flags |= CFIF_SOCK_CLOSED;
6264
6265 /* Pending data needs to go */
6266 cfil_flush_queues(so, cfil_info);
6267
6268 CFIL_INFO_VERIFY(cfil_info);
6269 }
6270 }
6271 }
6272 }
6273 }
6274
6275 void
6276 cfil_sock_udp_buf_update(struct sockbuf *sb)
6277 {
6278 struct cfil_info *cfil_info = NULL;
6279 struct cfilhashhead *cfilhash = NULL;
6280 struct cfil_db *db = NULL;
6281 struct cfil_hash_entry *hash_entry = NULL;
6282 struct cfil_hash_entry *temp_hash_entry = NULL;
6283 errno_t error = 0;
6284 int outgoing;
6285 struct socket *so = sb->sb_so;
6286
6287 socket_lock_assert_owned(so);
6288
6289 if ((so->so_flags & SOF_CONTENT_FILTER) != 0 && so->so_cfil_db != NULL) {
6290 if (!cfil_sbtrim) {
6291 return;
6292 }
6293
6294 db = so->so_cfil_db;
6295
6296 for (int i = 0; i < CFILHASHSIZE; i++) {
6297 cfilhash = &db->cfdb_hashbase[i];
6298
6299 LIST_FOREACH_SAFE(hash_entry, cfilhash, cfentry_link, temp_hash_entry) {
6300 if (hash_entry->cfentry_cfil != NULL) {
6301 cfil_info = hash_entry->cfentry_cfil;
6302
6303 if ((sb->sb_flags & SB_RECV) == 0) {
6304 if ((cfil_info->cfi_flags & CFIF_RETRY_INJECT_OUT) == 0) {
6305 return;
6306 }
6307 outgoing = 1;
6308 OSIncrementAtomic(&cfil_stats.cfs_inject_q_out_retry);
6309 } else {
6310 if ((cfil_info->cfi_flags & CFIF_RETRY_INJECT_IN) == 0) {
6311 return;
6312 }
6313 outgoing = 0;
6314 OSIncrementAtomic(&cfil_stats.cfs_inject_q_in_retry);
6315 }
6316
6317 CFIL_LOG(LOG_NOTICE, "so %llx outgoing %d",
6318 (uint64_t)VM_KERNEL_ADDRPERM(so), outgoing);
6319
6320 error = cfil_acquire_sockbuf(so, cfil_info, outgoing);
6321 if (error == 0) {
6322 cfil_service_inject_queue(so, cfil_info, outgoing);
6323 }
6324 cfil_release_sockbuf(so, outgoing);
6325 }
6326 }
6327 }
6328 }
6329 }
6330
6331 void
6332 cfil_filter_show(u_int32_t kcunit)
6333 {
6334 struct content_filter *cfc = NULL;
6335 struct cfil_entry *entry;
6336 int count = 0;
6337
6338 if (content_filters == NULL) {
6339 return;
6340 }
6341 if (kcunit > MAX_CONTENT_FILTER) {
6342 return;
6343 }
6344
6345 cfil_rw_lock_shared(&cfil_lck_rw);
6346
6347 if (content_filters[kcunit - 1] == NULL) {
6348 cfil_rw_unlock_shared(&cfil_lck_rw);
6349 return;
6350 }
6351 cfc = content_filters[kcunit - 1];
6352
6353 CFIL_LOG(LOG_ERR, "CFIL: FILTER SHOW: Filter <unit %d, entry count %d> flags <%lx>:",
6354 kcunit, cfc->cf_sock_count, (unsigned long)cfc->cf_flags);
6355 if (cfc->cf_flags & CFF_DETACHING) {
6356 CFIL_LOG(LOG_ERR, "CFIL: FILTER SHOW: - DETACHING");
6357 }
6358 if (cfc->cf_flags & CFF_ACTIVE) {
6359 CFIL_LOG(LOG_ERR, "CFIL: FILTER SHOW: - ACTIVE");
6360 }
6361 if (cfc->cf_flags & CFF_FLOW_CONTROLLED) {
6362 CFIL_LOG(LOG_ERR, "CFIL: FILTER SHOW: - FLOW CONTROLLED");
6363 }
6364
6365 TAILQ_FOREACH(entry, &cfc->cf_sock_entries, cfe_link) {
6366 if (entry->cfe_cfil_info && entry->cfe_cfil_info->cfi_so) {
6367 struct cfil_info *cfil_info = entry->cfe_cfil_info;
6368
6369 count++;
6370
6371 if (entry->cfe_flags & CFEF_CFIL_DETACHED) {
6372 cfil_info_log(LOG_ERR, cfil_info, "CFIL: FILTER SHOW: - DETACHED");
6373 } else {
6374 cfil_info_log(LOG_ERR, cfil_info, "CFIL: FILTER SHOW: - ATTACHED");
6375 }
6376 }
6377 }
6378
6379 CFIL_LOG(LOG_ERR, "CFIL: FILTER SHOW: Filter - total entries shown: %d", count);
6380
6381 cfil_rw_unlock_shared(&cfil_lck_rw);
6382 }
6383
6384 void
6385 cfil_info_show(void)
6386 {
6387 struct cfil_info *cfil_info;
6388 int count = 0;
6389
6390 cfil_rw_lock_shared(&cfil_lck_rw);
6391
6392 CFIL_LOG(LOG_ERR, "CFIL: INFO SHOW: count %d", cfil_sock_attached_count);
6393
6394 TAILQ_FOREACH(cfil_info, &cfil_sock_head, cfi_link) {
6395 count++;
6396
6397 cfil_info_log(LOG_ERR, cfil_info, "CFIL: INFO SHOW");
6398
6399 if (cfil_info->cfi_flags & CFIF_DROP) {
6400 CFIL_LOG(LOG_ERR, "CFIL: INFO FLAG - DROP");
6401 }
6402 if (cfil_info->cfi_flags & CFIF_CLOSE_WAIT) {
6403 CFIL_LOG(LOG_ERR, "CFIL: INFO FLAG - CLOSE_WAIT");
6404 }
6405 if (cfil_info->cfi_flags & CFIF_SOCK_CLOSED) {
6406 CFIL_LOG(LOG_ERR, "CFIL: INFO FLAG - SOCK_CLOSED");
6407 }
6408 if (cfil_info->cfi_flags & CFIF_RETRY_INJECT_IN) {
6409 CFIL_LOG(LOG_ERR, "CFIL: INFO FLAG - RETRY_INJECT_IN");
6410 }
6411 if (cfil_info->cfi_flags & CFIF_RETRY_INJECT_OUT) {
6412 CFIL_LOG(LOG_ERR, "CFIL: INFO FLAG - RETRY_INJECT_OUT");
6413 }
6414 if (cfil_info->cfi_flags & CFIF_SHUT_WR) {
6415 CFIL_LOG(LOG_ERR, "CFIL: INFO FLAG - SHUT_WR");
6416 }
6417 if (cfil_info->cfi_flags & CFIF_SHUT_RD) {
6418 CFIL_LOG(LOG_ERR, "CFIL: INFO FLAG - SHUT_RD");
6419 }
6420 }
6421
6422 CFIL_LOG(LOG_ERR, "CFIL: INFO SHOW: total cfil_info shown: %d", count);
6423
6424 cfil_rw_unlock_shared(&cfil_lck_rw);
6425 }
6426
6427 bool
6428 cfil_info_idle_timed_out(struct cfil_info *cfil_info, int timeout, u_int32_t current_time)
6429 {
6430 if (cfil_info && cfil_info->cfi_hash_entry &&
6431 (current_time - cfil_info->cfi_hash_entry->cfentry_lastused >= (u_int32_t)timeout)) {
6432 #if GC_DEBUG
6433 cfil_info_log(LOG_ERR, cfil_info, "CFIL: flow IDLE timeout expired");
6434 #endif
6435 return true;
6436 }
6437 return false;
6438 }
6439
6440 bool
6441 cfil_info_action_timed_out(struct cfil_info *cfil_info, int timeout)
6442 {
6443 struct cfil_entry *entry;
6444 struct timeval current_tv;
6445 struct timeval diff_time;
6446
6447 if (cfil_info == NULL) {
6448 return false;
6449 }
6450
6451 /*
6452 * If we have queued up more data than passed offset and we haven't received
6453 * an action from user space for a while (the user space filter might have crashed),
6454 * return action timed out.
6455 */
6456 if (cfil_info->cfi_snd.cfi_pending_last > cfil_info->cfi_snd.cfi_pass_offset ||
6457 cfil_info->cfi_rcv.cfi_pending_last > cfil_info->cfi_rcv.cfi_pass_offset) {
6458 microuptime(&current_tv);
6459
6460 for (int kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
6461 entry = &cfil_info->cfi_entries[kcunit - 1];
6462
6463 if (entry->cfe_filter == NULL) {
6464 continue;
6465 }
6466
6467 if (cfil_info->cfi_snd.cfi_pending_last > entry->cfe_snd.cfe_pass_offset ||
6468 cfil_info->cfi_rcv.cfi_pending_last > entry->cfe_rcv.cfe_pass_offset) {
6469 // haven't gotten an action from this filter, check timeout
6470 timersub(&current_tv, &entry->cfe_last_action, &diff_time);
6471 if (diff_time.tv_sec >= timeout) {
6472 #if GC_DEBUG
6473 cfil_info_log(LOG_ERR, cfil_info, "CFIL: flow ACTION timeout expired");
6474 #endif
6475 return true;
6476 }
6477 }
6478 }
6479 }
6480 return false;
6481 }
6482
6483 bool
6484 cfil_info_buffer_threshold_exceeded(struct cfil_info *cfil_info)
6485 {
6486 if (cfil_info == NULL) {
6487 return false;
6488 }
6489
6490 /*
6491 * Clean up flow if it exceeded queue thresholds
6492 */
6493 if (cfil_info->cfi_snd.cfi_tail_drop_cnt ||
6494 cfil_info->cfi_rcv.cfi_tail_drop_cnt) {
6495 #if GC_DEBUG
6496 CFIL_LOG(LOG_ERR, "CFIL: queue threshold exceeded: mbuf max <count: %d bytes: %d> tail drop count <OUT: %d IN: %d>",
6497 cfil_udp_gc_mbuf_num_max,
6498 cfil_udp_gc_mbuf_cnt_max,
6499 cfil_info->cfi_snd.cfi_tail_drop_cnt,
6500 cfil_info->cfi_rcv.cfi_tail_drop_cnt);
6501 cfil_info_log(LOG_ERR, cfil_info, "CFIL: queue threshold exceeded");
6502 #endif
6503 return true;
6504 }
6505
6506 return false;
6507 }
6508
6509 static void
6510 cfil_udp_gc_thread_sleep(bool forever)
6511 {
6512 if (forever) {
6513 (void) assert_wait((event_t) &cfil_sock_udp_attached_count,
6514 THREAD_INTERRUPTIBLE);
6515 } else {
6516 uint64_t deadline = 0;
6517 nanoseconds_to_absolutetime(UDP_FLOW_GC_RUN_INTERVAL_NSEC, &deadline);
6518 clock_absolutetime_interval_to_deadline(deadline, &deadline);
6519
6520 (void) assert_wait_deadline(&cfil_sock_udp_attached_count,
6521 THREAD_INTERRUPTIBLE, deadline);
6522 }
6523 }
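/*
 * The GC thread either parks indefinitely on cfil_sock_udp_attached_count
 * (nothing to police) or re-arms itself to run again after
 * UDP_FLOW_GC_RUN_INTERVAL_NSEC.
 */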
6524
6525 static void
6526 cfil_udp_gc_thread_func(void *v, wait_result_t w)
6527 {
6528 #pragma unused(v, w)
6529
6530 ASSERT(cfil_udp_gc_thread == current_thread());
6531 thread_set_thread_name(current_thread(), "CFIL_UDP_GC");
6532
6533 // Kick off gc shortly
6534 cfil_udp_gc_thread_sleep(false);
6535 thread_block_parameter((thread_continue_t) cfil_info_udp_expire, NULL);
6536 /* NOTREACHED */
6537 }
6538
6539 static void
6540 cfil_info_udp_expire(void *v, wait_result_t w)
6541 {
6542 #pragma unused(v, w)
6543
6544 static uint64_t expired_array[UDP_FLOW_GC_MAX_COUNT];
6545 static uint32_t expired_count = 0;
6546
6547 struct cfil_info *cfil_info;
6548 struct cfil_hash_entry *hash_entry;
6549 struct cfil_db *db;
6550 struct socket *so;
6551 u_int32_t current_time = 0;
6552
6553 current_time = net_uptime();
6554
6555 // Get all expired UDP flow ids
6556 cfil_rw_lock_shared(&cfil_lck_rw);
6557
6558 if (cfil_sock_udp_attached_count == 0) {
6559 cfil_rw_unlock_shared(&cfil_lck_rw);
6560 goto go_sleep;
6561 }
6562
6563 TAILQ_FOREACH(cfil_info, &cfil_sock_head, cfi_link) {
6564 if (expired_count >= UDP_FLOW_GC_MAX_COUNT) {
6565 break;
6566 }
6567
6568 if (IS_UDP(cfil_info->cfi_so)) {
6569 if (cfil_info_idle_timed_out(cfil_info, UDP_FLOW_GC_IDLE_TO, current_time) ||
6570 cfil_info_action_timed_out(cfil_info, UDP_FLOW_GC_ACTION_TO) ||
6571 cfil_info_buffer_threshold_exceeded(cfil_info)) {
6572 expired_array[expired_count] = cfil_info->cfi_sock_id;
6573 expired_count++;
6574 }
6575 }
6576 }
6577 cfil_rw_unlock_shared(&cfil_lck_rw);
6578
6579 if (expired_count == 0) {
6580 goto go_sleep;
6581 }
6582
6583 for (uint32_t i = 0; i < expired_count; i++) {
6584 // Search for socket (UDP only and lock so)
6585 so = cfil_socket_from_sock_id(expired_array[i], true);
6586 if (so == NULL) {
6587 continue;
6588 }
6589
6590 cfil_info = cfil_db_get_cfil_info(so->so_cfil_db, expired_array[i]);
6591 if (cfil_info == NULL) {
6592 goto unlock;
6593 }
6594
6595 db = so->so_cfil_db;
6596 hash_entry = cfil_info->cfi_hash_entry;
6597
6598 if (db == NULL || hash_entry == NULL) {
6599 goto unlock;
6600 }
6601
6602 #if GC_DEBUG || LIFECYCLE_DEBUG
6603 cfil_info_log(LOG_ERR, cfil_info, "CFIL: LIFECYCLE: GC CLEAN UP");
6604 #endif
6605
6606 cfil_db_delete_entry(db, hash_entry);
6607 cfil_info_free(cfil_info);
6608 OSIncrementAtomic(&cfil_stats.cfs_sock_detached);
6609
6610 if (so->so_flags & SOF_CONTENT_FILTER) {
6611 if (db->cfdb_count == 0) {
6612 so->so_flags &= ~SOF_CONTENT_FILTER;
6613 }
6614 VERIFY(so->so_usecount > 0);
6615 so->so_usecount--;
6616 }
6617 unlock:
6618 socket_unlock(so, 1);
6619 }
6620
6621 #if GC_DEBUG
6622 CFIL_LOG(LOG_ERR, "CFIL: UDP flow idle timeout check: expired %d idle flows", expired_count);
6623 #endif
6624 expired_count = 0;
6625
6626 go_sleep:
6627
6628 // Sleep forever (until woken up) if there are no more UDP flows to clean
6629 cfil_rw_lock_shared(&cfil_lck_rw);
6630 cfil_udp_gc_thread_sleep(cfil_sock_udp_attached_count == 0 ? true : false);
6631 cfil_rw_unlock_shared(&cfil_lck_rw);
6632 thread_block_parameter((thread_continue_t)cfil_info_udp_expire, NULL);
6633 /* NOTREACHED */
6634 }
6635
6636 struct m_tag *
6637 cfil_udp_save_socket_state(struct cfil_info *cfil_info, struct mbuf *m)
6638 {
6639 struct m_tag *tag = NULL;
6640 struct cfil_tag *ctag = NULL;
6641 struct cfil_hash_entry *hash_entry = NULL;
6642
6643 if (cfil_info == NULL || cfil_info->cfi_so == NULL ||
6644 cfil_info->cfi_hash_entry == NULL || m == NULL || !(m->m_flags & M_PKTHDR)) {
6645 return NULL;
6646 }
6647
6648 /* Allocate a tag */
6649 tag = m_tag_create(KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_CFIL_UDP,
6650 sizeof(struct cfil_tag), M_DONTWAIT, m);
6651
6652 if (tag) {
6653 ctag = (struct cfil_tag*)(tag + 1);
6654 ctag->cfil_so_state_change_cnt = cfil_info->cfi_so->so_state_change_cnt;
6655 ctag->cfil_so_options = cfil_info->cfi_so->so_options;
6656
6657 hash_entry = cfil_info->cfi_hash_entry;
6658 if (hash_entry->cfentry_family == AF_INET6) {
6659 fill_ip6_sockaddr_4_6(&ctag->cfil_faddr,
6660 &hash_entry->cfentry_faddr.addr6,
6661 hash_entry->cfentry_fport);
6662 } else if (hash_entry->cfentry_family == AF_INET) {
6663 fill_ip_sockaddr_4_6(&ctag->cfil_faddr,
6664 hash_entry->cfentry_faddr.addr46.ia46_addr4,
6665 hash_entry->cfentry_fport);
6666 }
6667 m_tag_prepend(m, tag);
6668 return tag;
6669 }
6670 return NULL;
6671 }
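/*
 * The cfil_tag payload sits immediately after the m_tag header (hence the
 * "tag + 1" arithmetic above); it preserves the socket state change count,
 * the socket options and the flow's foreign address so they can be recovered
 * once the datagram is re-injected.
 */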
6672
6673 struct m_tag *
6674 cfil_udp_get_socket_state(struct mbuf *m, uint32_t *state_change_cnt, short *options,
6675 struct sockaddr **faddr)
6676 {
6677 struct m_tag *tag = NULL;
6678 struct cfil_tag *ctag = NULL;
6679
6680 tag = m_tag_locate(m, KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_CFIL_UDP, NULL);
6681 if (tag) {
6682 ctag = (struct cfil_tag *)(tag + 1);
6683 if (state_change_cnt) {
6684 *state_change_cnt = ctag->cfil_so_state_change_cnt;
6685 }
6686 if (options) {
6687 *options = ctag->cfil_so_options;
6688 }
6689 if (faddr) {
6690 *faddr = (struct sockaddr *) &ctag->cfil_faddr;
6691 }
6692
6693 /*
6694 * Unlink tag and hand it over to caller.
6695 * Note that caller will be responsible to free it.
6696 */
6697 m_tag_unlink(m, tag);
6698 return tag;
6699 }
6700 return NULL;
6701 }
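/*
 * A caller might consume the tag along these lines (illustrative sketch only):
 *
 *	struct sockaddr *faddr = NULL;
 *	uint32_t state_cnt = 0;
 *	short so_options = 0;
 *	struct m_tag *tag;
 *
 *	tag = cfil_udp_get_socket_state(m, &state_cnt, &so_options, &faddr);
 *	if (tag != NULL) {
 *		// ... use faddr and the saved state while the tag is alive ...
 *		m_tag_free(tag);	// the tag was unlinked; the caller frees it
 *	}
 */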