/*
 * Copyright (c) 2013-2018 Apple Inc. All rights reserved.
 *
 * @APPLE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this
 * file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_LICENSE_HEADER_END@
 */

/*
 * THEORY OF OPERATION
 *
 * The socket content filter subsystem provides a way for user space agents to
 * make filtering decisions based on the content of the data being sent and
 * received by TCP/IP sockets.
 *
 * A content filter user space agent gets a copy of the data and the data is
 * also kept in a kernel buffer until the user space agent makes a pass or drop
 * decision. This unidirectional flow of content avoids unnecessary data copies
 * back to the kernel.
 *
 * A user space filter agent opens a kernel control socket with the name
 * CONTENT_FILTER_CONTROL_NAME to attach to the socket content filter subsystem.
 * When connected, a "struct content_filter" is created and set as the
 * "unitinfo" of the corresponding kernel control socket instance.
 *
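 * For illustration, a user space agent would attach using the standard
 * kernel control socket calls -- a minimal hedged sketch, not part of
 * this subsystem (requires the appropriate privileges):
 *
 *     int fd = socket(PF_SYSTEM, SOCK_DGRAM, SYSPROTO_CONTROL);
 *     struct ctl_info info;
 *     bzero(&info, sizeof(info));
 *     strlcpy(info.ctl_name, CONTENT_FILTER_CONTROL_NAME,
 *         sizeof(info.ctl_name));
 *     ioctl(fd, CTLIOCGINFO, &info);      // resolve control name to id
 *
 *     struct sockaddr_ctl sc;
 *     bzero(&sc, sizeof(sc));
 *     sc.sc_len = sizeof(sc);
 *     sc.sc_family = AF_SYSTEM;
 *     sc.ss_sysaddr = AF_SYS_CONTROL;
 *     sc.sc_id = info.ctl_id;
 *     sc.sc_unit = 1;                     // kcunit, 1..MAX_CONTENT_FILTER
 *     connect(fd, (struct sockaddr *)&sc, sizeof(sc));
 *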
 * The socket content filter subsystem exchanges messages with the user space
 * filter agent until an ultimate pass or drop decision is made by the
 * user space filter agent.
 *
 * It should be noted that messages about many TCP/IP sockets can be multiplexed
 * over a single kernel control socket.
 *
 * Notes:
 * - The current implementation is limited to TCP sockets.
 * - The current implementation supports up to two simultaneous content filters
 *   for the sake of simplicity of the implementation.
 *
 *
 * NECP FILTER CONTROL UNIT
 *
 * A user space filter agent uses the Network Extension Control Policy (NECP)
 * database to specify which TCP/IP sockets need to be filtered. The NECP
 * criteria may be based on a variety of properties like user ID or proc UUID.
 *
 * The NECP "filter control unit" is used by the socket content filter subsystem
 * to deliver the relevant TCP/IP content information to the appropriate
 * user space filter agent via its kernel control socket instance.
 * This works as follows:
 *
 * 1) The user space filter agent specifies an NECP filter control unit when
 *    it adds its filtering rules to the NECP database.
 *
 * 2) The user space filter agent also sets its NECP filter control unit on the
 *    content filter kernel control socket via the socket option
 *    CFIL_OPT_NECP_CONTROL_UNIT, as shown in the sketch below.
 *
 * 3) The NECP database is consulted to find out if a given TCP/IP socket
 *    needs to be subjected to content filtering and returns the corresponding
 *    NECP filter control unit -- the NECP filter control unit is actually
 *    stored in the TCP/IP socket structure so the NECP lookup is really simple.
 *
 * 4) The NECP filter control unit is then used to find the corresponding
 *    kernel control socket instance.
 *
 * Note: NECP currently supports a single filter control unit per TCP/IP socket
 * but this restriction may soon be lifted.
 *
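 * For example, an agent that registered filter control unit 1 in its NECP
 * rules would bind that unit to its kernel control socket roughly as
 * follows (a sketch; "fd" is the connected control socket from above):
 *
 *     uint32_t unit = 1;  // must match the NECP filter control unit
 *     setsockopt(fd, SYSPROTO_CONTROL, CFIL_OPT_NECP_CONTROL_UNIT,
 *         &unit, sizeof(unit));
 *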
 *
 * THE MESSAGING PROTOCOL
 *
 * The socket content filter subsystem and a user space filter agent
 * communicate over the kernel control socket via an asynchronous
 * messaging protocol (this is not a request-response protocol).
 * The socket content filter subsystem sends event messages to the user
 * space filter agent about the TCP/IP sockets it is interested in filtering.
 * The user space filter agent sends action messages to either allow
 * data to pass or to disallow the data flow (and drop the connection).
 *
 * All messages over a content filter kernel control socket share the same
 * common header of type "struct cfil_msg_hdr". The message type tells if
 * it's an event message "CFM_TYPE_EVENT" or an action message "CFM_TYPE_ACTION".
 * The message header field "cfm_sock_id" identifies a given TCP/IP socket.
 * Note the message header length field may be padded for alignment and can
 * be larger than the actual content of the message.
 * The field "cfm_op" describes the kind of event or action.
 *
 * Here are the kinds of content filter events:
 * - CFM_OP_SOCKET_ATTACHED: a new TCP/IP socket is being filtered
 * - CFM_OP_SOCKET_CLOSED: a TCP/IP socket is closed
 * - CFM_OP_DATA_OUT: a span of data is being sent on a TCP/IP socket
 * - CFM_OP_DATA_IN: a span of data is being received on a TCP/IP socket
 *
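 * A filter agent's read loop can dispatch on this common header; a hedged
 * sketch (error handling elided, assuming one whole message per read):
 *
 *     uint8_t buf[65536];
 *     ssize_t n = recv(fd, buf, sizeof(buf), 0);
 *     struct cfil_msg_hdr *hdr = (struct cfil_msg_hdr *)buf;
 *     if (n >= (ssize_t)sizeof(*hdr) && hdr->cfm_type == CFM_TYPE_EVENT) {
 *         switch (hdr->cfm_op) {
 *         case CFM_OP_SOCKET_ATTACHED:
 *             // remember hdr->cfm_sock_id; send an initial
 *             // CFM_OP_DATA_UPDATE to open the peek window
 *             break;
 *         case CFM_OP_DATA_OUT:
 *         case CFM_OP_DATA_IN:
 *             // inspect the data span, then advance the pass
 *             // or peek offsets via CFM_OP_DATA_UPDATE
 *             break;
 *         case CFM_OP_SOCKET_CLOSED:
 *             // forget hdr->cfm_sock_id
 *             break;
 *         }
 *     }
 *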
 *
 * EVENT MESSAGES
 *
 * The CFM_OP_DATA_OUT and CFM_OP_DATA_IN event messages contain a span of
 * data that is being sent or received. The position of this span of data
 * in the data flow is described by a set of start and end offsets. These
 * are absolute 64-bit offsets. The first byte sent (or received) starts
 * at offset 0 and ends at offset 1. The length of the content data
 * is given by the difference between the end offset and the start offset.
 *
 * After a CFM_OP_SOCKET_ATTACHED is delivered, CFM_OP_DATA_OUT and
 * CFM_OP_DATA_IN events are not delivered until a CFM_OP_DATA_UPDATE
 * action message is sent by the user space filter agent.
 *
 * Note: absolute 64-bit offsets should be large enough for the foreseeable
 * future. A 64-bit counter will wrap after 468 years at 10 Gbit/sec:
 *   2^64 / ((10E9 / 8) * 60 * 60 * 24 * 365.25) = 467.63
 *
 * There are two kinds of primary content filter actions:
 * - CFM_OP_DATA_UPDATE: to update the pass or peek offsets for each direction.
 * - CFM_OP_DROP: to shut down the socket and disallow further data flow
 *
 * There is also an action to mark a given client flow as already filtered
 * at a higher level, CFM_OP_BLESS_CLIENT.
 *
 *
 * ACTION MESSAGES
 *
 * The CFM_OP_DATA_UPDATE action messages let the user space filter
 * agent allow data to flow up to the specified pass offset -- there
 * is a pass offset for outgoing data and a pass offset for incoming data.
 * When a new TCP/IP socket is attached to the content filter, each pass offset
 * is initially set to 0 so no data is allowed to pass by default.
 * When the pass offset is set to CFM_MAX_OFFSET via a CFM_OP_DATA_UPDATE
 * then the data flow becomes unrestricted.
 *
 * Note that pass offsets can only be incremented. A CFM_OP_DATA_UPDATE message
 * with a pass offset smaller than the pass offset of a previous
 * CFM_OP_DATA_UPDATE message is silently ignored.
 *
 * A user space filter agent also uses CFM_OP_DATA_UPDATE action messages
 * to tell the kernel how much data it wants to see by using the peek offsets.
 * Just like pass offsets, there is a peek offset for each direction.
 * When a new TCP/IP socket is attached to the content filter, each peek offset
 * is initially set to 0 so no CFM_OP_DATA_OUT and CFM_OP_DATA_IN event
 * messages are dispatched by default until a CFM_OP_DATA_UPDATE action message
 * with a greater than 0 peek offset is sent by the user space filter agent.
 * When the peek offset is set to CFM_MAX_OFFSET via a CFM_OP_DATA_UPDATE
 * then the flow of update data events becomes unrestricted.
 *
 * Note that peek offsets cannot be smaller than the corresponding pass offset.
 * Also a peek offset cannot be smaller than the corresponding end offset
 * of the last CFM_OP_DATA_OUT/CFM_OP_DATA_IN message dispatched. Trying
 * to set a too small peek value is silently ignored.
 *
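 * Putting it together, a verdict allowing the first 4 KB of outgoing data
 * while continuing to peek at everything might be sent as below (a sketch
 * following "struct cfil_msg_action" from <net/content_filter.h>; "sock_id"
 * comes from the CFM_OP_SOCKET_ATTACHED event):
 *
 *     struct cfil_msg_action action;
 *     bzero(&action, sizeof(action));
 *     action.cfa_msghdr.cfm_len = sizeof(action);
 *     action.cfa_msghdr.cfm_version = CFM_VERSION_CURRENT;
 *     action.cfa_msghdr.cfm_type = CFM_TYPE_ACTION;
 *     action.cfa_msghdr.cfm_op = CFM_OP_DATA_UPDATE;
 *     action.cfa_msghdr.cfm_sock_id = sock_id;
 *     action.cfa_out_pass_offset = 4096;           // allow first 4 KB out
 *     action.cfa_out_peek_offset = CFM_MAX_OFFSET; // keep seeing all data out
 *     action.cfa_in_pass_offset = 0;               // nothing passed in yet
 *     action.cfa_in_peek_offset = CFM_MAX_OFFSET;  // keep seeing all data in
 *     send(fd, &action, sizeof(action), 0);
 *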
 *
 * PER SOCKET "struct cfil_info"
 *
 * As soon as a TCP/IP socket gets attached to a content filter, a
 * "struct cfil_info" is created to hold the content filtering state for this
 * socket.
 *
 * The content filtering state is made of the following information
 * for each direction:
 * - The current pass offset;
 * - The first and last offsets of the data pending, waiting for a filtering
 *   decision;
 * - The inject queue for data that passed the filters and that needs
 *   to be re-injected;
 * - A content filter specific state in a set of "struct cfil_entry"
 *
 *
 * CONTENT FILTER STATE "struct cfil_entry"
 *
 * The "struct cfil_entry" maintains the information most relevant to the
 * message handling over a kernel control socket with a user space filter agent.
 *
 * The "struct cfil_entry" holds the NECP filter control unit of the kernel
 * control socket it is attached to and also has a pointer to the
 * corresponding "struct content_filter".
 *
 * For each direction, "struct cfil_entry" maintains the following information:
 * - The pass offset
 * - The peek offset
 * - The offset of the last data peeked at by the filter
 * - A queue of data that's waiting to be delivered to the user space filter
 *   agent on the kernel control socket
 * - A queue of data for which event messages have been sent on the kernel
 *   control socket and that is pending a filtering decision.
 *
 *
 * CONTENT FILTER QUEUES
 *
 * Data that is being filtered is steered away from the TCP/IP socket buffer
 * and instead will sit in one of three content filter queues until the data
 * can be re-injected into the TCP/IP socket buffer.
 *
 * A content filter queue is represented by "struct cfil_queue" that contains
 * a list of mbufs and the start and end offset of the data span of
 * the list of mbufs.
 *
 * The data moves through the three content filter queues according to this
 * sequence:
 *   a) The "cfe_ctl_q" of "struct cfil_entry"
 *   b) The "cfe_pending_q" of "struct cfil_entry"
 *   c) The "cfi_inject_q" of "struct cfil_info"
 *
 * Note: The sequence (a),(b) may be repeated several times if there is more
 * than one content filter attached to the TCP/IP socket.
 *
 * The "cfe_ctl_q" queue holds data that cannot be delivered to the
 * kernel control socket for two reasons:
 * - The peek offset is less than the end offset of the mbuf data
 * - The kernel control socket is flow controlled
 *
 * The "cfe_pending_q" queue holds data for which CFM_OP_DATA_OUT or
 * CFM_OP_DATA_IN have been successfully dispatched to the kernel control
 * socket and that is waiting for a pass action message from the user space
 * filter agent. An mbuf length must be fully allowed to pass to be removed
 * from the cfe_pending_q.
 *
 * The "cfi_inject_q" queue holds data that has been fully allowed to pass
 * by the user space filter agent and that needs to be re-injected into the
 * TCP/IP socket.
 *
 *
 * IMPACT ON FLOW CONTROL
 *
 * An essential aspect of the content filter subsystem is to minimize the
 * impact on flow control of the TCP/IP sockets being filtered.
 *
 * The processing overhead of the content filtering may affect flow control
 * by adding noticeable delays and cannot be eliminated -- care must be
 * taken by the user space filter agent to minimize the processing delays.
 *
 * The data being filtered is kept in buffers while waiting for
 * a decision by the user space filter agent. This amount of pending data
 * needs to be subtracted from the amount of data available in the
 * corresponding TCP/IP socket buffer. This is done by modifying
 * sbspace() and tcp_sbspace() to account for the amount of data pending
 * in the content filter.
 *
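 * Conceptually, the adjustment amounts to the following simplified sketch
 * (the actual accounting lives in sbspace()/tcp_sbspace() and the cfi_buf
 * bookkeeping below):
 *
 *     int64_t pending = cfi_buf->cfi_pending_last -
 *         cfi_buf->cfi_pending_first;
 *     int64_t space = sbspace(sb) - pending;  // what the socket reports
 *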
 * LOCKING STRATEGY
 *
 * The global state of the content filter subsystem is protected by a single
 * read-write lock "cfil_lck_rw". The data flow can be done with the
 * cfil read-write lock held as shared so it can be re-entered from multiple
 * threads.
 *
 * The per TCP/IP socket content filter state -- "struct cfil_info" -- is
 * protected by the socket lock.
 *
 * A TCP/IP socket lock cannot be taken while the cfil read-write lock
 * is held. That's why we have some sequences where we drop the cfil read-write
 * lock before taking the TCP/IP socket lock, as in the sketch below.
 *
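 * The pattern used in several places below (for example in
 * cfil_ctl_disconnect()) looks like this, as a sketch:
 *
 *     cfil_rw_lock_exclusive(&cfil_lck_rw);
 *     ...
 *     cfil_rw_unlock_exclusive(&cfil_lck_rw);  // drop the cfil lock first
 *     socket_lock(so, 1);                      // then take the socket lock
 *     ...
 *     cfil_rw_lock_exclusive(&cfil_lck_rw);    // re-acquire afterwards
 *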
 * It is also important to lock the TCP/IP socket buffer while the content
 * filter is modifying the amount of pending data. Otherwise the calculations
 * in sbspace() and tcp_sbspace() could be wrong.
 *
 * The "cfil_lck_rw" protects "struct content_filter" and also the fields
 * "cfe_link" and "cfe_filter" of "struct cfil_entry".
 *
 * Actually "cfe_link" and "cfe_filter" are protected both by
 * "cfil_lck_rw" and the socket lock: they may be modified only when
 * "cfil_lck_rw" is held exclusive and the socket is locked.
 *
 * To read the other fields of "struct content_filter" we have to take
 * "cfil_lck_rw" in shared mode.
 *
 *
 * LIMITATIONS
 *
 * - For TCP sockets only
 *
 * - Does not support TCP unordered messages
 */

/*
 * TO DO LIST
 *
 * SOONER:
 *
 * Deal with OOB
 *
 * LATER:
 *
 * If we support datagrams, enqueue control and address mbufs as well
 */

#include <sys/types.h>
#include <sys/kern_control.h>
#include <sys/queue.h>
#include <sys/domain.h>
#include <sys/protosw.h>
#include <sys/syslog.h>
#include <sys/systm.h>
#include <sys/param.h>
#include <sys/mbuf.h>

#include <kern/locks.h>
#include <kern/zalloc.h>
#include <kern/debug.h>

#include <net/content_filter.h>

#include <netinet/in_pcb.h>
#include <netinet/tcp.h>
#include <netinet/tcp_var.h>
#include <netinet/udp.h>
#include <netinet/udp_var.h>

#include <string.h>
#include <libkern/libkern.h>
#include <kern/sched_prim.h>

#define MAX_CONTENT_FILTER 2

struct cfil_entry;

/*
 * The structure content_filter represents a user space content filter.
 * It's created and associated with a kernel control socket instance.
 */
struct content_filter {
    kern_ctl_ref cf_kcref;
    u_int32_t cf_kcunit;
    u_int32_t cf_flags;

    uint32_t cf_necp_control_unit;

    uint32_t cf_sock_count;
    TAILQ_HEAD(, cfil_entry) cf_sock_entries;
};

#define CFF_ACTIVE 0x01
#define CFF_DETACHING 0x02
#define CFF_FLOW_CONTROLLED 0x04

struct content_filter **content_filters = NULL;
uint32_t cfil_active_count = 0; /* Number of active content filters */
uint32_t cfil_sock_attached_count = 0; /* Number of TCP socket attachments */
uint32_t cfil_sock_udp_attached_count = 0; /* Number of UDP socket attachments */
uint32_t cfil_close_wait_timeout = 1000; /* in milliseconds */

static kern_ctl_ref cfil_kctlref = NULL;

static lck_grp_attr_t *cfil_lck_grp_attr = NULL;
static lck_attr_t *cfil_lck_attr = NULL;
static lck_grp_t *cfil_lck_grp = NULL;
decl_lck_rw_data(static, cfil_lck_rw);

#define CFIL_RW_LCK_MAX 8

int cfil_rw_nxt_lck = 0;
void* cfil_rw_lock_history[CFIL_RW_LCK_MAX];

int cfil_rw_nxt_unlck = 0;
void* cfil_rw_unlock_history[CFIL_RW_LCK_MAX];

#define CONTENT_FILTER_ZONE_NAME "content_filter"
#define CONTENT_FILTER_ZONE_MAX 10
static struct zone *content_filter_zone = NULL; /* zone for content_filter */


#define CFIL_INFO_ZONE_NAME "cfil_info"
#define CFIL_INFO_ZONE_MAX 1024
static struct zone *cfil_info_zone = NULL; /* zone for cfil_info */

MBUFQ_HEAD(cfil_mqhead);

struct cfil_queue {
    uint64_t q_start; /* offset of first byte in queue */
    uint64_t q_end; /* offset of last byte in queue */
    struct cfil_mqhead q_mq;
};

/*
 * struct cfil_entry
 *
 * There is one entry per content filter.
 */
struct cfil_entry {
    TAILQ_ENTRY(cfil_entry) cfe_link;
    struct content_filter *cfe_filter;

    struct cfil_info *cfe_cfil_info;
    uint32_t cfe_flags;
    uint32_t cfe_necp_control_unit;
    struct timeval cfe_last_event; /* To user space */
    struct timeval cfe_last_action; /* From user space */

    struct cfe_buf {
        /*
         * cfe_pending_q holds data that has been delivered to
         * the filter and for which we are waiting for an action
         */
        struct cfil_queue cfe_pending_q;
        /*
         * This queue is for data that has not been delivered to
         * the content filter (new data, pass peek or flow control)
         */
        struct cfil_queue cfe_ctl_q;

        uint64_t cfe_pass_offset;
        uint64_t cfe_peek_offset;
        uint64_t cfe_peeked;
    } cfe_snd, cfe_rcv;
};

#define CFEF_CFIL_ATTACHED       0x0001 /* was attached to filter */
#define CFEF_SENT_SOCK_ATTACHED  0x0002 /* sock attach event was sent */
#define CFEF_DATA_START          0x0004 /* can send data event */
#define CFEF_FLOW_CONTROLLED     0x0008 /* wait for flow control lift */
#define CFEF_SENT_DISCONNECT_IN  0x0010 /* event was sent */
#define CFEF_SENT_DISCONNECT_OUT 0x0020 /* event was sent */
#define CFEF_SENT_SOCK_CLOSED    0x0040 /* closed event was sent */
#define CFEF_CFIL_DETACHED       0x0080 /* filter was detached */

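/*
 * Note: this macro declares a local "_tdiff" and expands to an unguarded
 * multi-statement block, so it is meant to be used at most once per
 * enclosing block (which is how the call sites below use it).
 */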
#define CFI_ADD_TIME_LOG(cfil, t1, t0, op) \
    struct timeval _tdiff; \
    if ((cfil)->cfi_op_list_ctr < CFI_MAX_TIME_LOG_ENTRY) { \
        timersub(t1, t0, &_tdiff); \
        (cfil)->cfi_op_time[(cfil)->cfi_op_list_ctr] = (uint32_t)(_tdiff.tv_sec * 1000 + _tdiff.tv_usec / 1000); \
        (cfil)->cfi_op_list[(cfil)->cfi_op_list_ctr] = (unsigned char)op; \
        (cfil)->cfi_op_list_ctr++; \
    }

struct cfil_hash_entry;

/*
 * struct cfil_info
 *
 * There is a struct cfil_info per socket.
 */
struct cfil_info {
    TAILQ_ENTRY(cfil_info) cfi_link;
    struct socket *cfi_so;
    uint64_t cfi_flags;
    uint64_t cfi_sock_id;
    struct timeval64 cfi_first_event;
    uint32_t cfi_op_list_ctr;
    uint32_t cfi_op_time[CFI_MAX_TIME_LOG_ENTRY]; /* time interval in milliseconds since first event */
    unsigned char cfi_op_list[CFI_MAX_TIME_LOG_ENTRY];

    struct cfi_buf {
        /*
         * cfi_pending_first and cfi_pending_last describe the total
         * amount of data outstanding for all the filters on
         * this socket and data in the flow queue.
         * cfi_pending_mbcnt counts in sballoc() "chars of mbufs used"
         */
        uint64_t cfi_pending_first;
        uint64_t cfi_pending_last;
        uint32_t cfi_pending_mbcnt;
        uint32_t cfi_pending_mbnum;
        uint32_t cfi_tail_drop_cnt;
        /*
         * cfi_pass_offset is the minimum of all the filters
         */
        uint64_t cfi_pass_offset;
        /*
         * cfi_inject_q holds data that needs to be re-injected
         * into the socket after filtering and that could not be
         * re-injected immediately because of flow control
         */
        struct cfil_queue cfi_inject_q;
    } cfi_snd, cfi_rcv;

    struct cfil_entry cfi_entries[MAX_CONTENT_FILTER];
    struct cfil_hash_entry *cfi_hash_entry;
} __attribute__((aligned(8)));

#define CFIF_DROP             0x0001 /* drop action applied */
#define CFIF_CLOSE_WAIT       0x0002 /* waiting for filter to close */
#define CFIF_SOCK_CLOSED      0x0004 /* socket is closed */
#define CFIF_RETRY_INJECT_IN  0x0010 /* inject in failed */
#define CFIF_RETRY_INJECT_OUT 0x0020 /* inject out failed */
#define CFIF_SHUT_WR          0x0040 /* shutdown write */
#define CFIF_SHUT_RD          0x0080 /* shutdown read */

#define CFI_MASK_GENCNT    0xFFFFFFFF00000000 /* upper 32 bits */
#define CFI_SHIFT_GENCNT   32
#define CFI_MASK_FLOWHASH  0x00000000FFFFFFFF /* lower 32 bits */
#define CFI_SHIFT_FLOWHASH 0
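/*
 * A TCP socket's 64-bit cfil_sock_id packs the socket generation count in
 * the upper half and the inpcb flow hash in the lower half; conceptually
 * (a sketch -- see cfil_socket_from_sock_id() below for the decomposition):
 *
 *     cfi_sock_id = ((so->so_gencnt << CFI_SHIFT_GENCNT) & CFI_MASK_GENCNT) |
 *         ((inp->inp_flowhash << CFI_SHIFT_FLOWHASH) & CFI_MASK_FLOWHASH);
 */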

TAILQ_HEAD(cfil_sock_head, cfil_info) cfil_sock_head;

#define CFIL_QUEUE_VERIFY(x) if (cfil_debug) cfil_queue_verify(x)
#define CFIL_INFO_VERIFY(x) if (cfil_debug) cfil_info_verify(x)

/*
 * UDP Socket Support
 */
LIST_HEAD(cfilhashhead, cfil_hash_entry);
#define CFILHASHSIZE 16
#define CFIL_HASH(laddr, faddr, lport, fport) ((faddr) ^ ((laddr) >> 16) ^ (fport) ^ (lport))
#define IS_UDP(so) (so && so->so_proto->pr_type == SOCK_DGRAM && so->so_proto->pr_protocol == IPPROTO_UDP)
#define UNCONNECTED(inp) (inp && (((inp->inp_vflag & INP_IPV4) && (inp->inp_faddr.s_addr == INADDR_ANY)) || \
    ((inp->inp_vflag & INP_IPV6) && IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr))))
#define IS_ENTRY_ATTACHED(cfil_info, kcunit) (cfil_info != NULL && (kcunit <= MAX_CONTENT_FILTER) && \
    cfil_info->cfi_entries[kcunit - 1].cfe_filter != NULL)
#define IS_DNS(local, remote) (check_port(local, 53) || check_port(remote, 53) || check_port(local, 5353) || check_port(remote, 5353))

/*
 * UDP Garbage Collection:
 */
static struct thread *cfil_udp_gc_thread;
#define UDP_FLOW_GC_IDLE_TO 30 // Flow Idle Timeout in seconds
#define UDP_FLOW_GC_ACTION_TO 10 // Flow Action Timeout (no action from user space) in seconds
#define UDP_FLOW_GC_MAX_COUNT 100 // Max UDP flows to be handled per run
#define UDP_FLOW_GC_RUN_INTERVAL_NSEC (10 * NSEC_PER_SEC) // GC wakes up every 10 seconds

/*
 * UDP flow queue thresholds
 */
#define UDP_FLOW_GC_MBUF_CNT_MAX (2 << MBSHIFT) // Max mbuf byte count in flow queue (2MB)
#define UDP_FLOW_GC_MBUF_NUM_MAX (UDP_FLOW_GC_MBUF_CNT_MAX >> MCLSHIFT) // Max mbuf count in flow queue (1K)
#define UDP_FLOW_GC_MBUF_SHIFT 5 // Shift to get 1/32 of platform limits
/*
 * UDP flow queue threshold globals:
 */
static unsigned int cfil_udp_gc_mbuf_num_max = UDP_FLOW_GC_MBUF_NUM_MAX;
static unsigned int cfil_udp_gc_mbuf_cnt_max = UDP_FLOW_GC_MBUF_CNT_MAX;

/*
 * struct cfil_hash_entry
 *
 * Hash entry for cfil_info
 */
struct cfil_hash_entry {
    LIST_ENTRY(cfil_hash_entry) cfentry_link;
    struct cfil_info *cfentry_cfil;
    u_short cfentry_fport;
    u_short cfentry_lport;
    sa_family_t cfentry_family;
    u_int32_t cfentry_flowhash;
    u_int32_t cfentry_lastused;
    union {
        /* foreign host table entry */
        struct in_addr_4in6 addr46;
        struct in6_addr addr6;
    } cfentry_faddr;
    union {
        /* local host table entry */
        struct in_addr_4in6 addr46;
        struct in6_addr addr6;
    } cfentry_laddr;
};

/*
 * struct cfil_db
 *
 * For each UDP socket, this is a hash table maintaining all the cfil_info
 * structs keyed by the flow 4-tuple <lport, fport, laddr, faddr>.
 */
struct cfil_db {
    struct socket *cfdb_so;
    uint32_t cfdb_count; /* Number of total content filters */
    struct cfilhashhead *cfdb_hashbase;
    u_long cfdb_hashmask;
    struct cfil_hash_entry *cfdb_only_entry; /* Optimization for connected UDP */
};

/*
 * CFIL specific mbuf tag:
 * Save the state of the socket at the point of data entry into cfil.
 * Use the saved state for reinjection at the protocol layer.
 */
struct cfil_tag {
    union sockaddr_in_4_6 cfil_faddr;
    uint32_t cfil_so_state_change_cnt;
    short cfil_so_options;
};

#define CFIL_HASH_ENTRY_ZONE_NAME "cfil_entry_hash"
#define CFIL_HASH_ENTRY_ZONE_MAX 1024
static struct zone *cfil_hash_entry_zone = NULL;

#define CFIL_DB_ZONE_NAME "cfil_db"
#define CFIL_DB_ZONE_MAX 1024
static struct zone *cfil_db_zone = NULL;

/*
 * Statistics
 */

struct cfil_stats cfil_stats;

/*
 * For troubleshooting
 */
int cfil_log_level = LOG_ERR;
int cfil_debug = 1;

// Debug controls added for selective debugging.
// Disabled for production. If enabled,
// these will have a performance impact.
#define LIFECYCLE_DEBUG 0
#define VERDICT_DEBUG 0
#define DATA_DEBUG 0
#define SHOW_DEBUG 0
#define GC_DEBUG 0

/*
 * Sysctls for logs and statistics
 */
static int sysctl_cfil_filter_list(struct sysctl_oid *, void *, int,
    struct sysctl_req *);
static int sysctl_cfil_sock_list(struct sysctl_oid *, void *, int,
    struct sysctl_req *);

SYSCTL_NODE(_net, OID_AUTO, cfil, CTLFLAG_RW | CTLFLAG_LOCKED, 0, "cfil");

SYSCTL_INT(_net_cfil, OID_AUTO, log, CTLFLAG_RW | CTLFLAG_LOCKED,
    &cfil_log_level, 0, "");

SYSCTL_INT(_net_cfil, OID_AUTO, debug, CTLFLAG_RW | CTLFLAG_LOCKED,
    &cfil_debug, 0, "");

SYSCTL_UINT(_net_cfil, OID_AUTO, sock_attached_count, CTLFLAG_RD | CTLFLAG_LOCKED,
    &cfil_sock_attached_count, 0, "");

SYSCTL_UINT(_net_cfil, OID_AUTO, active_count, CTLFLAG_RD | CTLFLAG_LOCKED,
    &cfil_active_count, 0, "");

SYSCTL_UINT(_net_cfil, OID_AUTO, close_wait_timeout, CTLFLAG_RW | CTLFLAG_LOCKED,
    &cfil_close_wait_timeout, 0, "");

static int cfil_sbtrim = 1;
SYSCTL_UINT(_net_cfil, OID_AUTO, sbtrim, CTLFLAG_RW | CTLFLAG_LOCKED,
    &cfil_sbtrim, 0, "");

SYSCTL_PROC(_net_cfil, OID_AUTO, filter_list, CTLFLAG_RD | CTLFLAG_LOCKED,
    0, 0, sysctl_cfil_filter_list, "S,cfil_filter_stat", "");

SYSCTL_PROC(_net_cfil, OID_AUTO, sock_list, CTLFLAG_RD | CTLFLAG_LOCKED,
    0, 0, sysctl_cfil_sock_list, "S,cfil_sock_stat", "");

SYSCTL_STRUCT(_net_cfil, OID_AUTO, stats, CTLFLAG_RD | CTLFLAG_LOCKED,
    &cfil_stats, cfil_stats, "");

/*
 * Forward declarations to appease the compiler
 */
static int cfil_action_data_pass(struct socket *, struct cfil_info *, uint32_t, int,
    uint64_t, uint64_t);
static int cfil_action_drop(struct socket *, struct cfil_info *, uint32_t);
static int cfil_action_bless_client(uint32_t, struct cfil_msg_hdr *);
static int cfil_dispatch_closed_event(struct socket *, struct cfil_info *, int);
static int cfil_data_common(struct socket *, struct cfil_info *, int, struct sockaddr *,
    struct mbuf *, struct mbuf *, uint32_t);
static int cfil_data_filter(struct socket *, struct cfil_info *, uint32_t, int,
    struct mbuf *, uint64_t);
static void fill_ip_sockaddr_4_6(union sockaddr_in_4_6 *,
    struct in_addr, u_int16_t);
static void fill_ip6_sockaddr_4_6(union sockaddr_in_4_6 *,
    struct in6_addr *, u_int16_t);
static int cfil_dispatch_attach_event(struct socket *, struct cfil_info *, uint32_t);
static void cfil_info_free(struct cfil_info *);
static struct cfil_info * cfil_info_alloc(struct socket *, struct cfil_hash_entry *);
static int cfil_info_attach_unit(struct socket *, uint32_t, struct cfil_info *);
static struct socket * cfil_socket_from_sock_id(cfil_sock_id_t, bool);
static struct socket * cfil_socket_from_client_uuid(uuid_t, bool *);
static int cfil_service_pending_queue(struct socket *, struct cfil_info *, uint32_t, int);
static int cfil_data_service_ctl_q(struct socket *, struct cfil_info *, uint32_t, int);
static void cfil_info_verify(struct cfil_info *);
static int cfil_update_data_offsets(struct socket *, struct cfil_info *, uint32_t, int,
    uint64_t, uint64_t);
static int cfil_acquire_sockbuf(struct socket *, struct cfil_info *, int);
static void cfil_release_sockbuf(struct socket *, int);
static int cfil_filters_attached(struct socket *);

static void cfil_rw_lock_exclusive(lck_rw_t *);
static void cfil_rw_unlock_exclusive(lck_rw_t *);
static void cfil_rw_lock_shared(lck_rw_t *);
static void cfil_rw_unlock_shared(lck_rw_t *);
static boolean_t cfil_rw_lock_shared_to_exclusive(lck_rw_t *);
static void cfil_rw_lock_exclusive_to_shared(lck_rw_t *);

static unsigned int cfil_data_length(struct mbuf *, int *, int *);
static errno_t cfil_db_init(struct socket *);
static void cfil_db_free(struct socket *so);
struct cfil_hash_entry *cfil_db_lookup_entry(struct cfil_db *, struct sockaddr *, struct sockaddr *);
struct cfil_hash_entry *cfil_db_lookup_entry_with_sockid(struct cfil_db *, u_int64_t);
struct cfil_hash_entry *cfil_db_add_entry(struct cfil_db *, struct sockaddr *, struct sockaddr *);
void cfil_db_delete_entry(struct cfil_db *, struct cfil_hash_entry *);
struct cfil_hash_entry *cfil_sock_udp_get_flow(struct socket *, uint32_t, bool, struct sockaddr *, struct sockaddr *);
struct cfil_info *cfil_db_get_cfil_info(struct cfil_db *, cfil_sock_id_t);
static errno_t cfil_sock_udp_handle_data(bool, struct socket *, struct sockaddr *, struct sockaddr *,
    struct mbuf *, struct mbuf *, uint32_t);
static int32_t cfil_sock_udp_data_pending(struct sockbuf *, bool);
static void cfil_sock_udp_is_closed(struct socket *);
static int cfil_sock_udp_notify_shutdown(struct socket *, int, int, int);
static int cfil_sock_udp_shutdown(struct socket *, int *);
static void cfil_sock_udp_close_wait(struct socket *);
static void cfil_sock_udp_buf_update(struct sockbuf *);
static int cfil_filters_udp_attached(struct socket *, bool);
static void cfil_get_flow_address_v6(struct cfil_hash_entry *, struct inpcb *,
    struct in6_addr **, struct in6_addr **,
    u_int16_t *, u_int16_t *);
static void cfil_get_flow_address(struct cfil_hash_entry *, struct inpcb *,
    struct in_addr *, struct in_addr *,
    u_int16_t *, u_int16_t *);
static void cfil_info_log(int, struct cfil_info *, const char *);
void cfil_filter_show(u_int32_t);
void cfil_info_show(void);
bool cfil_info_idle_timed_out(struct cfil_info *, int, u_int32_t);
bool cfil_info_action_timed_out(struct cfil_info *, int);
bool cfil_info_buffer_threshold_exceeded(struct cfil_info *);
struct m_tag *cfil_udp_save_socket_state(struct cfil_info *, struct mbuf *);
static void cfil_udp_gc_thread_func(void *, wait_result_t);
static void cfil_info_udp_expire(void *, wait_result_t);

bool check_port(struct sockaddr *, u_short);

/*
 * Content filter global read write lock
 */

static void
cfil_rw_lock_exclusive(lck_rw_t *lck)
{
    void *lr_saved;

    lr_saved = __builtin_return_address(0);

    lck_rw_lock_exclusive(lck);

    cfil_rw_lock_history[cfil_rw_nxt_lck] = lr_saved;
    cfil_rw_nxt_lck = (cfil_rw_nxt_lck + 1) % CFIL_RW_LCK_MAX;
}

static void
cfil_rw_unlock_exclusive(lck_rw_t *lck)
{
    void *lr_saved;

    lr_saved = __builtin_return_address(0);

    lck_rw_unlock_exclusive(lck);

    cfil_rw_unlock_history[cfil_rw_nxt_unlck] = lr_saved;
    cfil_rw_nxt_unlck = (cfil_rw_nxt_unlck + 1) % CFIL_RW_LCK_MAX;
}

static void
cfil_rw_lock_shared(lck_rw_t *lck)
{
    void *lr_saved;

    lr_saved = __builtin_return_address(0);

    lck_rw_lock_shared(lck);

    cfil_rw_lock_history[cfil_rw_nxt_lck] = lr_saved;
    cfil_rw_nxt_lck = (cfil_rw_nxt_lck + 1) % CFIL_RW_LCK_MAX;
}

static void
cfil_rw_unlock_shared(lck_rw_t *lck)
{
    void *lr_saved;

    lr_saved = __builtin_return_address(0);

    lck_rw_unlock_shared(lck);

    cfil_rw_unlock_history[cfil_rw_nxt_unlck] = lr_saved;
    cfil_rw_nxt_unlck = (cfil_rw_nxt_unlck + 1) % CFIL_RW_LCK_MAX;
}

static boolean_t
cfil_rw_lock_shared_to_exclusive(lck_rw_t *lck)
{
    void *lr_saved;
    boolean_t upgraded;

    lr_saved = __builtin_return_address(0);

    upgraded = lck_rw_lock_shared_to_exclusive(lck);
    if (upgraded) {
        cfil_rw_unlock_history[cfil_rw_nxt_unlck] = lr_saved;
        cfil_rw_nxt_unlck = (cfil_rw_nxt_unlck + 1) % CFIL_RW_LCK_MAX;
    }
    return upgraded;
}

static void
cfil_rw_lock_exclusive_to_shared(lck_rw_t *lck)
{
    void *lr_saved;

    lr_saved = __builtin_return_address(0);

    lck_rw_lock_exclusive_to_shared(lck);

    cfil_rw_lock_history[cfil_rw_nxt_lck] = lr_saved;
    cfil_rw_nxt_lck = (cfil_rw_nxt_lck + 1) % CFIL_RW_LCK_MAX;
}

static void
cfil_rw_lock_assert_held(lck_rw_t *lck, int exclusive)
{
#if !MACH_ASSERT
#pragma unused(lck, exclusive)
#endif
    LCK_RW_ASSERT(lck,
        exclusive ? LCK_RW_ASSERT_EXCLUSIVE : LCK_RW_ASSERT_HELD);
}

/*
 * Return the number of bytes in the mbuf chain using the same
 * method as m_length() or sballoc().
 *
 * Returns the data length, starting from the packet header (M_PKTHDR):
 * - retmbcnt - optional param to get the total mbuf bytes in the chain
 * - retmbnum - optional param to get the number of mbufs in the chain
 */
static unsigned int
cfil_data_length(struct mbuf *m, int *retmbcnt, int *retmbnum)
{
    struct mbuf *m0;
    unsigned int pktlen = 0;
    int mbcnt;
    int mbnum;

    // Locate the start of data
    for (m0 = m; m0 != NULL; m0 = m0->m_next) {
        if (m0->m_flags & M_PKTHDR) {
            break;
        }
    }
    if (m0 == NULL) {
        CFIL_LOG(LOG_ERR, "cfil_data_length: no M_PKTHDR");
        return 0;
    }
    m = m0;

    if (retmbcnt == NULL && retmbnum == NULL) {
        return m_length(m);
    }

    pktlen = 0;
    mbcnt = 0;
    mbnum = 0;
    for (m0 = m; m0 != NULL; m0 = m0->m_next) {
        pktlen += m0->m_len;
        mbnum++;
        mbcnt += MSIZE;
        if (m0->m_flags & M_EXT) {
            mbcnt += m0->m_ext.ext_size;
        }
    }
    if (retmbcnt) {
        *retmbcnt = mbcnt;
    }
    if (retmbnum) {
        *retmbnum = mbnum;
    }
    return pktlen;
}

static struct mbuf *
cfil_data_start(struct mbuf *m)
{
    struct mbuf *m0;

    // Locate the start of data
    for (m0 = m; m0 != NULL; m0 = m0->m_next) {
        if (m0->m_flags & M_PKTHDR) {
            break;
        }
    }
    return m0;
}

/*
 * Common mbuf queue utilities
 */

static inline void
cfil_queue_init(struct cfil_queue *cfq)
{
    cfq->q_start = 0;
    cfq->q_end = 0;
    MBUFQ_INIT(&cfq->q_mq);
}

static inline uint64_t
cfil_queue_drain(struct cfil_queue *cfq)
{
    uint64_t drained = cfq->q_end - cfq->q_start;
    cfq->q_start = 0;
    cfq->q_end = 0;
    MBUFQ_DRAIN(&cfq->q_mq);

    return drained;
}

/* Return 1 when empty, 0 otherwise */
static inline int
cfil_queue_empty(struct cfil_queue *cfq)
{
    return MBUFQ_EMPTY(&cfq->q_mq);
}

static inline uint64_t
cfil_queue_offset_first(struct cfil_queue *cfq)
{
    return cfq->q_start;
}

static inline uint64_t
cfil_queue_offset_last(struct cfil_queue *cfq)
{
    return cfq->q_end;
}

static inline uint64_t
cfil_queue_len(struct cfil_queue *cfq)
{
    return cfq->q_end - cfq->q_start;
}

/*
 * Routines to verify some fundamental assumptions
 */

static void
cfil_queue_verify(struct cfil_queue *cfq)
{
    mbuf_t chain;
    mbuf_t m;
    mbuf_t n;
    uint64_t queuesize = 0;

    /* Verify the offsets are ordered */
    VERIFY(cfq->q_start <= cfq->q_end);

    /*
     * When the queue is empty, the offsets are equal; otherwise the
     * offsets are different
     */
    VERIFY((MBUFQ_EMPTY(&cfq->q_mq) && cfq->q_start == cfq->q_end) ||
        (!MBUFQ_EMPTY(&cfq->q_mq) &&
        cfq->q_start != cfq->q_end));

    MBUFQ_FOREACH(chain, &cfq->q_mq) {
        size_t chainsize = 0;
        m = chain;
        unsigned int mlen = cfil_data_length(m, NULL, NULL);
        // skip the addr and control stuff if present
        m = cfil_data_start(m);

        if (m == NULL ||
            m == (void *)M_TAG_FREE_PATTERN ||
            m->m_next == (void *)M_TAG_FREE_PATTERN ||
            m->m_nextpkt == (void *)M_TAG_FREE_PATTERN) {
            panic("%s - mq %p is free at %p", __func__,
                &cfq->q_mq, m);
        }
        for (n = m; n != NULL; n = n->m_next) {
            if (n->m_type != MT_DATA &&
                n->m_type != MT_HEADER &&
                n->m_type != MT_OOBDATA) {
                panic("%s - %p unsupported type %u", __func__,
                    n, n->m_type);
            }
            chainsize += n->m_len;
        }
        if (mlen != chainsize) {
            panic("%s - %p m_length() %u != chainsize %lu",
                __func__, m, mlen, chainsize);
        }
        queuesize += chainsize;
    }
    if (queuesize != cfq->q_end - cfq->q_start) {
        panic("%s - %p queuesize %llu != offsetdiffs %llu", __func__,
            m, queuesize, cfq->q_end - cfq->q_start);
    }
}

static void
cfil_queue_enqueue(struct cfil_queue *cfq, mbuf_t m, size_t len)
{
    CFIL_QUEUE_VERIFY(cfq);

    MBUFQ_ENQUEUE(&cfq->q_mq, m);
    cfq->q_end += len;

    CFIL_QUEUE_VERIFY(cfq);
}

static void
cfil_queue_remove(struct cfil_queue *cfq, mbuf_t m, size_t len)
{
    CFIL_QUEUE_VERIFY(cfq);

    VERIFY(cfil_data_length(m, NULL, NULL) == len);

    MBUFQ_REMOVE(&cfq->q_mq, m);
    MBUFQ_NEXT(m) = NULL;
    cfq->q_start += len;

    CFIL_QUEUE_VERIFY(cfq);
}

static mbuf_t
cfil_queue_first(struct cfil_queue *cfq)
{
    return MBUFQ_FIRST(&cfq->q_mq);
}

static mbuf_t
cfil_queue_next(struct cfil_queue *cfq, mbuf_t m)
{
#pragma unused(cfq)
    return MBUFQ_NEXT(m);
}

static void
cfil_entry_buf_verify(struct cfe_buf *cfe_buf)
{
    CFIL_QUEUE_VERIFY(&cfe_buf->cfe_ctl_q);
    CFIL_QUEUE_VERIFY(&cfe_buf->cfe_pending_q);

    /* Verify the queues are ordered so that pending is before ctl */
    VERIFY(cfe_buf->cfe_ctl_q.q_start >= cfe_buf->cfe_pending_q.q_end);

    /* The peek offset cannot be less than the pass offset */
    VERIFY(cfe_buf->cfe_peek_offset >= cfe_buf->cfe_pass_offset);

    /* Make sure we've updated the offset we peeked at */
    VERIFY(cfe_buf->cfe_ctl_q.q_start <= cfe_buf->cfe_peeked);
}

static void
cfil_entry_verify(struct cfil_entry *entry)
{
    cfil_entry_buf_verify(&entry->cfe_snd);
    cfil_entry_buf_verify(&entry->cfe_rcv);
}

static void
cfil_info_buf_verify(struct cfi_buf *cfi_buf)
{
    CFIL_QUEUE_VERIFY(&cfi_buf->cfi_inject_q);

    VERIFY(cfi_buf->cfi_pending_first <= cfi_buf->cfi_pending_last);
    VERIFY(cfi_buf->cfi_pending_mbcnt >= 0);
}

static void
cfil_info_verify(struct cfil_info *cfil_info)
{
    int i;

    if (cfil_info == NULL) {
        return;
    }

    cfil_info_buf_verify(&cfil_info->cfi_snd);
    cfil_info_buf_verify(&cfil_info->cfi_rcv);

    for (i = 0; i < MAX_CONTENT_FILTER; i++) {
        cfil_entry_verify(&cfil_info->cfi_entries[i]);
    }
}

static void
verify_content_filter(struct content_filter *cfc)
{
    struct cfil_entry *entry;
    uint32_t count = 0;

    VERIFY(cfc->cf_sock_count >= 0);

    TAILQ_FOREACH(entry, &cfc->cf_sock_entries, cfe_link) {
        count++;
        VERIFY(cfc == entry->cfe_filter);
    }
    VERIFY(count == cfc->cf_sock_count);
}

/*
 * Kernel control socket callbacks
 */
static errno_t
cfil_ctl_connect(kern_ctl_ref kctlref, struct sockaddr_ctl *sac,
    void **unitinfo)
{
    errno_t error = 0;
    struct content_filter *cfc = NULL;

    CFIL_LOG(LOG_NOTICE, "");

    cfc = zalloc(content_filter_zone);
    if (cfc == NULL) {
        CFIL_LOG(LOG_ERR, "zalloc failed");
        error = ENOMEM;
        goto done;
    }
    bzero(cfc, sizeof(struct content_filter));

    cfil_rw_lock_exclusive(&cfil_lck_rw);
    if (content_filters == NULL) {
        struct content_filter **tmp;

        cfil_rw_unlock_exclusive(&cfil_lck_rw);

        MALLOC(tmp,
            struct content_filter **,
            MAX_CONTENT_FILTER * sizeof(struct content_filter *),
            M_TEMP,
            M_WAITOK | M_ZERO);

        cfil_rw_lock_exclusive(&cfil_lck_rw);

        if (tmp == NULL && content_filters == NULL) {
            error = ENOMEM;
            cfil_rw_unlock_exclusive(&cfil_lck_rw);
            goto done;
        }
        /* Another thread may have won the race */
        if (content_filters != NULL) {
            FREE(tmp, M_TEMP);
        } else {
            content_filters = tmp;
        }
    }

    if (sac->sc_unit == 0 || sac->sc_unit > MAX_CONTENT_FILTER) {
        CFIL_LOG(LOG_ERR, "bad sc_unit %u", sac->sc_unit);
        error = EINVAL;
    } else if (content_filters[sac->sc_unit - 1] != NULL) {
        CFIL_LOG(LOG_ERR, "sc_unit %u in use", sac->sc_unit);
        error = EADDRINUSE;
    } else {
        /*
         * kernel control socket kcunit numbers start at 1
         */
        content_filters[sac->sc_unit - 1] = cfc;

        cfc->cf_kcref = kctlref;
        cfc->cf_kcunit = sac->sc_unit;
        TAILQ_INIT(&cfc->cf_sock_entries);

        *unitinfo = cfc;
        cfil_active_count++;
    }
    cfil_rw_unlock_exclusive(&cfil_lck_rw);
done:
    if (error != 0 && cfc != NULL) {
        zfree(content_filter_zone, cfc);
    }

    if (error == 0) {
        OSIncrementAtomic(&cfil_stats.cfs_ctl_connect_ok);
    } else {
        OSIncrementAtomic(&cfil_stats.cfs_ctl_connect_fail);
    }

    CFIL_LOG(LOG_INFO, "return %d cfil_active_count %u kcunit %u",
        error, cfil_active_count, sac->sc_unit);

    return error;
}

static errno_t
cfil_ctl_disconnect(kern_ctl_ref kctlref, u_int32_t kcunit, void *unitinfo)
{
#pragma unused(kctlref)
    errno_t error = 0;
    struct content_filter *cfc;
    struct cfil_entry *entry;
    uint64_t sock_flow_id = 0;

    CFIL_LOG(LOG_NOTICE, "");

    if (content_filters == NULL) {
        CFIL_LOG(LOG_ERR, "no content filter");
        error = EINVAL;
        goto done;
    }
    if (kcunit > MAX_CONTENT_FILTER) {
        CFIL_LOG(LOG_ERR, "kcunit %u > MAX_CONTENT_FILTER (%d)",
            kcunit, MAX_CONTENT_FILTER);
        error = EINVAL;
        goto done;
    }

    cfc = (struct content_filter *)unitinfo;
    if (cfc == NULL) {
        goto done;
    }

    cfil_rw_lock_exclusive(&cfil_lck_rw);
    if (content_filters[kcunit - 1] != cfc || cfc->cf_kcunit != kcunit) {
        CFIL_LOG(LOG_ERR, "bad unit info %u",
            kcunit);
        cfil_rw_unlock_exclusive(&cfil_lck_rw);
        goto done;
    }
    cfc->cf_flags |= CFF_DETACHING;
    /*
     * Remove all sockets from the filter
     */
    while ((entry = TAILQ_FIRST(&cfc->cf_sock_entries)) != NULL) {
        cfil_rw_lock_assert_held(&cfil_lck_rw, 1);

        verify_content_filter(cfc);
        /*
         * Accept all outstanding data by pushing to next filter
         * or back to socket
         *
         * TBD: Actually we should make sure all data has been pushed
         * back to socket
         */
        if (entry->cfe_cfil_info && entry->cfe_cfil_info->cfi_so) {
            struct cfil_info *cfil_info = entry->cfe_cfil_info;
            struct socket *so = cfil_info->cfi_so;
            sock_flow_id = cfil_info->cfi_sock_id;

            /* Need to let data flow immediately */
            entry->cfe_flags |= CFEF_SENT_SOCK_ATTACHED |
                CFEF_DATA_START;

            /*
             * Respect locking hierarchy
             */
            cfil_rw_unlock_exclusive(&cfil_lck_rw);

            socket_lock(so, 1);

            /*
             * When cfe_filter is NULL the filter is detached
             * and the entry has been removed from cf_sock_entries
             */
            if ((so->so_cfil == NULL && so->so_cfil_db == NULL) || entry->cfe_filter == NULL) {
                cfil_rw_lock_exclusive(&cfil_lck_rw);
                goto release;
            }

            (void) cfil_action_data_pass(so, cfil_info, kcunit, 1,
                CFM_MAX_OFFSET,
                CFM_MAX_OFFSET);

            (void) cfil_action_data_pass(so, cfil_info, kcunit, 0,
                CFM_MAX_OFFSET,
                CFM_MAX_OFFSET);

            cfil_rw_lock_exclusive(&cfil_lck_rw);

            /*
             * Check again to make sure the cfil_info is still valid,
             * as the socket may have been unlocked when calling
             * cfil_acquire_sockbuf()
             */
            if (entry->cfe_filter == NULL ||
                (so->so_cfil == NULL && cfil_db_get_cfil_info(so->so_cfil_db, sock_flow_id) == NULL)) {
                goto release;
            }

            /* The filter is now detached */
            entry->cfe_flags |= CFEF_CFIL_DETACHED;
#if LIFECYCLE_DEBUG
            cfil_info_log(LOG_DEBUG, cfil_info, "CFIL: LIFECYCLE: - FILTER DISCONNECTED");
#endif
            CFIL_LOG(LOG_NOTICE, "so %llx detached %u",
                (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit);
            if ((cfil_info->cfi_flags & CFIF_CLOSE_WAIT) &&
                cfil_filters_attached(so) == 0) {
                CFIL_LOG(LOG_NOTICE, "so %llx waking",
                    (uint64_t)VM_KERNEL_ADDRPERM(so));
                wakeup((caddr_t)cfil_info);
            }

            /*
             * Remove the filter entry from the content filter
             * but leave the rest of the state intact as the queues
             * may not be empty yet
             */
            entry->cfe_filter = NULL;
            entry->cfe_necp_control_unit = 0;

            TAILQ_REMOVE(&cfc->cf_sock_entries, entry, cfe_link);
            cfc->cf_sock_count--;
release:
            socket_unlock(so, 1);
        }
    }
    verify_content_filter(cfc);

    VERIFY(cfc->cf_sock_count == 0);

    /*
     * Make filter inactive
     */
    content_filters[kcunit - 1] = NULL;
    cfil_active_count--;
    cfil_rw_unlock_exclusive(&cfil_lck_rw);

    zfree(content_filter_zone, cfc);
done:
    if (error == 0) {
        OSIncrementAtomic(&cfil_stats.cfs_ctl_disconnect_ok);
    } else {
        OSIncrementAtomic(&cfil_stats.cfs_ctl_disconnect_fail);
    }

    CFIL_LOG(LOG_INFO, "return %d cfil_active_count %u kcunit %u",
        error, cfil_active_count, kcunit);

    return error;
}

/*
 * cfil_acquire_sockbuf()
 *
 * Prevent any other thread from acquiring the sockbuf.
 * We use sb_cfil_thread as a semaphore to prevent other threads from
 * messing with the sockbuf -- see sblock().
 * Note: We do not set SB_LOCK here because the thread may check or modify
 * SB_LOCK several times until it calls cfil_release_sockbuf() -- currently
 * sblock(), sbunlock() or sodefunct()
 */
static int
cfil_acquire_sockbuf(struct socket *so, struct cfil_info *cfil_info, int outgoing)
{
    thread_t tp = current_thread();
    struct sockbuf *sb = outgoing ? &so->so_snd : &so->so_rcv;
    lck_mtx_t *mutex_held;
    int error = 0;

    /*
     * Wait until no thread is holding the sockbuf and other content
     * filter threads have released the sockbuf
     */
    while ((sb->sb_flags & SB_LOCK) ||
        (sb->sb_cfil_thread != NULL && sb->sb_cfil_thread != tp)) {
        if (so->so_proto->pr_getlock != NULL) {
            mutex_held = (*so->so_proto->pr_getlock)(so, PR_F_WILLUNLOCK);
        } else {
            mutex_held = so->so_proto->pr_domain->dom_mtx;
        }

        LCK_MTX_ASSERT(mutex_held, LCK_MTX_ASSERT_OWNED);

        sb->sb_wantlock++;
        VERIFY(sb->sb_wantlock != 0);

        msleep(&sb->sb_flags, mutex_held, PSOCK, "cfil_acquire_sockbuf",
            NULL);

        VERIFY(sb->sb_wantlock != 0);
        sb->sb_wantlock--;
    }
    /*
     * Use a reference count for repetitive calls on the same thread
     */
    if (sb->sb_cfil_refs == 0) {
        VERIFY(sb->sb_cfil_thread == NULL);
        VERIFY((sb->sb_flags & SB_LOCK) == 0);

        sb->sb_cfil_thread = tp;
        sb->sb_flags |= SB_LOCK;
    }
    sb->sb_cfil_refs++;

    /* We acquire the socket buffer when we need to clean up */
    if (cfil_info == NULL) {
        CFIL_LOG(LOG_ERR, "so %llx cfil detached",
            (uint64_t)VM_KERNEL_ADDRPERM(so));
        error = 0;
    } else if (cfil_info->cfi_flags & CFIF_DROP) {
        CFIL_LOG(LOG_ERR, "so %llx drop set",
            (uint64_t)VM_KERNEL_ADDRPERM(so));
        error = EPIPE;
    }

    return error;
}

static void
cfil_release_sockbuf(struct socket *so, int outgoing)
{
    struct sockbuf *sb = outgoing ? &so->so_snd : &so->so_rcv;
    thread_t tp = current_thread();

    socket_lock_assert_owned(so);

    if (sb->sb_cfil_thread != NULL && sb->sb_cfil_thread != tp) {
        panic("%s sb_cfil_thread %p not current %p", __func__,
            sb->sb_cfil_thread, tp);
    }
    /*
     * Don't panic if we are defunct because SB_LOCK has
     * been cleared by sodefunct()
     */
    if (!(so->so_flags & SOF_DEFUNCT) && !(sb->sb_flags & SB_LOCK)) {
        panic("%s SB_LOCK not set on %p", __func__,
            sb);
    }
    /*
     * We can unlock when the thread unwinds to the last reference
     */
    sb->sb_cfil_refs--;
    if (sb->sb_cfil_refs == 0) {
        sb->sb_cfil_thread = NULL;
        sb->sb_flags &= ~SB_LOCK;

        if (sb->sb_wantlock > 0) {
            wakeup(&sb->sb_flags);
        }
    }
}

cfil_sock_id_t
cfil_sock_id_from_socket(struct socket *so)
{
    if ((so->so_flags & SOF_CONTENT_FILTER) && so->so_cfil) {
        return so->so_cfil->cfi_sock_id;
    } else {
        return CFIL_SOCK_ID_NONE;
    }
}

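/*
 * Take the socket lock only if the inpcb is not on its way to being
 * freed: WNT_ACQUIRE keeps the pcb from being disposed of while we grab
 * the lock, and WNT_RELEASE (with the socket locked) re-checks that the
 * pcb is still usable. Returns true with the socket locked, or false
 * (socket unlocked) if the pcb is stopusing.
 */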
static bool
cfil_socket_safe_lock(struct inpcb *inp)
{
    if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) != WNT_STOPUSING) {
        socket_lock(inp->inp_socket, 1);
        if (in_pcb_checkstate(inp, WNT_RELEASE, 1) != WNT_STOPUSING) {
            return true;
        }
        socket_unlock(inp->inp_socket, 1);
    }
    return false;
}

static struct socket *
cfil_socket_from_sock_id(cfil_sock_id_t cfil_sock_id, bool udp_only)
{
    struct socket *so = NULL;
    u_int64_t gencnt = cfil_sock_id >> 32;
    u_int32_t flowhash = (u_int32_t)(cfil_sock_id & 0x0ffffffff);
    struct inpcb *inp = NULL;
    struct inpcbinfo *pcbinfo = NULL;

#if VERDICT_DEBUG
    CFIL_LOG(LOG_ERR, "CFIL: VERDICT: search for socket: id %llu gencnt %llx flowhash %x", cfil_sock_id, gencnt, flowhash);
#endif

    if (udp_only) {
        goto find_udp;
    }

    pcbinfo = &tcbinfo;
    lck_rw_lock_shared(pcbinfo->ipi_lock);
    LIST_FOREACH(inp, pcbinfo->ipi_listhead, inp_list) {
        if (inp->inp_state != INPCB_STATE_DEAD &&
            inp->inp_socket != NULL &&
            inp->inp_flowhash == flowhash &&
            (inp->inp_socket->so_gencnt & 0x0ffffffff) == gencnt &&
            inp->inp_socket->so_cfil != NULL) {
            if (cfil_socket_safe_lock(inp)) {
                so = inp->inp_socket;
            }
            break;
        }
    }
    lck_rw_done(pcbinfo->ipi_lock);
    if (so != NULL) {
        goto done;
    }

find_udp:

    pcbinfo = &udbinfo;
    lck_rw_lock_shared(pcbinfo->ipi_lock);
    LIST_FOREACH(inp, pcbinfo->ipi_listhead, inp_list) {
        if (inp->inp_state != INPCB_STATE_DEAD &&
            inp->inp_socket != NULL &&
            inp->inp_socket->so_cfil_db != NULL &&
            (inp->inp_socket->so_gencnt & 0x0ffffffff) == gencnt) {
            if (cfil_socket_safe_lock(inp)) {
                so = inp->inp_socket;
            }
            break;
        }
    }
    lck_rw_done(pcbinfo->ipi_lock);

done:
    if (so == NULL) {
        OSIncrementAtomic(&cfil_stats.cfs_sock_id_not_found);
        CFIL_LOG(LOG_DEBUG,
            "no socket for sock_id %llx gencnt %llx flowhash %x",
            cfil_sock_id, gencnt, flowhash);
    }

    return so;
}

static struct socket *
cfil_socket_from_client_uuid(uuid_t necp_client_uuid, bool *cfil_attached)
{
    struct socket *so = NULL;
    struct inpcb *inp = NULL;
    struct inpcbinfo *pcbinfo = &tcbinfo;

    lck_rw_lock_shared(pcbinfo->ipi_lock);
    LIST_FOREACH(inp, pcbinfo->ipi_listhead, inp_list) {
        if (inp->inp_state != INPCB_STATE_DEAD &&
            inp->inp_socket != NULL &&
            uuid_compare(inp->necp_client_uuid, necp_client_uuid) == 0) {
            *cfil_attached = (inp->inp_socket->so_cfil != NULL);
            if (cfil_socket_safe_lock(inp)) {
                so = inp->inp_socket;
            }
            break;
        }
    }
    lck_rw_done(pcbinfo->ipi_lock);
    if (so != NULL) {
        goto done;
    }

    pcbinfo = &udbinfo;
    lck_rw_lock_shared(pcbinfo->ipi_lock);
    LIST_FOREACH(inp, pcbinfo->ipi_listhead, inp_list) {
        if (inp->inp_state != INPCB_STATE_DEAD &&
            inp->inp_socket != NULL &&
            uuid_compare(inp->necp_client_uuid, necp_client_uuid) == 0) {
            *cfil_attached = (inp->inp_socket->so_cfil_db != NULL);
            if (cfil_socket_safe_lock(inp)) {
                so = inp->inp_socket;
            }
            break;
        }
    }
    lck_rw_done(pcbinfo->ipi_lock);

done:
    return so;
}
1559
fe8ab488
A
1560static errno_t
1561cfil_ctl_send(kern_ctl_ref kctlref, u_int32_t kcunit, void *unitinfo, mbuf_t m,
0a7de745 1562 int flags)
fe8ab488
A
1563{
1564#pragma unused(kctlref, flags)
0a7de745 1565 errno_t error = 0;
fe8ab488
A
1566 struct cfil_msg_hdr *msghdr;
1567 struct content_filter *cfc = (struct content_filter *)unitinfo;
1568 struct socket *so;
1569 struct cfil_msg_action *action_msg;
1570 struct cfil_entry *entry;
d9a64523 1571 struct cfil_info *cfil_info = NULL;
fe8ab488
A
1572
1573 CFIL_LOG(LOG_INFO, "");
1574
1575 if (content_filters == NULL) {
1576 CFIL_LOG(LOG_ERR, "no content filter");
1577 error = EINVAL;
1578 goto done;
1579 }
1580 if (kcunit > MAX_CONTENT_FILTER) {
1581 CFIL_LOG(LOG_ERR, "kcunit %u > MAX_CONTENT_FILTER (%d)",
0a7de745 1582 kcunit, MAX_CONTENT_FILTER);
fe8ab488
A
1583 error = EINVAL;
1584 goto done;
1585 }
1586
1587 if (m_length(m) < sizeof(struct cfil_msg_hdr)) {
1588 CFIL_LOG(LOG_ERR, "too short %u", m_length(m));
1589 error = EINVAL;
1590 goto done;
1591 }
1592 msghdr = (struct cfil_msg_hdr *)mbuf_data(m);
1593 if (msghdr->cfm_version != CFM_VERSION_CURRENT) {
1594 CFIL_LOG(LOG_ERR, "bad version %u", msghdr->cfm_version);
1595 error = EINVAL;
1596 goto done;
1597 }
1598 if (msghdr->cfm_type != CFM_TYPE_ACTION) {
1599 CFIL_LOG(LOG_ERR, "bad type %u", msghdr->cfm_type);
1600 error = EINVAL;
1601 goto done;
1602 }
1603 /* Validate action operation */
1604 switch (msghdr->cfm_op) {
1605 case CFM_OP_DATA_UPDATE:
1606 OSIncrementAtomic(
1607 &cfil_stats.cfs_ctl_action_data_update);
1608 break;
1609 case CFM_OP_DROP:
1610 OSIncrementAtomic(&cfil_stats.cfs_ctl_action_drop);
1611 break;
1612 case CFM_OP_BLESS_CLIENT:
1613 if (msghdr->cfm_len != sizeof(struct cfil_msg_bless_client)) {
1614 OSIncrementAtomic(&cfil_stats.cfs_ctl_action_bad_len);
fe8ab488 1615 error = EINVAL;
1616 CFIL_LOG(LOG_ERR, "bad len: %u for op %u",
1617 msghdr->cfm_len,
1618 msghdr->cfm_op);
1619 goto done;
1620 }
1621 error = cfil_action_bless_client(kcunit, msghdr);
1622 goto done;
1623 default:
1624 OSIncrementAtomic(&cfil_stats.cfs_ctl_action_bad_op);
1625 CFIL_LOG(LOG_ERR, "bad op %u", msghdr->cfm_op);
1626 error = EINVAL;
1627 goto done;
1628 }
1629 if (msghdr->cfm_len != sizeof(struct cfil_msg_action)) {
1630 OSIncrementAtomic(&cfil_stats.cfs_ctl_action_bad_len);
1631 error = EINVAL;
1632 CFIL_LOG(LOG_ERR, "bad len: %u for op %u",
1633 msghdr->cfm_len,
1634 msghdr->cfm_op);
1635 goto done;
1636 }
1637 cfil_rw_lock_shared(&cfil_lck_rw);
1638 if (cfc != (void *)content_filters[kcunit - 1]) {
1639 CFIL_LOG(LOG_ERR, "unitinfo does not match for kcunit %u",
0a7de745 1640 kcunit);
1641 error = EINVAL;
1642 cfil_rw_unlock_shared(&cfil_lck_rw);
1643 goto done;
1644 }
d9a64523 1645 cfil_rw_unlock_shared(&cfil_lck_rw);
fe8ab488 1646
1647 // Search for socket (TCP+UDP and lock so)
1648 so = cfil_socket_from_sock_id(msghdr->cfm_sock_id, false);
1649 if (so == NULL) {
1650 CFIL_LOG(LOG_NOTICE, "bad sock_id %llx",
0a7de745 1651 msghdr->cfm_sock_id);
fe8ab488 1652 error = EINVAL;
1653 goto done;
1654 }
fe8ab488 1655
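/*
 * TCP tracks a single cfil_info hung off so_cfil; UDP tracks one
 * cfil_info per flow in the per-socket hash so_cfil_db, keyed by
 * the sock_id (see cfil_info_alloc() below).
 */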
d9a64523 1656 cfil_info = so->so_cfil_db != NULL ?
0a7de745 1657 cfil_db_get_cfil_info(so->so_cfil_db, msghdr->cfm_sock_id) : so->so_cfil;
fe8ab488 1658
1659 if (cfil_info == NULL) {
1660 CFIL_LOG(LOG_NOTICE, "so %llx <id %llu> not attached",
0a7de745 1661 (uint64_t)VM_KERNEL_ADDRPERM(so), msghdr->cfm_sock_id);
1662 error = EINVAL;
1663 goto unlock;
d9a64523 1664 } else if (cfil_info->cfi_flags & CFIF_DROP) {
fe8ab488 1665 CFIL_LOG(LOG_NOTICE, "so %llx drop set",
0a7de745 1666 (uint64_t)VM_KERNEL_ADDRPERM(so));
1667 error = EINVAL;
1668 goto unlock;
1669 }
d9a64523 1670 entry = &cfil_info->cfi_entries[kcunit - 1];
1671 if (entry->cfe_filter == NULL) {
1672 CFIL_LOG(LOG_NOTICE, "so %llx no filter",
0a7de745 1673 (uint64_t)VM_KERNEL_ADDRPERM(so));
1674 error = EINVAL;
1675 goto unlock;
1676 }
1677
0a7de745 1678 if (entry->cfe_flags & CFEF_SENT_SOCK_ATTACHED) {
fe8ab488 1679 entry->cfe_flags |= CFEF_DATA_START;
0a7de745 1680 } else {
fe8ab488 1681 CFIL_LOG(LOG_ERR,
1682 "so %llx attached not sent for %u",
1683 (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit);
1684 error = EINVAL;
1685 goto unlock;
1686 }
1687
1688 microuptime(&entry->cfe_last_action);
d9a64523 1689 CFI_ADD_TIME_LOG(cfil_info, &entry->cfe_last_action, &cfil_info->cfi_first_event, msghdr->cfm_op);
1690
1691 action_msg = (struct cfil_msg_action *)msghdr;
1692
1693 switch (msghdr->cfm_op) {
0a7de745 1694 case CFM_OP_DATA_UPDATE:
d9a64523 1695#if VERDICT_DEBUG
1696 CFIL_LOG(LOG_ERR, "CFIL: VERDICT RECEIVED: <so %llx sockID %llu> <IN peek:%llu pass:%llu, OUT peek:%llu pass:%llu>",
1697 (uint64_t)VM_KERNEL_ADDRPERM(so),
1698 cfil_info->cfi_sock_id,
1699 action_msg->cfa_in_peek_offset, action_msg->cfa_in_pass_offset,
1700 action_msg->cfa_out_peek_offset, action_msg->cfa_out_pass_offset);
d9a64523 1701#endif
1702 if (action_msg->cfa_out_peek_offset != 0 ||
1703 action_msg->cfa_out_pass_offset != 0) {
1704 error = cfil_action_data_pass(so, cfil_info, kcunit, 1,
1705 action_msg->cfa_out_pass_offset,
1706 action_msg->cfa_out_peek_offset);
1707 }
1708 if (error == EJUSTRETURN) {
1709 error = 0;
1710 }
1711 if (error != 0) {
fe8ab488 1712 break;
1713 }
1714 if (action_msg->cfa_in_peek_offset != 0 ||
1715 action_msg->cfa_in_pass_offset != 0) {
1716 error = cfil_action_data_pass(so, cfil_info, kcunit, 0,
1717 action_msg->cfa_in_pass_offset,
1718 action_msg->cfa_in_peek_offset);
1719 }
1720 if (error == EJUSTRETURN) {
1721 error = 0;
1722 }
1723 break;
fe8ab488 1724
1725 case CFM_OP_DROP:
1726 error = cfil_action_drop(so, cfil_info, kcunit);
1727 break;
fe8ab488 1728
1729 default:
1730 error = EINVAL;
1731 break;
1732 }
1733unlock:
1734 socket_unlock(so, 1);
1735done:
1736 mbuf_freem(m);
1737
0a7de745 1738 if (error == 0) {
fe8ab488 1739 OSIncrementAtomic(&cfil_stats.cfs_ctl_send_ok);
0a7de745 1740 } else {
fe8ab488 1741 OSIncrementAtomic(&cfil_stats.cfs_ctl_send_bad);
0a7de745 1742 }
fe8ab488 1743
0a7de745 1744 return error;
1745}
1746
1747static errno_t
1748cfil_ctl_getopt(kern_ctl_ref kctlref, u_int32_t kcunit, void *unitinfo,
0a7de745 1749 int opt, void *data, size_t *len)
1750{
1751#pragma unused(kctlref, opt)
d9a64523 1752 struct cfil_info *cfil_info = NULL;
0a7de745 1753 errno_t error = 0;
1754 struct content_filter *cfc = (struct content_filter *)unitinfo;
1755
1756 CFIL_LOG(LOG_NOTICE, "");
1757
1758 cfil_rw_lock_shared(&cfil_lck_rw);
1759
1760 if (content_filters == NULL) {
1761 CFIL_LOG(LOG_ERR, "no content filter");
1762 error = EINVAL;
1763 goto done;
1764 }
1765 if (kcunit > MAX_CONTENT_FILTER) {
1766 CFIL_LOG(LOG_ERR, "kcunit %u > MAX_CONTENT_FILTER (%d)",
0a7de745 1767 kcunit, MAX_CONTENT_FILTER);
1768 error = EINVAL;
1769 goto done;
1770 }
1771 if (cfc != (void *)content_filters[kcunit - 1]) {
1772 CFIL_LOG(LOG_ERR, "unitinfo does not match for kcunit %u",
0a7de745 1773 kcunit);
1774 error = EINVAL;
1775 goto done;
1776 }
1777 switch (opt) {
1778 case CFIL_OPT_NECP_CONTROL_UNIT:
1779 if (*len < sizeof(uint32_t)) {
1780 CFIL_LOG(LOG_ERR, "len too small %lu", *len);
1781 error = EINVAL;
1782 goto done;
1783 }
1784 if (data != NULL) {
1785 *(uint32_t *)data = cfc->cf_necp_control_unit;
1786 }
1787 break;
1788 case CFIL_OPT_GET_SOCKET_INFO:
1789 if (*len != sizeof(struct cfil_opt_sock_info)) {
1790 CFIL_LOG(LOG_ERR, "len does not match %lu", *len);
1791 error = EINVAL;
1792 goto done;
1793 }
1794 if (data == NULL) {
1795 CFIL_LOG(LOG_ERR, "data not passed");
1796 error = EINVAL;
1797 goto done;
1798 }
5ba3f43e 1799
1800 struct cfil_opt_sock_info *sock_info =
1801 (struct cfil_opt_sock_info *) data;
5ba3f43e 1802
1803 // Unlock here so that we never hold both cfil_lck_rw and the
1804 // socket_lock at the same time. Otherwise, this can deadlock
1805 // because soclose() takes the socket_lock and then exclusive
1806 // cfil_lck_rw and we require the opposite order.
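// Illustrative deadlock (hypothetical interleaving): thread A in
// soclose() holds the socket_lock and waits for exclusive
// cfil_lck_rw, while this thread holds shared cfil_lck_rw and
// waits for the socket_lock; neither can make progress.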
5ba3f43e 1807
1808 // WARNING: Be sure to never use anything protected
1809 // by cfil_lck_rw beyond this point.
1810 // WARNING: Be sure to avoid fallthrough and
1811 // goto return_already_unlocked from this branch.
1812 cfil_rw_unlock_shared(&cfil_lck_rw);
5ba3f43e 1813
1814 // Search (TCP+UDP) and lock socket
1815 struct socket *sock =
1816 cfil_socket_from_sock_id(sock_info->cfs_sock_id, false);
1817 if (sock == NULL) {
d9a64523 1818#if LIFECYCLE_DEBUG
1819 CFIL_LOG(LOG_ERR, "CFIL: GET_SOCKET_INFO failed: bad sock_id %llu",
1820 sock_info->cfs_sock_id);
d9a64523 1821#endif
1822 error = ENOENT;
1823 goto return_already_unlocked;
1824 }
d9a64523 1825
1826 cfil_info = (sock->so_cfil_db != NULL) ?
1827 cfil_db_get_cfil_info(sock->so_cfil_db, sock_info->cfs_sock_id) : sock->so_cfil;
5ba3f43e 1828
0a7de745 1829 if (cfil_info == NULL) {
d9a64523 1830#if LIFECYCLE_DEBUG
1831 CFIL_LOG(LOG_ERR, "CFIL: GET_SOCKET_INFO failed: so %llx not attached, cannot fetch info",
1832 (uint64_t)VM_KERNEL_ADDRPERM(sock));
d9a64523 1833#endif
1834 error = EINVAL;
1835 socket_unlock(sock, 1);
1836 goto return_already_unlocked;
1837 }
5ba3f43e 1838
1839 // Fill out family, type, and protocol
1840 sock_info->cfs_sock_family = sock->so_proto->pr_domain->dom_family;
1841 sock_info->cfs_sock_type = sock->so_proto->pr_type;
1842 sock_info->cfs_sock_protocol = sock->so_proto->pr_protocol;
1843
1844 // Source and destination addresses
1845 struct inpcb *inp = sotoinpcb(sock);
1846 if (inp->inp_vflag & INP_IPV6) {
1847 struct in6_addr *laddr = NULL, *faddr = NULL;
1848 u_int16_t lport = 0, fport = 0;
1849
1850 cfil_get_flow_address_v6(cfil_info->cfi_hash_entry, inp,
1851 &laddr, &faddr, &lport, &fport);
1852 fill_ip6_sockaddr_4_6(&sock_info->cfs_local, laddr, lport);
1853 fill_ip6_sockaddr_4_6(&sock_info->cfs_remote, faddr, fport);
1854 } else if (inp->inp_vflag & INP_IPV4) {
1855 struct in_addr laddr = {0}, faddr = {0};
1856 u_int16_t lport = 0, fport = 0;
1857
1858 cfil_get_flow_address(cfil_info->cfi_hash_entry, inp,
1859 &laddr, &faddr, &lport, &fport);
1860 fill_ip_sockaddr_4_6(&sock_info->cfs_local, laddr, lport);
1861 fill_ip_sockaddr_4_6(&sock_info->cfs_remote, faddr, fport);
1862 }
5ba3f43e 1863
1864 // Set the pid info
1865 sock_info->cfs_pid = sock->last_pid;
1866 memcpy(sock_info->cfs_uuid, sock->last_uuid, sizeof(uuid_t));
5ba3f43e 1867
1868 if (sock->so_flags & SOF_DELEGATED) {
1869 sock_info->cfs_e_pid = sock->e_pid;
1870 memcpy(sock_info->cfs_e_uuid, sock->e_uuid, sizeof(uuid_t));
1871 } else {
1872 sock_info->cfs_e_pid = sock->last_pid;
1873 memcpy(sock_info->cfs_e_uuid, sock->last_uuid, sizeof(uuid_t));
1874 }
5ba3f43e 1875
0a7de745 1876 socket_unlock(sock, 1);
5ba3f43e 1877
1878 goto return_already_unlocked;
1879 default:
1880 error = ENOPROTOOPT;
1881 break;
1882 }
1883done:
1884 cfil_rw_unlock_shared(&cfil_lck_rw);
1885
0a7de745 1886 return error;
5ba3f43e 1887
0a7de745 1888return_already_unlocked:
5ba3f43e 1889
0a7de745 1890 return error;
1891}
1892
1893static errno_t
1894cfil_ctl_setopt(kern_ctl_ref kctlref, u_int32_t kcunit, void *unitinfo,
0a7de745 1895 int opt, void *data, size_t len)
1896{
1897#pragma unused(kctlref, opt)
0a7de745 1898 errno_t error = 0;
1899 struct content_filter *cfc = (struct content_filter *)unitinfo;
1900
1901 CFIL_LOG(LOG_NOTICE, "");
1902
1903 cfil_rw_lock_exclusive(&cfil_lck_rw);
1904
1905 if (content_filters == NULL) {
1906 CFIL_LOG(LOG_ERR, "no content filter");
1907 error = EINVAL;
1908 goto done;
1909 }
1910 if (kcunit > MAX_CONTENT_FILTER) {
1911 CFIL_LOG(LOG_ERR, "kcunit %u > MAX_CONTENT_FILTER (%d)",
0a7de745 1912 kcunit, MAX_CONTENT_FILTER);
1913 error = EINVAL;
1914 goto done;
1915 }
1916 if (cfc != (void *)content_filters[kcunit - 1]) {
1917 CFIL_LOG(LOG_ERR, "unitinfo does not match for kcunit %u",
0a7de745 1918 kcunit);
1919 error = EINVAL;
1920 goto done;
1921 }
1922 switch (opt) {
1923 case CFIL_OPT_NECP_CONTROL_UNIT:
1924 if (len < sizeof(uint32_t)) {
1925 CFIL_LOG(LOG_ERR, "CFIL_OPT_NECP_CONTROL_UNIT "
1926 "len too small %lu", len);
1927 error = EINVAL;
1928 goto done;
1929 }
1930 if (cfc->cf_necp_control_unit != 0) {
1931 CFIL_LOG(LOG_ERR, "CFIL_OPT_NECP_CONTROL_UNIT "
1932 "already set %u",
1933 cfc->cf_necp_control_unit);
1934 error = EINVAL;
1935 goto done;
1936 }
1937 cfc->cf_necp_control_unit = *(uint32_t *)data;
1938 break;
1939 default:
1940 error = ENOPROTOOPT;
1941 break;
1942 }
1943done:
1944 cfil_rw_unlock_exclusive(&cfil_lck_rw);
1945
0a7de745 1946 return error;
1947}
1948
1949
1950static void
1951cfil_ctl_rcvd(kern_ctl_ref kctlref, u_int32_t kcunit, void *unitinfo, int flags)
1952{
1953#pragma unused(kctlref, flags)
1954 struct content_filter *cfc = (struct content_filter *)unitinfo;
1955 struct socket *so = NULL;
1956 int error;
1957 struct cfil_entry *entry;
d9a64523 1958 struct cfil_info *cfil_info = NULL;
1959
1960 CFIL_LOG(LOG_INFO, "");
1961
1962 if (content_filters == NULL) {
1963 CFIL_LOG(LOG_ERR, "no content filter");
1964 OSIncrementAtomic(&cfil_stats.cfs_ctl_rcvd_bad);
1965 return;
1966 }
1967 if (kcunit > MAX_CONTENT_FILTER) {
1968 CFIL_LOG(LOG_ERR, "kcunit %u > MAX_CONTENT_FILTER (%d)",
0a7de745 1969 kcunit, MAX_CONTENT_FILTER);
1970 OSIncrementAtomic(&cfil_stats.cfs_ctl_rcvd_bad);
1971 return;
1972 }
1973 cfil_rw_lock_shared(&cfil_lck_rw);
1974 if (cfc != (void *)content_filters[kcunit - 1]) {
1975 CFIL_LOG(LOG_ERR, "unitinfo does not match for kcunit %u",
0a7de745 1976 kcunit);
1977 OSIncrementAtomic(&cfil_stats.cfs_ctl_rcvd_bad);
1978 goto done;
1979 }
1980 /* Let's assume the flow control is lifted */
1981 if (cfc->cf_flags & CFF_FLOW_CONTROLLED) {
0a7de745 1982 if (!cfil_rw_lock_shared_to_exclusive(&cfil_lck_rw)) {
fe8ab488 1983 cfil_rw_lock_exclusive(&cfil_lck_rw);
0a7de745 1984 }
fe8ab488 1985
0a7de745 1986 cfc->cf_flags &= ~CFF_FLOW_CONTROLLED;
1987
1988 cfil_rw_lock_exclusive_to_shared(&cfil_lck_rw);
5ba3f43e 1989 LCK_RW_ASSERT(&cfil_lck_rw, LCK_RW_ASSERT_SHARED);
1990 }
1991 /*
1992 * Flow control will be raised again as soon as an entry cannot enqueue
1993 * to the kernel control socket
1994 */
1995 while ((cfc->cf_flags & CFF_FLOW_CONTROLLED) == 0) {
1996 verify_content_filter(cfc);
1997
1998 cfil_rw_lock_assert_held(&cfil_lck_rw, 0);
1999
2000 /* Find an entry that is flow controlled */
2001 TAILQ_FOREACH(entry, &cfc->cf_sock_entries, cfe_link) {
2002 if (entry->cfe_cfil_info == NULL ||
0a7de745 2003 entry->cfe_cfil_info->cfi_so == NULL) {
fe8ab488 2004 continue;
2005 }
2006 if ((entry->cfe_flags & CFEF_FLOW_CONTROLLED) == 0) {
fe8ab488 2007 continue;
0a7de745 2008 }
break; /* stop at the first flow-controlled entry so it can be serviced below */
fe8ab488 2009 }
0a7de745 2010 if (entry == NULL) {
fe8ab488 2011 break;
0a7de745 2012 }
2013
2014 OSIncrementAtomic(&cfil_stats.cfs_ctl_rcvd_flow_lift);
2015
2016 cfil_info = entry->cfe_cfil_info;
2017 so = cfil_info->cfi_so;
2018
2019 cfil_rw_unlock_shared(&cfil_lck_rw);
2020 socket_lock(so, 1);
2021
2022 do {
d9a64523 2023 error = cfil_acquire_sockbuf(so, cfil_info, 1);
0a7de745 2024 if (error == 0) {
d9a64523 2025 error = cfil_data_service_ctl_q(so, cfil_info, kcunit, 1);
0a7de745 2026 }
fe8ab488 2027 cfil_release_sockbuf(so, 1);
0a7de745 2028 if (error != 0) {
fe8ab488 2029 break;
0a7de745 2030 }
fe8ab488 2031
d9a64523 2032 error = cfil_acquire_sockbuf(so, cfil_info, 0);
0a7de745 2033 if (error == 0) {
d9a64523 2034 error = cfil_data_service_ctl_q(so, cfil_info, kcunit, 0);
0a7de745 2035 }
2036 cfil_release_sockbuf(so, 0);
2037 } while (0);
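/*
 * Note: the do { } while (0) above exists only so the sockbuf
 * error paths can break out before servicing the other direction.
 */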
2038
2039 socket_lock_assert_owned(so);
2040 socket_unlock(so, 1);
2041
2042 cfil_rw_lock_shared(&cfil_lck_rw);
2043 }
2044done:
2045 cfil_rw_unlock_shared(&cfil_lck_rw);
2046}
2047
2048void
2049cfil_init(void)
2050{
2051 struct kern_ctl_reg kern_ctl;
2052 errno_t error = 0;
2053 vm_size_t content_filter_size = 0; /* size of content_filter */
2054 vm_size_t cfil_info_size = 0; /* size of cfil_info */
2055 vm_size_t cfil_hash_entry_size = 0; /* size of cfil_hash_entry */
2056 vm_size_t cfil_db_size = 0; /* size of cfil_db */
2057 unsigned int mbuf_limit = 0;
2058
2059 CFIL_LOG(LOG_NOTICE, "");
2060
2061 /*
2062 * Compile time verifications
2063 */
2064 _CASSERT(CFIL_MAX_FILTER_COUNT == MAX_CONTENT_FILTER);
2065 _CASSERT(sizeof(struct cfil_filter_stat) % sizeof(uint32_t) == 0);
2066 _CASSERT(sizeof(struct cfil_entry_stat) % sizeof(uint32_t) == 0);
2067 _CASSERT(sizeof(struct cfil_sock_stat) % sizeof(uint32_t) == 0);
2068
2069 /*
2070 * Run-time verifications
2071 */
2072 VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_ctl_q_in_enqueued,
0a7de745 2073 sizeof(uint32_t)));
fe8ab488 2074 VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_ctl_q_out_enqueued,
0a7de745 2075 sizeof(uint32_t)));
fe8ab488 2076 VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_ctl_q_in_peeked,
0a7de745 2077 sizeof(uint32_t)));
fe8ab488 2078 VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_ctl_q_out_peeked,
0a7de745 2079 sizeof(uint32_t)));
2080
2081 VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_pending_q_in_enqueued,
0a7de745 2082 sizeof(uint32_t)));
fe8ab488 2083 VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_pending_q_out_enqueued,
0a7de745 2084 sizeof(uint32_t)));
2085
2086 VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_inject_q_in_enqueued,
0a7de745 2087 sizeof(uint32_t)));
fe8ab488 2088 VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_inject_q_out_enqueued,
0a7de745 2089 sizeof(uint32_t)));
fe8ab488 2090 VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_inject_q_in_passed,
0a7de745 2091 sizeof(uint32_t)));
fe8ab488 2092 VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_inject_q_out_passed,
0a7de745 2093 sizeof(uint32_t)));
2094
2095 /*
2096 * Zone for content filters kernel control sockets
2097 */
2098 content_filter_size = sizeof(struct content_filter);
2099 content_filter_zone = zinit(content_filter_size,
2100 CONTENT_FILTER_ZONE_MAX * content_filter_size,
2101 0,
2102 CONTENT_FILTER_ZONE_NAME);
2103 if (content_filter_zone == NULL) {
2104 panic("%s: zinit(%s) failed", __func__,
0a7de745 2105 CONTENT_FILTER_ZONE_NAME);
2106 /* NOTREACHED */
2107 }
2108 zone_change(content_filter_zone, Z_CALLERACCT, FALSE);
2109 zone_change(content_filter_zone, Z_EXPAND, TRUE);
2110
2111 /*
2112 * Zone for per socket content filters
2113 */
2114 cfil_info_size = sizeof(struct cfil_info);
2115 cfil_info_zone = zinit(cfil_info_size,
2116 CFIL_INFO_ZONE_MAX * cfil_info_size,
2117 0,
2118 CFIL_INFO_ZONE_NAME);
2119 if (cfil_info_zone == NULL) {
2120 panic("%s: zinit(%s) failed", __func__, CFIL_INFO_ZONE_NAME);
2121 /* NOTREACHED */
2122 }
2123 zone_change(cfil_info_zone, Z_CALLERACCT, FALSE);
2124 zone_change(cfil_info_zone, Z_EXPAND, TRUE);
2125
2126 /*
2127 * Zone for content filters cfil hash entries and db
2128 */
2129 cfil_hash_entry_size = sizeof(struct cfil_hash_entry);
2130 cfil_hash_entry_zone = zinit(cfil_hash_entry_size,
2131 CFIL_HASH_ENTRY_ZONE_MAX * cfil_hash_entry_size,
2132 0,
2133 CFIL_HASH_ENTRY_ZONE_NAME);
2134 if (cfil_hash_entry_zone == NULL) {
2135 panic("%s: zinit(%s) failed", __func__, CFIL_HASH_ENTRY_ZONE_NAME);
2136 /* NOTREACHED */
2137 }
2138 zone_change(cfil_hash_entry_zone, Z_CALLERACCT, FALSE);
2139 zone_change(cfil_hash_entry_zone, Z_EXPAND, TRUE);
2140
2141 cfil_db_size = sizeof(struct cfil_db);
2142 cfil_db_zone = zinit(cfil_db_size,
2143 CFIL_DB_ZONE_MAX * cfil_db_size,
2144 0,
2145 CFIL_DB_ZONE_NAME);
2146 if (cfil_db_zone == NULL) {
2147 panic("%s: zinit(%s) failed", __func__, CFIL_DB_ZONE_NAME);
2148 /* NOTREACHED */
2149 }
2150 zone_change(cfil_db_zone, Z_CALLERACCT, FALSE);
2151 zone_change(cfil_db_zone, Z_EXPAND, TRUE);
2152
2153 /*
2154 * Allocate locks
2155 */
2156 cfil_lck_grp_attr = lck_grp_attr_alloc_init();
2157 if (cfil_lck_grp_attr == NULL) {
2158 panic("%s: lck_grp_attr_alloc_init failed", __func__);
2159 /* NOTREACHED */
2160 }
2161 cfil_lck_grp = lck_grp_alloc_init("content filter",
0a7de745 2162 cfil_lck_grp_attr);
2163 if (cfil_lck_grp == NULL) {
2164 panic("%s: lck_grp_alloc_init failed", __func__);
2165 /* NOTREACHED */
2166 }
2167 cfil_lck_attr = lck_attr_alloc_init();
2168 if (cfil_lck_attr == NULL) {
2169 panic("%s: lck_attr_alloc_init failed", __func__);
2170 /* NOTREACHED */
2171 }
2172 lck_rw_init(&cfil_lck_rw, cfil_lck_grp, cfil_lck_attr);
2173
2174 TAILQ_INIT(&cfil_sock_head);
2175
2176 /*
2177 * Register kernel control
2178 */
2179 bzero(&kern_ctl, sizeof(kern_ctl));
2180 strlcpy(kern_ctl.ctl_name, CONTENT_FILTER_CONTROL_NAME,
0a7de745 2181 sizeof(kern_ctl.ctl_name));
2182 kern_ctl.ctl_flags = CTL_FLAG_PRIVILEGED | CTL_FLAG_REG_EXTENDED;
2183 kern_ctl.ctl_sendsize = 512 * 1024; /* enough? */
2184 kern_ctl.ctl_recvsize = 512 * 1024; /* enough? */
2185 kern_ctl.ctl_connect = cfil_ctl_connect;
2186 kern_ctl.ctl_disconnect = cfil_ctl_disconnect;
2187 kern_ctl.ctl_send = cfil_ctl_send;
2188 kern_ctl.ctl_getopt = cfil_ctl_getopt;
2189 kern_ctl.ctl_setopt = cfil_ctl_setopt;
2190 kern_ctl.ctl_rcvd = cfil_ctl_rcvd;
2191 error = ctl_register(&kern_ctl, &cfil_kctlref);
2192 if (error != 0) {
2193 CFIL_LOG(LOG_ERR, "ctl_register failed: %d", error);
2194 return;
2195 }
2196
2197 // Spawn thread for garbage collection
2198 if (kernel_thread_start(cfil_udp_gc_thread_func, NULL,
0a7de745 2199 &cfil_udp_gc_thread) != KERN_SUCCESS) {
2200 panic_plain("%s: Can't create UDP GC thread", __func__);
2201 /* NOTREACHED */
2202 }
2203 /* this must not fail */
2204 VERIFY(cfil_udp_gc_thread != NULL);
2205
2206 // Set UDP per-flow mbuf thresholds to 1/32 of platform max
2207 mbuf_limit = MAX(UDP_FLOW_GC_MBUF_CNT_MAX, (nmbclusters << MCLSHIFT) >> UDP_FLOW_GC_MBUF_SHIFT);
2208 cfil_udp_gc_mbuf_num_max = (mbuf_limit >> MCLSHIFT);
2209 cfil_udp_gc_mbuf_cnt_max = mbuf_limit;
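/*
 * Illustrative arithmetic (assumed values): with nmbclusters = 65536
 * and, assuming 2 KB clusters (MCLSHIFT == 11), the platform max is
 * 128 MB of mbuf data; >> UDP_FLOW_GC_MBUF_SHIFT caps a flow at 4 MB
 * (provided that exceeds the UDP_FLOW_GC_MBUF_CNT_MAX floor), so
 * cfil_udp_gc_mbuf_cnt_max = 4 MB and cfil_udp_gc_mbuf_num_max =
 * 4 MB >> MCLSHIFT = 2048 mbufs.
 */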
2210}
2211
2212struct cfil_info *
d9a64523 2213cfil_info_alloc(struct socket *so, struct cfil_hash_entry *hash_entry)
2214{
2215 int kcunit;
2216 struct cfil_info *cfil_info = NULL;
2217 struct inpcb *inp = sotoinpcb(so);
2218
2219 CFIL_LOG(LOG_INFO, "");
2220
2221 socket_lock_assert_owned(so);
2222
2223 cfil_info = zalloc(cfil_info_zone);
0a7de745 2224 if (cfil_info == NULL) {
fe8ab488 2225 goto done;
0a7de745 2226 }
2227 bzero(cfil_info, sizeof(struct cfil_info));
2228
2229 cfil_queue_init(&cfil_info->cfi_snd.cfi_inject_q);
2230 cfil_queue_init(&cfil_info->cfi_rcv.cfi_inject_q);
2231
2232 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
2233 struct cfil_entry *entry;
2234
2235 entry = &cfil_info->cfi_entries[kcunit - 1];
2236 entry->cfe_cfil_info = cfil_info;
2237
2238 /* Initialize the filter entry */
2239 entry->cfe_filter = NULL;
2240 entry->cfe_flags = 0;
2241 entry->cfe_necp_control_unit = 0;
2242 entry->cfe_snd.cfe_pass_offset = 0;
2243 entry->cfe_snd.cfe_peek_offset = 0;
2244 entry->cfe_snd.cfe_peeked = 0;
2245 entry->cfe_rcv.cfe_pass_offset = 0;
2246 entry->cfe_rcv.cfe_peek_offset = 0;
2247 entry->cfe_rcv.cfe_peeked = 0;
2248 /*
2249 * Timestamp the last action to avoid prematurely
2250 * triggering garbage collection
2251 */
2252 microuptime(&entry->cfe_last_action);
2253
2254 cfil_queue_init(&entry->cfe_snd.cfe_pending_q);
2255 cfil_queue_init(&entry->cfe_rcv.cfe_pending_q);
2256 cfil_queue_init(&entry->cfe_snd.cfe_ctl_q);
2257 cfil_queue_init(&entry->cfe_rcv.cfe_ctl_q);
2258 }
2259
2260 cfil_rw_lock_exclusive(&cfil_lck_rw);
0a7de745 2261
2262 /*
2263 * Create a cfi_sock_id that's not the socket pointer!
2264 */
2265
2266 if (hash_entry == NULL) {
d9a64523 2267 // This is the TCP case, cfil_info is tracked per socket
2268 if (inp->inp_flowhash == 0) {
2269 inp->inp_flowhash = inp_calc_flowhash(inp);
2270 }
2271
2272 so->so_cfil = cfil_info;
2273 cfil_info->cfi_so = so;
2274 cfil_info->cfi_sock_id =
2275 ((so->so_gencnt << 32) | inp->inp_flowhash);
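/*
 * Illustrative layout (made-up values): so_gencnt 0x1f4 and
 * inp_flowhash 0x9e3779b9 yield cfi_sock_id 0x000001f49e3779b9,
 * i.e. the generation count in the upper 32 bits and the flow
 * hash in the lower 32 bits.
 */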
2276 } else {
2277 // This is the UDP case, cfil_info is tracked in per-socket hash
d9a64523 2278 cfil_info->cfi_so = so;
0a7de745 2279 hash_entry->cfentry_cfil = cfil_info;
d9a64523 2280 cfil_info->cfi_hash_entry = hash_entry;
0a7de745 2281 cfil_info->cfi_sock_id = ((so->so_gencnt << 32) | (hash_entry->cfentry_flowhash & 0xffffffff));
d9a64523 2282 CFIL_LOG(LOG_DEBUG, "CFIL: UDP inp_flowhash %x so_gencnt %llx entry flowhash %x sockID %llx",
0a7de745 2283 inp->inp_flowhash, so->so_gencnt, hash_entry->cfentry_flowhash, cfil_info->cfi_sock_id);
2284
2285 // Wake up gc thread if this is first flow added
2286 if (cfil_sock_udp_attached_count == 0) {
2287 thread_wakeup((caddr_t)&cfil_sock_udp_attached_count);
2288 }
2289
2290 cfil_sock_udp_attached_count++;
0a7de745 2291 }
2292
2293 TAILQ_INSERT_TAIL(&cfil_sock_head, cfil_info, cfi_link);
2294
2295 cfil_sock_attached_count++;
2296
2297 cfil_rw_unlock_exclusive(&cfil_lck_rw);
2298
2299done:
0a7de745 2300 if (cfil_info != NULL) {
fe8ab488 2301 OSIncrementAtomic(&cfil_stats.cfs_cfi_alloc_ok);
0a7de745 2302 } else {
fe8ab488 2303 OSIncrementAtomic(&cfil_stats.cfs_cfi_alloc_fail);
0a7de745 2304 }
fe8ab488 2305
0a7de745 2306 return cfil_info;
2307}
2308
2309int
d9a64523 2310cfil_info_attach_unit(struct socket *so, uint32_t filter_control_unit, struct cfil_info *cfil_info)
2311{
2312 int kcunit;
2313 int attached = 0;
2314
2315 CFIL_LOG(LOG_INFO, "");
2316
2317 socket_lock_assert_owned(so);
2318
2319 cfil_rw_lock_exclusive(&cfil_lck_rw);
2320
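/*
 * Attach to the first registered filter whose NECP control unit
 * matches; each entry slot in cfi_entries maps 1:1 to a kcunit.
 */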
2321 for (kcunit = 1;
2322 content_filters != NULL && kcunit <= MAX_CONTENT_FILTER;
2323 kcunit++) {
2324 struct content_filter *cfc = content_filters[kcunit - 1];
2325 struct cfil_entry *entry;
2326
0a7de745 2327 if (cfc == NULL) {
fe8ab488 2328 continue;
2329 }
2330 if (cfc->cf_necp_control_unit != filter_control_unit) {
fe8ab488 2331 continue;
0a7de745 2332 }
2333
2334 entry = &cfil_info->cfi_entries[kcunit - 1];
2335
2336 entry->cfe_filter = cfc;
2337 entry->cfe_necp_control_unit = filter_control_unit;
2338 TAILQ_INSERT_TAIL(&cfc->cf_sock_entries, entry, cfe_link);
2339 cfc->cf_sock_count++;
2340 verify_content_filter(cfc);
2341 attached = 1;
2342 entry->cfe_flags |= CFEF_CFIL_ATTACHED;
2343 break;
2344 }
2345
2346 cfil_rw_unlock_exclusive(&cfil_lck_rw);
2347
0a7de745 2348 return attached;
2349}
2350
2351static void
d9a64523 2352cfil_info_free(struct cfil_info *cfil_info)
2353{
2354 int kcunit;
2355 uint64_t in_drain = 0;
2356 uint64_t out_drained = 0;
2357
0a7de745 2358 if (cfil_info == NULL) {
fe8ab488 2359 return;
0a7de745 2360 }
2361
2362 CFIL_LOG(LOG_INFO, "");
2363
2364 cfil_rw_lock_exclusive(&cfil_lck_rw);
2365
2366 for (kcunit = 1;
2367 content_filters != NULL && kcunit <= MAX_CONTENT_FILTER;
2368 kcunit++) {
2369 struct cfil_entry *entry;
2370 struct content_filter *cfc;
2371
2372 entry = &cfil_info->cfi_entries[kcunit - 1];
2373
2374 /* Don't be silly and try to detach twice */
0a7de745 2375 if (entry->cfe_filter == NULL) {
fe8ab488 2376 continue;
0a7de745 2377 }
2378
2379 cfc = content_filters[kcunit - 1];
2380
2381 VERIFY(cfc == entry->cfe_filter);
2382
2383 entry->cfe_filter = NULL;
2384 entry->cfe_necp_control_unit = 0;
2385 TAILQ_REMOVE(&cfc->cf_sock_entries, entry, cfe_link);
2386 cfc->cf_sock_count--;
2387
2388 verify_content_filter(cfc);
2389 }
0a7de745 2390 if (cfil_info->cfi_hash_entry != NULL) {
d9a64523 2391 cfil_sock_udp_attached_count--;
0a7de745 2392 }
2393 cfil_sock_attached_count--;
2394 TAILQ_REMOVE(&cfil_sock_head, cfil_info, cfi_link);
2395
2396 out_drained += cfil_queue_drain(&cfil_info->cfi_snd.cfi_inject_q);
2397 in_drain += cfil_queue_drain(&cfil_info->cfi_rcv.cfi_inject_q);
2398
2399 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
2400 struct cfil_entry *entry;
2401
2402 entry = &cfil_info->cfi_entries[kcunit - 1];
2403 out_drained += cfil_queue_drain(&entry->cfe_snd.cfe_pending_q);
2404 in_drain += cfil_queue_drain(&entry->cfe_rcv.cfe_pending_q);
2405 out_drained += cfil_queue_drain(&entry->cfe_snd.cfe_ctl_q);
2406 in_drain += cfil_queue_drain(&entry->cfe_rcv.cfe_ctl_q);
2407 }
2408 cfil_rw_unlock_exclusive(&cfil_lck_rw);
2409
0a7de745 2410 if (out_drained) {
fe8ab488 2411 OSIncrementAtomic(&cfil_stats.cfs_flush_out_free);
2412 }
2413 if (in_drain) {
fe8ab488 2414 OSIncrementAtomic(&cfil_stats.cfs_flush_in_free);
0a7de745 2415 }
2416
2417 zfree(cfil_info_zone, cfil_info);
2418}
2419
2420/*
2421 * Entry point from Sockets layer
2422 * The socket is locked.
2423 */
2424errno_t
2425cfil_sock_attach(struct socket *so)
2426{
2427 errno_t error = 0;
2428 uint32_t filter_control_unit;
2429
2430 socket_lock_assert_owned(so);
2431
813fb2f6 2432 /* Limit ourselves to TCP that are not MPTCP subflows */
fe8ab488 2433 if ((so->so_proto->pr_domain->dom_family != PF_INET &&
2434 so->so_proto->pr_domain->dom_family != PF_INET6) ||
2435 so->so_proto->pr_type != SOCK_STREAM ||
2436 so->so_proto->pr_protocol != IPPROTO_TCP ||
2437 (so->so_flags & SOF_MP_SUBFLOW) != 0 ||
2438 (so->so_flags1 & SOF1_CONTENT_FILTER_SKIP) != 0) {
fe8ab488 2439 goto done;
0a7de745 2440 }
2441
2442 filter_control_unit = necp_socket_get_content_filter_control_unit(so);
0a7de745 2443 if (filter_control_unit == 0) {
fe8ab488 2444 goto done;
0a7de745 2445 }
2446
2447 if ((filter_control_unit & NECP_MASK_USERSPACE_ONLY) != 0) {
2448 OSIncrementAtomic(&cfil_stats.cfs_sock_userspace_only);
2449 goto done;
2450 }
2451 if (cfil_active_count == 0) {
2452 OSIncrementAtomic(&cfil_stats.cfs_sock_attach_in_vain);
2453 goto done;
2454 }
2455 if (so->so_cfil != NULL) {
2456 OSIncrementAtomic(&cfil_stats.cfs_sock_attach_already);
2457 CFIL_LOG(LOG_ERR, "already attached");
2458 } else {
d9a64523 2459 cfil_info_alloc(so, NULL);
2460 if (so->so_cfil == NULL) {
2461 error = ENOMEM;
2462 OSIncrementAtomic(&cfil_stats.cfs_sock_attach_no_mem);
2463 goto done;
2464 }
2465 }
d9a64523 2466 if (cfil_info_attach_unit(so, filter_control_unit, so->so_cfil) == 0) {
fe8ab488 2467 CFIL_LOG(LOG_ERR, "cfil_info_attach_unit(%u) failed",
0a7de745 2468 filter_control_unit);
2469 OSIncrementAtomic(&cfil_stats.cfs_sock_attach_failed);
2470 goto done;
2471 }
d9a64523 2472 CFIL_LOG(LOG_INFO, "so %llx filter_control_unit %u sockID %llx",
2473 (uint64_t)VM_KERNEL_ADDRPERM(so),
2474 filter_control_unit, so->so_cfil->cfi_sock_id);
2475
2476 so->so_flags |= SOF_CONTENT_FILTER;
2477 OSIncrementAtomic(&cfil_stats.cfs_sock_attached);
2478
2479 /* Hold a reference on the socket */
2480 so->so_usecount++;
2481
d9a64523 2482 error = cfil_dispatch_attach_event(so, so->so_cfil, filter_control_unit);
fe8ab488 2483 /* We can recover from flow control or out of memory errors */
0a7de745 2484 if (error == ENOBUFS || error == ENOMEM) {
fe8ab488 2485 error = 0;
0a7de745 2486 } else if (error != 0) {
fe8ab488 2487 goto done;
0a7de745 2488 }
2489
2490 CFIL_INFO_VERIFY(so->so_cfil);
2491done:
0a7de745 2492 return error;
2493}
2494
2495/*
2496 * Entry point from Sockets layer
2497 * The socket is locked.
2498 */
2499errno_t
2500cfil_sock_detach(struct socket *so)
2501{
2502 if (IS_UDP(so)) {
2503 cfil_db_free(so);
0a7de745 2504 return 0;
2505 }
2506
fe8ab488 2507 if (so->so_cfil) {
2508 if (so->so_flags & SOF_CONTENT_FILTER) {
2509 so->so_flags &= ~SOF_CONTENT_FILTER;
2510 VERIFY(so->so_usecount > 0);
2511 so->so_usecount--;
2512 }
2513 cfil_info_free(so->so_cfil);
2514 so->so_cfil = NULL;
2515 OSIncrementAtomic(&cfil_stats.cfs_sock_detached);
2516 }
0a7de745 2517 return 0;
2518}
2519
2520static int
d9a64523 2521cfil_dispatch_attach_event(struct socket *so, struct cfil_info *cfil_info, uint32_t filter_control_unit)
2522{
2523 errno_t error = 0;
2524 struct cfil_entry *entry = NULL;
2525 struct cfil_msg_sock_attached msg_attached;
2526 uint32_t kcunit;
5ba3f43e 2527 struct content_filter *cfc = NULL;
2528
2529 socket_lock_assert_owned(so);
2530
2531 cfil_rw_lock_shared(&cfil_lck_rw);
2532
2533 if (so->so_proto == NULL || so->so_proto->pr_domain == NULL) {
2534 error = EINVAL;
2535 goto done;
2536 }
2537 /*
2538 * Find the matching filter unit
2539 */
2540 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
2541 cfc = content_filters[kcunit - 1];
2542
0a7de745 2543 if (cfc == NULL) {
fe8ab488 2544 continue;
2545 }
2546 if (cfc->cf_necp_control_unit != filter_control_unit) {
fe8ab488 2547 continue;
0a7de745 2548 }
d9a64523 2549 entry = &cfil_info->cfi_entries[kcunit - 1];
0a7de745 2550 if (entry->cfe_filter == NULL) {
fe8ab488 2551 continue;
0a7de745 2552 }
2553
2554 VERIFY(cfc == entry->cfe_filter);
2555
2556 break;
2557 }
2558
0a7de745 2559 if (entry == NULL || entry->cfe_filter == NULL) {
fe8ab488 2560 goto done;
0a7de745 2561 }
fe8ab488 2562
0a7de745 2563 if ((entry->cfe_flags & CFEF_SENT_SOCK_ATTACHED)) {
fe8ab488 2564 goto done;
0a7de745 2565 }
2566
2567 CFIL_LOG(LOG_INFO, "so %llx filter_control_unit %u kcunit %u",
0a7de745 2568 (uint64_t)VM_KERNEL_ADDRPERM(so), filter_control_unit, kcunit);
2569
2570 /* Would be wasteful to try when flow controlled */
2571 if (cfc->cf_flags & CFF_FLOW_CONTROLLED) {
2572 error = ENOBUFS;
2573 goto done;
2574 }
2575
2576 bzero(&msg_attached, sizeof(struct cfil_msg_sock_attached));
2577 msg_attached.cfs_msghdr.cfm_len = sizeof(struct cfil_msg_sock_attached);
2578 msg_attached.cfs_msghdr.cfm_version = CFM_VERSION_CURRENT;
2579 msg_attached.cfs_msghdr.cfm_type = CFM_TYPE_EVENT;
2580 msg_attached.cfs_msghdr.cfm_op = CFM_OP_SOCKET_ATTACHED;
2581 msg_attached.cfs_msghdr.cfm_sock_id = entry->cfe_cfil_info->cfi_sock_id;
2582
2583 msg_attached.cfs_sock_family = so->so_proto->pr_domain->dom_family;
2584 msg_attached.cfs_sock_type = so->so_proto->pr_type;
2585 msg_attached.cfs_sock_protocol = so->so_proto->pr_protocol;
2586 msg_attached.cfs_pid = so->last_pid;
2587 memcpy(msg_attached.cfs_uuid, so->last_uuid, sizeof(uuid_t));
2588 if (so->so_flags & SOF_DELEGATED) {
2589 msg_attached.cfs_e_pid = so->e_pid;
2590 memcpy(msg_attached.cfs_e_uuid, so->e_uuid, sizeof(uuid_t));
2591 } else {
2592 msg_attached.cfs_e_pid = so->last_pid;
2593 memcpy(msg_attached.cfs_e_uuid, so->last_uuid, sizeof(uuid_t));
2594 }
2595
2596#if LIFECYCLE_DEBUG
2597 CFIL_LOG(LOG_DEBUG, "CFIL: LIFECYCLE: SENDING ATTACH UP <sockID %llu> ",
0a7de745 2598 entry->cfe_cfil_info->cfi_sock_id);
2599#endif
2600
fe8ab488 2601 error = ctl_enqueuedata(entry->cfe_filter->cf_kcref,
2602 entry->cfe_filter->cf_kcunit,
2603 &msg_attached,
2604 sizeof(struct cfil_msg_sock_attached),
2605 CTL_DATA_EOR);
2606 if (error != 0) {
2607 CFIL_LOG(LOG_ERR, "ctl_enqueuedata() failed: %d", error);
2608 goto done;
2609 }
2610 microuptime(&entry->cfe_last_event);
2611 cfil_info->cfi_first_event.tv_sec = entry->cfe_last_event.tv_sec;
2612 cfil_info->cfi_first_event.tv_usec = entry->cfe_last_event.tv_usec;
5ba3f43e 2613
2614 entry->cfe_flags |= CFEF_SENT_SOCK_ATTACHED;
2615 OSIncrementAtomic(&cfil_stats.cfs_attach_event_ok);
2616done:
2617
2618 /* We can recover from flow control */
2619 if (error == ENOBUFS) {
2620 entry->cfe_flags |= CFEF_FLOW_CONTROLLED;
2621 OSIncrementAtomic(&cfil_stats.cfs_attach_event_flow_control);
2622
0a7de745 2623 if (!cfil_rw_lock_shared_to_exclusive(&cfil_lck_rw)) {
fe8ab488 2624 cfil_rw_lock_exclusive(&cfil_lck_rw);
0a7de745 2625 }
2626
2627 cfc->cf_flags |= CFF_FLOW_CONTROLLED;
2628
2629 cfil_rw_unlock_exclusive(&cfil_lck_rw);
2630 } else {
0a7de745 2631 if (error != 0) {
fe8ab488 2632 OSIncrementAtomic(&cfil_stats.cfs_attach_event_fail);
0a7de745 2633 }
2634
2635 cfil_rw_unlock_shared(&cfil_lck_rw);
2636 }
0a7de745 2637 return error;
2638}
2639
2640static int
d9a64523 2641cfil_dispatch_disconnect_event(struct socket *so, struct cfil_info *cfil_info, uint32_t kcunit, int outgoing)
2642{
2643 errno_t error = 0;
2644 struct mbuf *msg = NULL;
2645 struct cfil_entry *entry;
2646 struct cfe_buf *entrybuf;
2647 struct cfil_msg_hdr msg_disconnected;
2648 struct content_filter *cfc;
2649
2650 socket_lock_assert_owned(so);
2651
2652 cfil_rw_lock_shared(&cfil_lck_rw);
2653
d9a64523 2654 entry = &cfil_info->cfi_entries[kcunit - 1];
0a7de745 2655 if (outgoing) {
fe8ab488 2656 entrybuf = &entry->cfe_snd;
0a7de745 2657 } else {
fe8ab488 2658 entrybuf = &entry->cfe_rcv;
0a7de745 2659 }
2660
2661 cfc = entry->cfe_filter;
0a7de745 2662 if (cfc == NULL) {
fe8ab488 2663 goto done;
0a7de745 2664 }
2665
2666 CFIL_LOG(LOG_INFO, "so %llx kcunit %u outgoing %d",
0a7de745 2667 (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit, outgoing);
2668
2669 /*
2670 * Send the disconnection event once
2671 */
2672 if ((outgoing && (entry->cfe_flags & CFEF_SENT_DISCONNECT_OUT)) ||
0a7de745 2673 (!outgoing && (entry->cfe_flags & CFEF_SENT_DISCONNECT_IN))) {
fe8ab488 2674 CFIL_LOG(LOG_INFO, "so %llx disconnect already sent",
0a7de745 2675 (uint64_t)VM_KERNEL_ADDRPERM(so));
2676 goto done;
2677 }
2678
2679 /*
2680 * We're not disconnected as long as some data is waiting
2681 * to be delivered to the filter
2682 */
2683 if (outgoing && cfil_queue_empty(&entrybuf->cfe_ctl_q) == 0) {
2684 CFIL_LOG(LOG_INFO, "so %llx control queue not empty",
0a7de745 2685 (uint64_t)VM_KERNEL_ADDRPERM(so));
2686 error = EBUSY;
2687 goto done;
2688 }
2689 /* Would be wasteful to try when flow controlled */
2690 if (cfc->cf_flags & CFF_FLOW_CONTROLLED) {
2691 error = ENOBUFS;
2692 goto done;
2693 }
2694
2695#if LIFECYCLE_DEBUG
2696 cfil_info_log(LOG_ERR, cfil_info, outgoing ?
2697 "CFIL: LIFECYCLE: OUT - SENDING DISCONNECT UP":
2698 "CFIL: LIFECYCLE: IN - SENDING DISCONNECT UP");
2699#endif
2700
2701 bzero(&msg_disconnected, sizeof(struct cfil_msg_hdr));
2702 msg_disconnected.cfm_len = sizeof(struct cfil_msg_hdr);
2703 msg_disconnected.cfm_version = CFM_VERSION_CURRENT;
2704 msg_disconnected.cfm_type = CFM_TYPE_EVENT;
2705 msg_disconnected.cfm_op = outgoing ? CFM_OP_DISCONNECT_OUT :
0a7de745 2706 CFM_OP_DISCONNECT_IN;
2707 msg_disconnected.cfm_sock_id = entry->cfe_cfil_info->cfi_sock_id;
2708 error = ctl_enqueuedata(entry->cfe_filter->cf_kcref,
2709 entry->cfe_filter->cf_kcunit,
2710 &msg_disconnected,
2711 sizeof(struct cfil_msg_hdr),
2712 CTL_DATA_EOR);
2713 if (error != 0) {
2714 CFIL_LOG(LOG_ERR, "ctl_enqueuedata() failed: %d", error);
2715 mbuf_freem(msg);
2716 goto done;
2717 }
2718 microuptime(&entry->cfe_last_event);
d9a64523 2719 CFI_ADD_TIME_LOG(cfil_info, &entry->cfe_last_event, &cfil_info->cfi_first_event, msg_disconnected.cfm_op);
2720
2721 /* Remember we have sent the disconnection message */
2722 if (outgoing) {
2723 entry->cfe_flags |= CFEF_SENT_DISCONNECT_OUT;
2724 OSIncrementAtomic(&cfil_stats.cfs_disconnect_out_event_ok);
2725 } else {
2726 entry->cfe_flags |= CFEF_SENT_DISCONNECT_IN;
2727 OSIncrementAtomic(&cfil_stats.cfs_disconnect_in_event_ok);
2728 }
2729done:
2730 if (error == ENOBUFS) {
2731 entry->cfe_flags |= CFEF_FLOW_CONTROLLED;
2732 OSIncrementAtomic(
2733 &cfil_stats.cfs_disconnect_event_flow_control);
2734
0a7de745 2735 if (!cfil_rw_lock_shared_to_exclusive(&cfil_lck_rw)) {
fe8ab488 2736 cfil_rw_lock_exclusive(&cfil_lck_rw);
0a7de745 2737 }
2738
2739 cfc->cf_flags |= CFF_FLOW_CONTROLLED;
2740
2741 cfil_rw_unlock_exclusive(&cfil_lck_rw);
2742 } else {
0a7de745 2743 if (error != 0) {
2744 OSIncrementAtomic(
2745 &cfil_stats.cfs_disconnect_event_fail);
0a7de745 2746 }
2747
2748 cfil_rw_unlock_shared(&cfil_lck_rw);
2749 }
0a7de745 2750 return error;
2751}
2752
2753int
d9a64523 2754cfil_dispatch_closed_event(struct socket *so, struct cfil_info *cfil_info, int kcunit)
2755{
2756 struct cfil_entry *entry;
5ba3f43e 2757 struct cfil_msg_sock_closed msg_closed;
2758 errno_t error = 0;
2759 struct content_filter *cfc;
2760
2761 socket_lock_assert_owned(so);
2762
2763 cfil_rw_lock_shared(&cfil_lck_rw);
2764
d9a64523 2765 entry = &cfil_info->cfi_entries[kcunit - 1];
fe8ab488 2766 cfc = entry->cfe_filter;
0a7de745 2767 if (cfc == NULL) {
fe8ab488 2768 goto done;
0a7de745 2769 }
2770
2771 CFIL_LOG(LOG_INFO, "so %llx kcunit %d",
0a7de745 2772 (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit);
2773
2774 /* Would be wasteful to try when flow controlled */
2775 if (cfc->cf_flags & CFF_FLOW_CONTROLLED) {
2776 error = ENOBUFS;
2777 goto done;
2778 }
2779 /*
2780 * Send a single closed message per filter
2781 */
0a7de745 2782 if ((entry->cfe_flags & CFEF_SENT_SOCK_CLOSED) != 0) {
fe8ab488 2783 goto done;
2784 }
2785 if ((entry->cfe_flags & CFEF_SENT_SOCK_ATTACHED) == 0) {
fe8ab488 2786 goto done;
0a7de745 2787 }
fe8ab488 2788
5ba3f43e 2789 microuptime(&entry->cfe_last_event);
d9a64523 2790 CFI_ADD_TIME_LOG(cfil_info, &entry->cfe_last_event, &cfil_info->cfi_first_event, CFM_OP_SOCKET_CLOSED);
2791
2792 bzero(&msg_closed, sizeof(struct cfil_msg_sock_closed));
2793 msg_closed.cfc_msghdr.cfm_len = sizeof(struct cfil_msg_sock_closed);
2794 msg_closed.cfc_msghdr.cfm_version = CFM_VERSION_CURRENT;
2795 msg_closed.cfc_msghdr.cfm_type = CFM_TYPE_EVENT;
2796 msg_closed.cfc_msghdr.cfm_op = CFM_OP_SOCKET_CLOSED;
2797 msg_closed.cfc_msghdr.cfm_sock_id = entry->cfe_cfil_info->cfi_sock_id;
2798 msg_closed.cfc_first_event.tv_sec = cfil_info->cfi_first_event.tv_sec;
2799 msg_closed.cfc_first_event.tv_usec = cfil_info->cfi_first_event.tv_usec;
2800 memcpy(msg_closed.cfc_op_time, cfil_info->cfi_op_time, sizeof(uint32_t) * CFI_MAX_TIME_LOG_ENTRY);
2801 memcpy(msg_closed.cfc_op_list, cfil_info->cfi_op_list, sizeof(unsigned char) * CFI_MAX_TIME_LOG_ENTRY);
2802 msg_closed.cfc_op_list_ctr = cfil_info->cfi_op_list_ctr;
2803
2804#if LIFECYCLE_DEBUG
2805 CFIL_LOG(LOG_ERR, "CFIL: LIFECYCLE: SENDING CLOSED UP: <sock id %llu> op ctr %d, start time %llu.%llu", msg_closed.cfc_msghdr.cfm_sock_id, cfil_info->cfi_op_list_ctr, cfil_info->cfi_first_event.tv_sec, cfil_info->cfi_first_event.tv_usec);
2806#endif
5ba3f43e 2807 /* for debugging
2808 * if (msg_closed.cfc_op_list_ctr > CFI_MAX_TIME_LOG_ENTRY) {
2809 * msg_closed.cfc_op_list_ctr = CFI_MAX_TIME_LOG_ENTRY; // just in case
2810 * }
2811 * for (unsigned int i = 0; i < msg_closed.cfc_op_list_ctr ; i++) {
2812 * CFIL_LOG(LOG_ERR, "MD: socket %llu event %2u, time + %u msec", msg_closed.cfc_msghdr.cfm_sock_id, (unsigned short)msg_closed.cfc_op_list[i], msg_closed.cfc_op_time[i]);
2813 * }
2814 */
5ba3f43e 2815
fe8ab488 2816 error = ctl_enqueuedata(entry->cfe_filter->cf_kcref,
2817 entry->cfe_filter->cf_kcunit,
2818 &msg_closed,
2819 sizeof(struct cfil_msg_sock_closed),
2820 CTL_DATA_EOR);
2821 if (error != 0) {
2822 CFIL_LOG(LOG_ERR, "ctl_enqueuedata() failed: %d",
0a7de745 2823 error);
2824 goto done;
2825 }
5ba3f43e 2826
2827 entry->cfe_flags |= CFEF_SENT_SOCK_CLOSED;
2828 OSIncrementAtomic(&cfil_stats.cfs_closed_event_ok);
2829done:
2830 /* We can recover from flow control */
2831 if (error == ENOBUFS) {
2832 entry->cfe_flags |= CFEF_FLOW_CONTROLLED;
2833 OSIncrementAtomic(&cfil_stats.cfs_closed_event_flow_control);
2834
0a7de745 2835 if (!cfil_rw_lock_shared_to_exclusive(&cfil_lck_rw)) {
fe8ab488 2836 cfil_rw_lock_exclusive(&cfil_lck_rw);
0a7de745 2837 }
2838
2839 cfc->cf_flags |= CFF_FLOW_CONTROLLED;
2840
2841 cfil_rw_unlock_exclusive(&cfil_lck_rw);
2842 } else {
0a7de745 2843 if (error != 0) {
fe8ab488 2844 OSIncrementAtomic(&cfil_stats.cfs_closed_event_fail);
0a7de745 2845 }
2846
2847 cfil_rw_unlock_shared(&cfil_lck_rw);
2848 }
2849
0a7de745 2850 return error;
2851}
2852
2853static void
2854fill_ip6_sockaddr_4_6(union sockaddr_in_4_6 *sin46,
0a7de745 2855 struct in6_addr *ip6, u_int16_t port)
2856{
2857 struct sockaddr_in6 *sin6 = &sin46->sin6;
2858
2859 sin6->sin6_family = AF_INET6;
2860 sin6->sin6_len = sizeof(*sin6);
2861 sin6->sin6_port = port;
2862 sin6->sin6_addr = *ip6;
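/*
 * KAME-style link-local addresses embed the scope id in the second
 * 16-bit word of the address; lift it into sin6_scope_id and clear
 * it so user space sees a clean address.
 */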
2863 if (IN6_IS_SCOPE_EMBED(&sin6->sin6_addr)) {
2864 sin6->sin6_scope_id = ntohs(sin6->sin6_addr.s6_addr16[1]);
2865 sin6->sin6_addr.s6_addr16[1] = 0;
2866 }
2867}
2868
2869static void
2870fill_ip_sockaddr_4_6(union sockaddr_in_4_6 *sin46,
0a7de745 2871 struct in_addr ip, u_int16_t port)
2872{
2873 struct sockaddr_in *sin = &sin46->sin;
2874
2875 sin->sin_family = AF_INET;
2876 sin->sin_len = sizeof(*sin);
2877 sin->sin_port = port;
2878 sin->sin_addr.s_addr = ip.s_addr;
2879}
2880
2881static void
2882cfil_get_flow_address_v6(struct cfil_hash_entry *entry, struct inpcb *inp,
2883 struct in6_addr **laddr, struct in6_addr **faddr,
2884 u_int16_t *lport, u_int16_t *fport)
2885{
2886 if (entry != NULL) {
2887 *laddr = &entry->cfentry_laddr.addr6;
2888 *faddr = &entry->cfentry_faddr.addr6;
2889 *lport = entry->cfentry_lport;
2890 *fport = entry->cfentry_fport;
2891 } else {
2892 *laddr = &inp->in6p_laddr;
2893 *faddr = &inp->in6p_faddr;
2894 *lport = inp->inp_lport;
2895 *fport = inp->inp_fport;
2896 }
2897}
2898
2899static void
2900cfil_get_flow_address(struct cfil_hash_entry *entry, struct inpcb *inp,
2901 struct in_addr *laddr, struct in_addr *faddr,
2902 u_int16_t *lport, u_int16_t *fport)
2903{
2904 if (entry != NULL) {
2905 *laddr = entry->cfentry_laddr.addr46.ia46_addr4;
2906 *faddr = entry->cfentry_faddr.addr46.ia46_addr4;
2907 *lport = entry->cfentry_lport;
2908 *fport = entry->cfentry_fport;
2909 } else {
2910 *laddr = inp->inp_laddr;
2911 *faddr = inp->inp_faddr;
2912 *lport = inp->inp_lport;
2913 *fport = inp->inp_fport;
2914 }
2915}
2916
fe8ab488 2917static int
d9a64523 2918cfil_dispatch_data_event(struct socket *so, struct cfil_info *cfil_info, uint32_t kcunit, int outgoing,
0a7de745 2919 struct mbuf *data, unsigned int copyoffset, unsigned int copylen)
2920{
2921 errno_t error = 0;
2922 struct mbuf *copy = NULL;
2923 struct mbuf *msg = NULL;
2924 unsigned int one = 1;
2925 struct cfil_msg_data_event *data_req;
2926 size_t hdrsize;
2927 struct inpcb *inp = (struct inpcb *)so->so_pcb;
2928 struct cfil_entry *entry;
2929 struct cfe_buf *entrybuf;
2930 struct content_filter *cfc;
5ba3f43e 2931 struct timeval tv;
2932
2933 cfil_rw_lock_shared(&cfil_lck_rw);
2934
d9a64523 2935 entry = &cfil_info->cfi_entries[kcunit - 1];
0a7de745 2936 if (outgoing) {
fe8ab488 2937 entrybuf = &entry->cfe_snd;
0a7de745 2938 } else {
fe8ab488 2939 entrybuf = &entry->cfe_rcv;
0a7de745 2940 }
2941
2942 cfc = entry->cfe_filter;
0a7de745 2943 if (cfc == NULL) {
fe8ab488 2944 goto done;
0a7de745 2945 }
fe8ab488 2946
2947 data = cfil_data_start(data);
2948 if (data == NULL || (data->m_flags & M_PKTHDR) == 0) {
2949 CFIL_LOG(LOG_ERR, "NOT PKTHDR");
2950 goto done;
2951 }
2952
fe8ab488 2953 CFIL_LOG(LOG_INFO, "so %llx kcunit %u outgoing %d",
0a7de745 2954 (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit, outgoing);
2955
2956 socket_lock_assert_owned(so);
2957
2958 /* Would be wasteful to try */
2959 if (cfc->cf_flags & CFF_FLOW_CONTROLLED) {
2960 error = ENOBUFS;
2961 goto done;
2962 }
2963
2964 /* Make a copy of the data to pass to kernel control socket */
2965 copy = m_copym_mode(data, copyoffset, copylen, M_DONTWAIT,
0a7de745 2966 M_COPYM_NOOP_HDR);
2967 if (copy == NULL) {
2968 CFIL_LOG(LOG_ERR, "m_copym_mode() failed");
2969 error = ENOMEM;
2970 goto done;
2971 }
2972
2973 /* We need an mbuf packet for the message header */
2974 hdrsize = sizeof(struct cfil_msg_data_event);
2975 error = mbuf_allocpacket(MBUF_DONTWAIT, hdrsize, &one, &msg);
2976 if (error != 0) {
2977 CFIL_LOG(LOG_ERR, "mbuf_allocpacket() failed");
2978 m_freem(copy);
2979 /*
2980 * ENOBUFS is reserved to indicate flow control, so report ENOMEM here
2981 */
2982 error = ENOMEM;
2983 goto done;
2984 }
2985 mbuf_setlen(msg, hdrsize);
2986 mbuf_pkthdr_setlen(msg, hdrsize + copylen);
2987 msg->m_next = copy;
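/*
 * The outgoing message is a two-part mbuf chain:
 * [cfil_msg_data_event header][copy of the payload span].
 */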
2988 data_req = (struct cfil_msg_data_event *)mbuf_data(msg);
2989 bzero(data_req, hdrsize);
2990 data_req->cfd_msghdr.cfm_len = hdrsize + copylen;
2991 data_req->cfd_msghdr.cfm_version = 1;
2992 data_req->cfd_msghdr.cfm_type = CFM_TYPE_EVENT;
2993 data_req->cfd_msghdr.cfm_op =
0a7de745 2994 outgoing ? CFM_OP_DATA_OUT : CFM_OP_DATA_IN;
fe8ab488 2995 data_req->cfd_msghdr.cfm_sock_id =
0a7de745 2996 entry->cfe_cfil_info->cfi_sock_id;
2997 data_req->cfd_start_offset = entrybuf->cfe_peeked;
2998 data_req->cfd_end_offset = entrybuf->cfe_peeked + copylen;
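/*
 * In other words, the event carries the stream byte range
 * [cfe_peeked, cfe_peeked + copylen) for this direction.
 */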
2999
3000 /*
3001 * TBD:
3002 * For non-connected sockets, addresses need to be copied from
3003 * the passed-in parameters
3004 */
3005 if (inp->inp_vflag & INP_IPV6) {
3006 struct in6_addr *laddr = NULL, *faddr = NULL;
3007 u_int16_t lport = 0, fport = 0;
3008
3009 cfil_get_flow_address_v6(cfil_info->cfi_hash_entry, inp,
0a7de745 3010 &laddr, &faddr, &lport, &fport);
fe8ab488 3011 if (outgoing) {
3012 fill_ip6_sockaddr_4_6(&data_req->cfc_src, laddr, lport);
3013 fill_ip6_sockaddr_4_6(&data_req->cfc_dst, faddr, fport);
fe8ab488 3014 } else {
3015 fill_ip6_sockaddr_4_6(&data_req->cfc_src, faddr, fport);
3016 fill_ip6_sockaddr_4_6(&data_req->cfc_dst, laddr, lport);
3017 }
3018 } else if (inp->inp_vflag & INP_IPV4) {
3019 struct in_addr laddr = {0}, faddr = {0};
3020 u_int16_t lport = 0, fport = 0;
3021
3022 cfil_get_flow_address(cfil_info->cfi_hash_entry, inp,
0a7de745 3023 &laddr, &faddr, &lport, &fport);
d9a64523 3024
fe8ab488 3025 if (outgoing) {
3026 fill_ip_sockaddr_4_6(&data_req->cfc_src, laddr, lport);
3027 fill_ip_sockaddr_4_6(&data_req->cfc_dst, faddr, fport);
fe8ab488 3028 } else {
3029 fill_ip_sockaddr_4_6(&data_req->cfc_src, faddr, fport);
3030 fill_ip_sockaddr_4_6(&data_req->cfc_dst, laddr, lport);
3031 }
3032 }
3033
5ba3f43e 3034 microuptime(&tv);
d9a64523 3035 CFI_ADD_TIME_LOG(cfil_info, &tv, &cfil_info->cfi_first_event, data_req->cfd_msghdr.cfm_op);
5ba3f43e 3036
3037 /* Pass the message to the content filter */
3038 error = ctl_enqueuembuf(entry->cfe_filter->cf_kcref,
3039 entry->cfe_filter->cf_kcunit,
3040 msg, CTL_DATA_EOR);
3041 if (error != 0) {
3042 CFIL_LOG(LOG_ERR, "ctl_enqueuembuf() failed: %d", error);
3043 mbuf_freem(msg);
3044 goto done;
3045 }
3046 entry->cfe_flags &= ~CFEF_FLOW_CONTROLLED;
3047 OSIncrementAtomic(&cfil_stats.cfs_data_event_ok);
3048
3049#if VERDICT_DEBUG
3050 CFIL_LOG(LOG_ERR, "CFIL: VERDICT ACTION: so %llx sockID %llu outgoing %d: mbuf %llx copyoffset %u copylen %u",
0a7de745 3051 (uint64_t)VM_KERNEL_ADDRPERM(so), cfil_info->cfi_sock_id, outgoing, (uint64_t)VM_KERNEL_ADDRPERM(data), copyoffset, copylen);
3052#endif
3053
3054done:
3055 if (error == ENOBUFS) {
3056 entry->cfe_flags |= CFEF_FLOW_CONTROLLED;
3057 OSIncrementAtomic(
3058 &cfil_stats.cfs_data_event_flow_control);
3059
0a7de745 3060 if (!cfil_rw_lock_shared_to_exclusive(&cfil_lck_rw)) {
fe8ab488 3061 cfil_rw_lock_exclusive(&cfil_lck_rw);
0a7de745 3062 }
3063
3064 cfc->cf_flags |= CFF_FLOW_CONTROLLED;
3065
3066 cfil_rw_unlock_exclusive(&cfil_lck_rw);
3067 } else {
0a7de745 3068 if (error != 0) {
fe8ab488 3069 OSIncrementAtomic(&cfil_stats.cfs_data_event_fail);
0a7de745 3070 }
3071
3072 cfil_rw_unlock_shared(&cfil_lck_rw);
3073 }
0a7de745 3074 return error;
3075}
3076
3077/*
3078 * Process the queue of data waiting to be delivered to content filter
3079 */
3080static int
d9a64523 3081cfil_data_service_ctl_q(struct socket *so, struct cfil_info *cfil_info, uint32_t kcunit, int outgoing)
3082{
3083 errno_t error = 0;
3084 struct mbuf *data, *tmp = NULL;
3085 unsigned int datalen = 0, copylen = 0, copyoffset = 0;
3086 struct cfil_entry *entry;
3087 struct cfe_buf *entrybuf;
3088 uint64_t currentoffset = 0;
3089
3090 if (cfil_info == NULL) {
3091 return 0;
3092 }
3093
3094 CFIL_LOG(LOG_INFO, "so %llx kcunit %u outgoing %d",
0a7de745 3095 (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit, outgoing);
3096
3097 socket_lock_assert_owned(so);
3098
d9a64523 3099 entry = &cfil_info->cfi_entries[kcunit - 1];
0a7de745 3100 if (outgoing) {
fe8ab488 3101 entrybuf = &entry->cfe_snd;
0a7de745 3102 } else {
fe8ab488 3103 entrybuf = &entry->cfe_rcv;
0a7de745 3104 }
3105
3106 /* Send attached message if not yet done */
3107 if ((entry->cfe_flags & CFEF_SENT_SOCK_ATTACHED) == 0) {
d9a64523 3108 error = cfil_dispatch_attach_event(so, cfil_info, kcunit);
3109 if (error != 0) {
3110 /* We can recover from flow control */
0a7de745 3111 if (error == ENOBUFS || error == ENOMEM) {
fe8ab488 3112 error = 0;
0a7de745 3113 }
3114 goto done;
3115 }
3116 } else if ((entry->cfe_flags & CFEF_DATA_START) == 0) {
3117 OSIncrementAtomic(&cfil_stats.cfs_ctl_q_not_started);
3118 goto done;
3119 }
3120
3121#if DATA_DEBUG
3122 CFIL_LOG(LOG_DEBUG, "CFIL: SERVICE CTL-Q: pass_offset %llu peeked %llu peek_offset %llu",
3123 entrybuf->cfe_pass_offset,
3124 entrybuf->cfe_peeked,
3125 entrybuf->cfe_peek_offset);
d9a64523 3126#endif
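/*
 * The offsets are absolute byte positions in the stream (a reading
 * consistent with the loops below): bytes below cfe_pass_offset may
 * move to the pending queue, and bytes below cfe_peek_offset may be
 * shown to the filter. Example with made-up numbers: q_start = 100,
 * pass_offset = 150, first mbuf 100 bytes long -> only 50 bytes can
 * pass, a partial pass, so the first loop stops there.
 */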
3127
3128 /* Move all data that can pass */
3129 while ((data = cfil_queue_first(&entrybuf->cfe_ctl_q)) != NULL &&
0a7de745 3130 entrybuf->cfe_ctl_q.q_start < entrybuf->cfe_pass_offset) {
d9a64523 3131 datalen = cfil_data_length(data, NULL, NULL);
3132 tmp = data;
3133
3134 if (entrybuf->cfe_ctl_q.q_start + datalen <=
0a7de745 3135 entrybuf->cfe_pass_offset) {
3136 /*
3137 * The first mbuf can fully pass
3138 */
3139 copylen = datalen;
3140 } else {
3141 /*
3142 * The first mbuf can partially pass
3143 */
3144 copylen = entrybuf->cfe_pass_offset -
0a7de745 3145 entrybuf->cfe_ctl_q.q_start;
3146 }
3147 VERIFY(copylen <= datalen);
3148
d9a64523 3149#if DATA_DEBUG
fe8ab488 3150 CFIL_LOG(LOG_DEBUG,
3151 "CFIL: SERVICE CTL-Q PASSING: %llx first %llu peeked %llu pass %llu peek %llu "
3152 "datalen %u copylen %u",
3153 (uint64_t)VM_KERNEL_ADDRPERM(tmp),
3154 entrybuf->cfe_ctl_q.q_start,
3155 entrybuf->cfe_peeked,
3156 entrybuf->cfe_pass_offset,
3157 entrybuf->cfe_peek_offset,
3158 datalen, copylen);
d9a64523 3159#endif
3160
3161 /*
3162 * Data that passes has been peeked at explicitly or
3163 * implicitly
3164 */
3165 if (entrybuf->cfe_ctl_q.q_start + copylen >
0a7de745 3166 entrybuf->cfe_peeked) {
fe8ab488 3167 entrybuf->cfe_peeked =
3168 entrybuf->cfe_ctl_q.q_start + copylen;
3169 }
3170 /*
3171 * Stop on partial pass
3172 */
0a7de745 3173 if (copylen < datalen) {
fe8ab488 3174 break;
0a7de745 3175 }
3176
3177 /* All good, move full data from ctl queue to pending queue */
3178 cfil_queue_remove(&entrybuf->cfe_ctl_q, data, datalen);
3179
3180 cfil_queue_enqueue(&entrybuf->cfe_pending_q, data, datalen);
0a7de745 3181 if (outgoing) {
fe8ab488 3182 OSAddAtomic64(datalen,
3183 &cfil_stats.cfs_pending_q_out_enqueued);
3184 } else {
fe8ab488 3185 OSAddAtomic64(datalen,
3186 &cfil_stats.cfs_pending_q_in_enqueued);
3187 }
fe8ab488 3188 }
d9a64523 3189 CFIL_INFO_VERIFY(cfil_info);
0a7de745 3190 if (tmp != NULL) {
fe8ab488 3191 CFIL_LOG(LOG_DEBUG,
3192 "%llx first %llu peeked %llu pass %llu peek %llu "
3193 "datalen %u copylen %u",
3194 (uint64_t)VM_KERNEL_ADDRPERM(tmp),
3195 entrybuf->cfe_ctl_q.q_start,
3196 entrybuf->cfe_peeked,
3197 entrybuf->cfe_pass_offset,
3198 entrybuf->cfe_peek_offset,
3199 datalen, copylen);
3200 }
3201 tmp = NULL;
3202
3203 /* Now deal with remaining data the filter wants to peek at */
3204 for (data = cfil_queue_first(&entrybuf->cfe_ctl_q),
3205 currentoffset = entrybuf->cfe_ctl_q.q_start;
3206 data != NULL && currentoffset < entrybuf->cfe_peek_offset;
3207 data = cfil_queue_next(&entrybuf->cfe_ctl_q, data),
3208 currentoffset += datalen) {
d9a64523 3209 datalen = cfil_data_length(data, NULL, NULL);
3210 tmp = data;
3211
3212 /* We've already peeked at this mbuf */
0a7de745 3213 if (currentoffset + datalen <= entrybuf->cfe_peeked) {
fe8ab488 3214 continue;
0a7de745 3215 }
3216 /*
3217 * The data in the first mbuf may have been
3218 * partially peeked at
3219 */
3220 copyoffset = entrybuf->cfe_peeked - currentoffset;
3221 VERIFY(copyoffset < datalen);
3222 copylen = datalen - copyoffset;
3223 VERIFY(copylen <= datalen);
3224 /*
3225 * Do not copy more than needed
3226 */
3227 if (currentoffset + copyoffset + copylen >
0a7de745 3228 entrybuf->cfe_peek_offset) {
fe8ab488 3229 copylen = entrybuf->cfe_peek_offset -
0a7de745 3230 (currentoffset + copyoffset);
3231 }
3232
d9a64523 3233#if DATA_DEBUG
fe8ab488 3234 CFIL_LOG(LOG_DEBUG,
3235 "CFIL: SERVICE CTL-Q PEEKING: %llx current %llu peeked %llu pass %llu peek %llu "
3236 "datalen %u copylen %u copyoffset %u",
3237 (uint64_t)VM_KERNEL_ADDRPERM(tmp),
3238 currentoffset,
3239 entrybuf->cfe_peeked,
3240 entrybuf->cfe_pass_offset,
3241 entrybuf->cfe_peek_offset,
3242 datalen, copylen, copyoffset);
d9a64523 3243#endif
3244
3245 /*
3246 * Stop if there is nothing more to peek at
3247 */
0a7de745 3248 if (copylen == 0) {
fe8ab488 3249 break;
0a7de745 3250 }
3251 /*
3252 * Let the filter get a peek at this span of data
3253 */
d9a64523 3254 error = cfil_dispatch_data_event(so, cfil_info, kcunit,
0a7de745 3255 outgoing, data, copyoffset, copylen);
3256 if (error != 0) {
3257 /* On error, leave data in ctl_q */
3258 break;
3259 }
3260 entrybuf->cfe_peeked += copylen;
0a7de745 3261 if (outgoing) {
fe8ab488 3262 OSAddAtomic64(copylen,
0a7de745
A
3263 &cfil_stats.cfs_ctl_q_out_peeked);
3264 } else {
fe8ab488 3265 OSAddAtomic64(copylen,
0a7de745
A
3266 &cfil_stats.cfs_ctl_q_in_peeked);
3267 }
fe8ab488
A
3268
3269 /* Stop when data could not be fully peeked at */
0a7de745 3270 if (copylen + copyoffset < datalen) {
fe8ab488 3271 break;
0a7de745 3272 }
fe8ab488 3273 }
d9a64523 3274 CFIL_INFO_VERIFY(cfil_info);
0a7de745 3275 if (tmp != NULL) {
fe8ab488 3276 CFIL_LOG(LOG_DEBUG,
0a7de745
A
3277 "%llx first %llu peeked %llu pass %llu peek %llu"
3278 "datalen %u copylen %u copyoffset %u",
3279 (uint64_t)VM_KERNEL_ADDRPERM(tmp),
3280 currentoffset,
3281 entrybuf->cfe_peeked,
3282 entrybuf->cfe_pass_offset,
3283 entrybuf->cfe_peek_offset,
3284 datalen, copylen, copyoffset);
3285 }
fe8ab488
A
3286
3287 /*
3288 * Process data that has passed the filter
3289 */
d9a64523 3290 error = cfil_service_pending_queue(so, cfil_info, kcunit, outgoing);
fe8ab488
A
3291 if (error != 0) {
3292 CFIL_LOG(LOG_ERR, "cfil_service_pending_queue() error %d",
0a7de745 3293 error);
fe8ab488
A
3294 goto done;
3295 }
3296
3297 /*
3298 * Dispatch disconnect events that could not be sent
3299 */
0a7de745 3300 if (cfil_info == NULL) {
fe8ab488 3301 goto done;
0a7de745 3302 } else if (outgoing) {
d9a64523 3303 if ((cfil_info->cfi_flags & CFIF_SHUT_WR) &&
0a7de745 3304 !(entry->cfe_flags & CFEF_SENT_DISCONNECT_OUT)) {
d9a64523 3305 cfil_dispatch_disconnect_event(so, cfil_info, kcunit, 1);
0a7de745 3306 }
fe8ab488 3307 } else {
d9a64523 3308 if ((cfil_info->cfi_flags & CFIF_SHUT_RD) &&
0a7de745 3309 !(entry->cfe_flags & CFEF_SENT_DISCONNECT_IN)) {
d9a64523 3310 cfil_dispatch_disconnect_event(so, cfil_info, kcunit, 0);
0a7de745 3311 }
fe8ab488
A
3312 }
3313
3314done:
3315 CFIL_LOG(LOG_DEBUG,
0a7de745
A
3316 "first %llu peeked %llu pass %llu peek %llu",
3317 entrybuf->cfe_ctl_q.q_start,
3318 entrybuf->cfe_peeked,
3319 entrybuf->cfe_pass_offset,
3320 entrybuf->cfe_peek_offset);
fe8ab488 3321
d9a64523 3322 CFIL_INFO_VERIFY(cfil_info);
0a7de745 3323 return error;
fe8ab488
A
3324}

/*
 * cfil_data_filter()
 *
 * Process data for a content filter installed on a socket
 */
int
cfil_data_filter(struct socket *so, struct cfil_info *cfil_info, uint32_t kcunit, int outgoing,
    struct mbuf *data, uint64_t datalen)
{
	errno_t error = 0;
	struct cfil_entry *entry;
	struct cfe_buf *entrybuf;

	CFIL_LOG(LOG_INFO, "so %llx kcunit %u outgoing %d",
	    (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit, outgoing);

	socket_lock_assert_owned(so);

	entry = &cfil_info->cfi_entries[kcunit - 1];
	if (outgoing) {
		entrybuf = &entry->cfe_snd;
	} else {
		entrybuf = &entry->cfe_rcv;
	}

	/* Are we attached to the filter? */
	if (entry->cfe_filter == NULL) {
		error = 0;
		goto done;
	}

	/* Dispatch to filters */
	cfil_queue_enqueue(&entrybuf->cfe_ctl_q, data, datalen);
	if (outgoing) {
		OSAddAtomic64(datalen,
		    &cfil_stats.cfs_ctl_q_out_enqueued);
	} else {
		OSAddAtomic64(datalen,
		    &cfil_stats.cfs_ctl_q_in_enqueued);
	}

	error = cfil_data_service_ctl_q(so, cfil_info, kcunit, outgoing);
	if (error != 0) {
		CFIL_LOG(LOG_ERR, "cfil_data_service_ctl_q() error %d",
		    error);
	}
	/*
	 * We have to return EJUSTRETURN in all cases to avoid double free
	 * by socket layer
	 */
	error = EJUSTRETURN;
done:
	CFIL_INFO_VERIFY(cfil_info);

	CFIL_LOG(LOG_INFO, "return %d", error);
	return error;
}
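
/*
 * Queue pipeline overview (illustrative sketch, using the field names
 * defined in this file): each mbuf chain handed to cfil_data_filter()
 * moves through three per-direction queues:
 *
 *	cfe_ctl_q --------> cfe_pending_q --------> cfi_inject_q
 *	(awaiting a         (passed by this         (passed by all
 *	 verdict from        filter, awaiting        filters, awaiting
 *	 this filter)        the other filters)      re-injection)
 *
 * cfil_data_service_ctl_q() advances data across the first arrow,
 * cfil_service_pending_queue() across the second, and
 * cfil_service_inject_queue() performs the final re-injection.
 */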

/*
 * cfil_service_inject_queue() re-injects data that passed the
 * content filters
 */
static int
cfil_service_inject_queue(struct socket *so, struct cfil_info *cfil_info, int outgoing)
{
	mbuf_t data;
	unsigned int datalen;
	int mbcnt = 0;
	int mbnum = 0;
	errno_t error = 0;
	struct cfi_buf *cfi_buf;
	struct cfil_queue *inject_q;
	int need_rwakeup = 0;
	int count = 0;

	if (cfil_info == NULL) {
		return 0;
	}

	socket_lock_assert_owned(so);

	if (outgoing) {
		cfi_buf = &cfil_info->cfi_snd;
		cfil_info->cfi_flags &= ~CFIF_RETRY_INJECT_OUT;
	} else {
		cfi_buf = &cfil_info->cfi_rcv;
		cfil_info->cfi_flags &= ~CFIF_RETRY_INJECT_IN;
	}
	inject_q = &cfi_buf->cfi_inject_q;

	if (cfil_queue_empty(inject_q)) {
		return 0;
	}

#if DATA_DEBUG | VERDICT_DEBUG
	CFIL_LOG(LOG_ERR, "CFIL: SERVICE INJECT-Q: <so %llx> outgoing %d queue len %llu",
	    (uint64_t)VM_KERNEL_ADDRPERM(so), outgoing, cfil_queue_len(inject_q));
#endif

	while ((data = cfil_queue_first(inject_q)) != NULL) {
		datalen = cfil_data_length(data, &mbcnt, &mbnum);

#if DATA_DEBUG
		CFIL_LOG(LOG_DEBUG, "CFIL: SERVICE INJECT-Q: <%s>: <so %llx> data %llx datalen %u (mbcnt %u)",
		    cfil_info->cfi_hash_entry ? "UDP" : "TCP",
		    (uint64_t)VM_KERNEL_ADDRPERM(so), (uint64_t)VM_KERNEL_ADDRPERM(data), datalen, mbcnt);
#endif

		/* Remove data from queue and adjust stats */
		cfil_queue_remove(inject_q, data, datalen);
		cfi_buf->cfi_pending_first += datalen;
		cfi_buf->cfi_pending_mbcnt -= mbcnt;
		cfi_buf->cfi_pending_mbnum -= mbnum;
		cfil_info_buf_verify(cfi_buf);

		if (outgoing) {
			error = sosend_reinject(so, NULL, data, NULL, 0);
			if (error != 0) {
#if DATA_DEBUG
				cfil_info_log(LOG_ERR, cfil_info, "CFIL: Error: sosend_reinject() failed");
				CFIL_LOG(LOG_ERR, "### sosend() failed %d", error);
#endif
				break;
			}
			// At least one injection succeeded, need to wake up pending threads.
			need_rwakeup = 1;
		} else {
			data->m_flags |= M_SKIPCFIL;

			/*
			 * NOTE: We currently only support TCP and UDP.
			 * For RAWIP, MPTCP and message TCP we'll
			 * need to call the appropriate sbappendxxx()
			 * or fix sock_inject_data_in()
			 */
			if (IS_UDP(so) == TRUE) {
				if (sbappendchain(&so->so_rcv, data, 0)) {
					need_rwakeup = 1;
				}
			} else {
				if (sbappendstream(&so->so_rcv, data)) {
					need_rwakeup = 1;
				}
			}
		}

		if (outgoing) {
			OSAddAtomic64(datalen,
			    &cfil_stats.cfs_inject_q_out_passed);
		} else {
			OSAddAtomic64(datalen,
			    &cfil_stats.cfs_inject_q_in_passed);
		}

		count++;
	}

#if DATA_DEBUG | VERDICT_DEBUG
	CFIL_LOG(LOG_ERR, "CFIL: SERVICE INJECT-Q: <so %llx> injected %d",
	    (uint64_t)VM_KERNEL_ADDRPERM(so), count);
#endif

	/* A single wakeup for several packets is more efficient */
	if (need_rwakeup) {
		if (outgoing == TRUE) {
			sowwakeup(so);
		} else {
			sorwakeup(so);
		}
	}

	if (error != 0 && cfil_info) {
		if (error == ENOBUFS) {
			OSIncrementAtomic(&cfil_stats.cfs_inject_q_nobufs);
		}
		if (error == ENOMEM) {
			OSIncrementAtomic(&cfil_stats.cfs_inject_q_nomem);
		}

		if (outgoing) {
			cfil_info->cfi_flags |= CFIF_RETRY_INJECT_OUT;
			OSIncrementAtomic(&cfil_stats.cfs_inject_q_out_fail);
		} else {
			cfil_info->cfi_flags |= CFIF_RETRY_INJECT_IN;
			OSIncrementAtomic(&cfil_stats.cfs_inject_q_in_fail);
		}
	}

	/*
	 * Notify
	 */
	if (cfil_info && (cfil_info->cfi_flags & CFIF_SHUT_WR)) {
		cfil_sock_notify_shutdown(so, SHUT_WR);
		if (cfil_sock_data_pending(&so->so_snd) == 0) {
			soshutdownlock_final(so, SHUT_WR);
		}
	}
	if (cfil_info && (cfil_info->cfi_flags & CFIF_CLOSE_WAIT)) {
		if (cfil_filters_attached(so) == 0) {
			CFIL_LOG(LOG_INFO, "so %llx waking",
			    (uint64_t)VM_KERNEL_ADDRPERM(so));
			wakeup((caddr_t)cfil_info);
		}
	}

	CFIL_INFO_VERIFY(cfil_info);

	return error;
}
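
/*
 * Failure handling sketch (illustrative): when a re-injection attempt
 * fails, for instance with ENOBUFS, the CFIF_RETRY_INJECT_OUT or
 * CFIF_RETRY_INJECT_IN flag set above keeps the flow alive, and
 * cfil_sock_buf_update() retries the injection the next time the socket
 * buffer has room:
 *
 *	error = cfil_acquire_sockbuf(so, so->so_cfil, outgoing);
 *	if (error == 0)
 *		cfil_service_inject_queue(so, so->so_cfil, outgoing);
 *	cfil_release_sockbuf(so, outgoing);
 */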

static int
cfil_service_pending_queue(struct socket *so, struct cfil_info *cfil_info, uint32_t kcunit, int outgoing)
{
	uint64_t passlen, curlen;
	mbuf_t data;
	unsigned int datalen;
	errno_t error = 0;
	struct cfil_entry *entry;
	struct cfe_buf *entrybuf;
	struct cfil_queue *pending_q;

	CFIL_LOG(LOG_INFO, "so %llx kcunit %u outgoing %d",
	    (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit, outgoing);

	socket_lock_assert_owned(so);

	entry = &cfil_info->cfi_entries[kcunit - 1];
	if (outgoing) {
		entrybuf = &entry->cfe_snd;
	} else {
		entrybuf = &entry->cfe_rcv;
	}

	pending_q = &entrybuf->cfe_pending_q;

	passlen = entrybuf->cfe_pass_offset - pending_q->q_start;

	/*
	 * Locate the chunks of data that we can pass to the next filter
	 * A data chunk must be on mbuf boundaries
	 */
	curlen = 0;
	while ((data = cfil_queue_first(pending_q)) != NULL) {
		datalen = cfil_data_length(data, NULL, NULL);

#if DATA_DEBUG
		CFIL_LOG(LOG_DEBUG,
		    "CFIL: SERVICE PENDING-Q: data %llx datalen %u passlen %llu curlen %llu",
		    (uint64_t)VM_KERNEL_ADDRPERM(data), datalen,
		    passlen, curlen);
#endif

		if (curlen + datalen > passlen) {
			break;
		}

		cfil_queue_remove(pending_q, data, datalen);

		curlen += datalen;

		for (kcunit += 1;
		    kcunit <= MAX_CONTENT_FILTER;
		    kcunit++) {
			error = cfil_data_filter(so, cfil_info, kcunit, outgoing,
			    data, datalen);
			/* 0 means passed so we can continue */
			if (error != 0) {
				break;
			}
		}
		/* When data has passed all filters, re-inject */
		if (error == 0) {
			if (outgoing) {
				cfil_queue_enqueue(
					&cfil_info->cfi_snd.cfi_inject_q,
					data, datalen);
				OSAddAtomic64(datalen,
				    &cfil_stats.cfs_inject_q_out_enqueued);
			} else {
				cfil_queue_enqueue(
					&cfil_info->cfi_rcv.cfi_inject_q,
					data, datalen);
				OSAddAtomic64(datalen,
				    &cfil_stats.cfs_inject_q_in_enqueued);
			}
		}
	}

	CFIL_INFO_VERIFY(cfil_info);

	return error;
}
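
/*
 * Cascading example (illustrative): with two attached filters, a chunk
 * fully passed by filter 1 is re-submitted to filter 2 by the
 * "for (kcunit += 1; ...)" loop above before it may reach the inject
 * queue; for a fully passed outgoing chunk the flow is roughly:
 *
 *	cfil_data_filter(so, info, 1, ...)	  enqueue on entry 1
 *	  cfil_service_pending_queue(so, info, 1, ...)
 *	    cfil_data_filter(so, info, 2, ...)	  enqueue on entry 2
 *	      cfil_service_pending_queue(so, info, 2, ...)
 *	        cfil_queue_enqueue(&info->cfi_snd.cfi_inject_q, ...)
 */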

int
cfil_update_data_offsets(struct socket *so, struct cfil_info *cfil_info, uint32_t kcunit, int outgoing,
    uint64_t pass_offset, uint64_t peek_offset)
{
	errno_t error = 0;
	struct cfil_entry *entry = NULL;
	struct cfe_buf *entrybuf;
	int updated = 0;

	CFIL_LOG(LOG_INFO, "pass %llu peek %llu", pass_offset, peek_offset);

	socket_lock_assert_owned(so);

	if (cfil_info == NULL) {
		CFIL_LOG(LOG_ERR, "so %llx cfil detached",
		    (uint64_t)VM_KERNEL_ADDRPERM(so));
		error = 0;
		goto done;
	} else if (cfil_info->cfi_flags & CFIF_DROP) {
		CFIL_LOG(LOG_ERR, "so %llx drop set",
		    (uint64_t)VM_KERNEL_ADDRPERM(so));
		error = EPIPE;
		goto done;
	}

	entry = &cfil_info->cfi_entries[kcunit - 1];
	if (outgoing) {
		entrybuf = &entry->cfe_snd;
	} else {
		entrybuf = &entry->cfe_rcv;
	}

	/* Record updated offsets for this content filter */
	if (pass_offset > entrybuf->cfe_pass_offset) {
		entrybuf->cfe_pass_offset = pass_offset;

		if (entrybuf->cfe_peek_offset < entrybuf->cfe_pass_offset) {
			entrybuf->cfe_peek_offset = entrybuf->cfe_pass_offset;
		}
		updated = 1;
	} else {
		CFIL_LOG(LOG_INFO, "pass_offset %llu <= cfe_pass_offset %llu",
		    pass_offset, entrybuf->cfe_pass_offset);
	}
	/* Filter does not want or need to see data that's allowed to pass */
	if (peek_offset > entrybuf->cfe_pass_offset &&
	    peek_offset > entrybuf->cfe_peek_offset) {
		entrybuf->cfe_peek_offset = peek_offset;
		updated = 1;
	}
	/* Nothing to do */
	if (updated == 0) {
		goto done;
	}

	/* Move data held in control queue to pending queue if needed */
	error = cfil_data_service_ctl_q(so, cfil_info, kcunit, outgoing);
	if (error != 0) {
		CFIL_LOG(LOG_ERR, "cfil_data_service_ctl_q() error %d",
		    error);
		goto done;
	}
	error = EJUSTRETURN;

done:
	/*
	 * The filter is effectively detached when pass all from both sides
	 * or when the socket is closed and no more data is waiting
	 * to be delivered to the filter
	 */
	if (entry != NULL &&
	    ((entry->cfe_snd.cfe_pass_offset == CFM_MAX_OFFSET &&
	    entry->cfe_rcv.cfe_pass_offset == CFM_MAX_OFFSET) ||
	    ((cfil_info->cfi_flags & CFIF_CLOSE_WAIT) &&
	    cfil_queue_empty(&entry->cfe_snd.cfe_ctl_q) &&
	    cfil_queue_empty(&entry->cfe_rcv.cfe_ctl_q)))) {
		entry->cfe_flags |= CFEF_CFIL_DETACHED;
#if LIFECYCLE_DEBUG
		cfil_info_log(LOG_ERR, cfil_info, outgoing ?
		    "CFIL: LIFECYCLE: OUT - PASSED ALL - DETACH" :
		    "CFIL: LIFECYCLE: IN - PASSED ALL - DETACH");
#endif
		CFIL_LOG(LOG_INFO, "so %llx detached %u",
		    (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit);
		if ((cfil_info->cfi_flags & CFIF_CLOSE_WAIT) &&
		    cfil_filters_attached(so) == 0) {
#if LIFECYCLE_DEBUG
			cfil_info_log(LOG_ERR, cfil_info, "CFIL: LIFECYCLE: WAKING");
#endif
			CFIL_LOG(LOG_INFO, "so %llx waking",
			    (uint64_t)VM_KERNEL_ADDRPERM(so));
			wakeup((caddr_t)cfil_info);
		}
	}
	CFIL_INFO_VERIFY(cfil_info);
	CFIL_LOG(LOG_INFO, "return %d", error);
	return error;
}
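
/*
 * Worked example with illustrative numbers: suppose 600 bytes are queued
 * and both offsets start at 0. If the agent replies with
 * pass_offset = 200 and peek_offset = 500, then after
 * cfil_data_service_ctl_q() runs:
 *  - bytes [0, 200) move to the pending queue (passed),
 *  - bytes [200, 500) are copied up to the agent for inspection,
 *  - bytes [500, 600) stay untouched in the control queue.
 * A pass_offset of CFM_MAX_OFFSET in both directions effectively
 * detaches the filter, as checked in the "done:" block above.
 */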

/*
 * Update pass offset for socket when no data is pending
 */
static int
cfil_set_socket_pass_offset(struct socket *so, struct cfil_info *cfil_info, int outgoing)
{
	struct cfi_buf *cfi_buf;
	struct cfil_entry *entry;
	struct cfe_buf *entrybuf;
	uint32_t kcunit;
	uint64_t pass_offset = 0;

	if (cfil_info == NULL) {
		return 0;
	}

	CFIL_LOG(LOG_INFO, "so %llx outgoing %d",
	    (uint64_t)VM_KERNEL_ADDRPERM(so), outgoing);

	socket_lock_assert_owned(so);

	if (outgoing) {
		cfi_buf = &cfil_info->cfi_snd;
	} else {
		cfi_buf = &cfil_info->cfi_rcv;
	}

	CFIL_LOG(LOG_DEBUG, "CFIL: <so %llx, sockID %llu> outgoing %d cfi_pending_first %llu cfi_pending_last %llu",
	    (uint64_t)VM_KERNEL_ADDRPERM(so), cfil_info->cfi_sock_id, outgoing,
	    cfi_buf->cfi_pending_first, cfi_buf->cfi_pending_last);

	if (cfi_buf->cfi_pending_last - cfi_buf->cfi_pending_first == 0) {
		for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
			entry = &cfil_info->cfi_entries[kcunit - 1];

			/* Are we attached to a filter? */
			if (entry->cfe_filter == NULL) {
				continue;
			}

			if (outgoing) {
				entrybuf = &entry->cfe_snd;
			} else {
				entrybuf = &entry->cfe_rcv;
			}

			if (pass_offset == 0 ||
			    entrybuf->cfe_pass_offset < pass_offset) {
				pass_offset = entrybuf->cfe_pass_offset;
			}
		}
		cfi_buf->cfi_pass_offset = pass_offset;
	}

	CFIL_LOG(LOG_DEBUG, "CFIL: <so %llx, sockID %llu>, cfi_pass_offset %llu",
	    (uint64_t)VM_KERNEL_ADDRPERM(so), cfil_info->cfi_sock_id, cfi_buf->cfi_pass_offset);

	return 0;
}

int
cfil_action_data_pass(struct socket *so, struct cfil_info *cfil_info, uint32_t kcunit, int outgoing,
    uint64_t pass_offset, uint64_t peek_offset)
{
	errno_t error = 0;

	CFIL_LOG(LOG_INFO, "");

	socket_lock_assert_owned(so);

	error = cfil_acquire_sockbuf(so, cfil_info, outgoing);
	if (error != 0) {
		CFIL_LOG(LOG_INFO, "so %llx %s dropped",
		    (uint64_t)VM_KERNEL_ADDRPERM(so),
		    outgoing ? "out" : "in");
		goto release;
	}

	error = cfil_update_data_offsets(so, cfil_info, kcunit, outgoing,
	    pass_offset, peek_offset);

	cfil_service_inject_queue(so, cfil_info, outgoing);

	cfil_set_socket_pass_offset(so, cfil_info, outgoing);
release:
	CFIL_INFO_VERIFY(cfil_info);
	cfil_release_sockbuf(so, outgoing);

	return error;
}
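
/*
 * Sequence sketch (illustrative): a data update message from the agent
 * funnels into cfil_action_data_pass() and triggers, in order:
 *	cfil_acquire_sockbuf()        exclusive access to the socket buffer
 *	cfil_update_data_offsets()    record the new pass/peek offsets
 *	cfil_service_inject_queue()   re-inject newly passed data
 *	cfil_set_socket_pass_offset() advance the per-socket fast path
 *	cfil_release_sockbuf()
 */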


static void
cfil_flush_queues(struct socket *so, struct cfil_info *cfil_info)
{
	struct cfil_entry *entry;
	int kcunit;
	uint64_t drained;

	if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || cfil_info == NULL) {
		goto done;
	}

	socket_lock_assert_owned(so);

	/*
	 * Flush the output queues and ignore errors as long as
	 * we are attached
	 */
	(void) cfil_acquire_sockbuf(so, cfil_info, 1);
	if (cfil_info != NULL) {
		drained = 0;
		for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
			entry = &cfil_info->cfi_entries[kcunit - 1];

			drained += cfil_queue_drain(&entry->cfe_snd.cfe_ctl_q);
			drained += cfil_queue_drain(&entry->cfe_snd.cfe_pending_q);
		}
		drained += cfil_queue_drain(&cfil_info->cfi_snd.cfi_inject_q);

		if (drained) {
			if (cfil_info->cfi_flags & CFIF_DROP) {
				OSIncrementAtomic(
					&cfil_stats.cfs_flush_out_drop);
			} else {
				OSIncrementAtomic(
					&cfil_stats.cfs_flush_out_close);
			}
		}
	}
	cfil_release_sockbuf(so, 1);

	/*
	 * Flush the input queues
	 */
	(void) cfil_acquire_sockbuf(so, cfil_info, 0);
	if (cfil_info != NULL) {
		drained = 0;
		for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
			entry = &cfil_info->cfi_entries[kcunit - 1];

			drained += cfil_queue_drain(
				&entry->cfe_rcv.cfe_ctl_q);
			drained += cfil_queue_drain(
				&entry->cfe_rcv.cfe_pending_q);
		}
		drained += cfil_queue_drain(&cfil_info->cfi_rcv.cfi_inject_q);

		if (drained) {
			if (cfil_info->cfi_flags & CFIF_DROP) {
				OSIncrementAtomic(
					&cfil_stats.cfs_flush_in_drop);
			} else {
				OSIncrementAtomic(
					&cfil_stats.cfs_flush_in_close);
			}
		}
	}
	cfil_release_sockbuf(so, 0);
done:
	CFIL_INFO_VERIFY(cfil_info);
}

int
cfil_action_drop(struct socket *so, struct cfil_info *cfil_info, uint32_t kcunit)
{
	errno_t error = 0;
	struct cfil_entry *entry;
	struct proc *p;

	if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || cfil_info == NULL) {
		goto done;
	}

	socket_lock_assert_owned(so);

	entry = &cfil_info->cfi_entries[kcunit - 1];

	/* Are we attached to the filter? */
	if (entry->cfe_filter == NULL) {
		goto done;
	}

	cfil_info->cfi_flags |= CFIF_DROP;

	p = current_proc();

	/*
	 * Force the socket to be marked defunct
	 * (forcing fixed along with rdar://19391339)
	 */
	if (so->so_cfil_db == NULL) {
		error = sosetdefunct(p, so,
		    SHUTDOWN_SOCKET_LEVEL_CONTENT_FILTER | SHUTDOWN_SOCKET_LEVEL_DISCONNECT_ALL,
		    FALSE);

		/* Flush the socket buffer and disconnect */
		if (error == 0) {
			error = sodefunct(p, so,
			    SHUTDOWN_SOCKET_LEVEL_CONTENT_FILTER | SHUTDOWN_SOCKET_LEVEL_DISCONNECT_ALL);
		}
	}

	/* The filter is done, mark as detached */
	entry->cfe_flags |= CFEF_CFIL_DETACHED;
#if LIFECYCLE_DEBUG
	cfil_info_log(LOG_ERR, cfil_info, "CFIL: LIFECYCLE: DROP - DETACH");
#endif
	CFIL_LOG(LOG_INFO, "so %llx detached %u",
	    (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit);

	/* Pending data needs to go */
	cfil_flush_queues(so, cfil_info);

	if (cfil_info && (cfil_info->cfi_flags & CFIF_CLOSE_WAIT)) {
		if (cfil_filters_attached(so) == 0) {
			CFIL_LOG(LOG_INFO, "so %llx waking",
			    (uint64_t)VM_KERNEL_ADDRPERM(so));
			wakeup((caddr_t)cfil_info);
		}
	}
done:
	return error;
}
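
/*
 * Effect of a drop verdict (illustrative note): once CFIF_DROP is set,
 * every subsequent cfil_data_common() call for the flow returns EPIPE,
 * the socket is marked defunct (TCP only; UDP flows are tracked in
 * so_cfil_db and torn down per-flow), and data still parked in the
 * queues is discarded by cfil_flush_queues() instead of delivered.
 */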

int
cfil_action_bless_client(uint32_t kcunit, struct cfil_msg_hdr *msghdr)
{
	errno_t error = 0;
	struct cfil_info *cfil_info = NULL;

	bool cfil_attached = false;
	struct cfil_msg_bless_client *blessmsg = (struct cfil_msg_bless_client *)msghdr;

	// Search and lock socket
	struct socket *so = cfil_socket_from_client_uuid(blessmsg->cfb_client_uuid, &cfil_attached);
	if (so == NULL) {
		error = ENOENT;
	} else {
		// The client gets a pass automatically
		cfil_info = (so->so_cfil_db != NULL) ?
		    cfil_db_get_cfil_info(so->so_cfil_db, msghdr->cfm_sock_id) : so->so_cfil;

		if (cfil_attached) {
#if VERDICT_DEBUG
			if (cfil_info != NULL) {
				CFIL_LOG(LOG_ERR, "CFIL: VERDICT RECEIVED: BLESS %s <so %llx sockID %llu>",
				    cfil_info->cfi_hash_entry ? "UDP" : "TCP",
				    (uint64_t)VM_KERNEL_ADDRPERM(so),
				    cfil_info->cfi_sock_id);
			}
#endif
			(void)cfil_action_data_pass(so, cfil_info, kcunit, 1, CFM_MAX_OFFSET, CFM_MAX_OFFSET);
			(void)cfil_action_data_pass(so, cfil_info, kcunit, 0, CFM_MAX_OFFSET, CFM_MAX_OFFSET);
		} else {
			so->so_flags1 |= SOF1_CONTENT_FILTER_SKIP;
		}
		socket_unlock(so, 1);
	}

	return error;
}
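
/*
 * Example (illustrative): "blessing" is how a filtering agent exempts a
 * socket, typically its own control connection, from filtering. For an
 * attached socket the effect is a full pass in both directions:
 *
 *	cfil_action_data_pass(so, info, kcunit, 1, CFM_MAX_OFFSET, CFM_MAX_OFFSET);
 *	cfil_action_data_pass(so, info, kcunit, 0, CFM_MAX_OFFSET, CFM_MAX_OFFSET);
 *
 * while an unattached socket simply gets SOF1_CONTENT_FILTER_SKIP.
 */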

static int
cfil_update_entry_offsets(struct socket *so, struct cfil_info *cfil_info, int outgoing, unsigned int datalen)
{
	struct cfil_entry *entry;
	struct cfe_buf *entrybuf;
	uint32_t kcunit;

	CFIL_LOG(LOG_INFO, "so %llx outgoing %d datalen %u",
	    (uint64_t)VM_KERNEL_ADDRPERM(so), outgoing, datalen);

	for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
		entry = &cfil_info->cfi_entries[kcunit - 1];

		/* Are we attached to the filter? */
		if (entry->cfe_filter == NULL) {
			continue;
		}

		if (outgoing) {
			entrybuf = &entry->cfe_snd;
		} else {
			entrybuf = &entry->cfe_rcv;
		}

		entrybuf->cfe_ctl_q.q_start += datalen;
		entrybuf->cfe_pass_offset = entrybuf->cfe_ctl_q.q_start;
		entrybuf->cfe_peeked = entrybuf->cfe_ctl_q.q_start;
		if (entrybuf->cfe_peek_offset < entrybuf->cfe_pass_offset) {
			entrybuf->cfe_peek_offset = entrybuf->cfe_pass_offset;
		}

		entrybuf->cfe_ctl_q.q_end += datalen;

		entrybuf->cfe_pending_q.q_start += datalen;
		entrybuf->cfe_pending_q.q_end += datalen;
	}
	CFIL_INFO_VERIFY(cfil_info);
	return 0;
}

int
cfil_data_common(struct socket *so, struct cfil_info *cfil_info, int outgoing, struct sockaddr *to,
    struct mbuf *data, struct mbuf *control, uint32_t flags)
{
#pragma unused(to, control, flags)
	errno_t error = 0;
	unsigned int datalen;
	int mbcnt = 0;
	int mbnum = 0;
	int kcunit;
	struct cfi_buf *cfi_buf;
	struct mbuf *chain = NULL;

	if (cfil_info == NULL) {
		CFIL_LOG(LOG_ERR, "so %llx cfil detached",
		    (uint64_t)VM_KERNEL_ADDRPERM(so));
		error = 0;
		goto done;
	} else if (cfil_info->cfi_flags & CFIF_DROP) {
		CFIL_LOG(LOG_ERR, "so %llx drop set",
		    (uint64_t)VM_KERNEL_ADDRPERM(so));
		error = EPIPE;
		goto done;
	}

	datalen = cfil_data_length(data, &mbcnt, &mbnum);

	if (outgoing) {
		cfi_buf = &cfil_info->cfi_snd;
	} else {
		cfi_buf = &cfil_info->cfi_rcv;
	}

	cfi_buf->cfi_pending_last += datalen;
	cfi_buf->cfi_pending_mbcnt += mbcnt;
	cfi_buf->cfi_pending_mbnum += mbnum;

	if (IS_UDP(so)) {
		if (cfi_buf->cfi_pending_mbnum > cfil_udp_gc_mbuf_num_max ||
		    cfi_buf->cfi_pending_mbcnt > cfil_udp_gc_mbuf_cnt_max) {
			cfi_buf->cfi_tail_drop_cnt++;
			cfi_buf->cfi_pending_mbcnt -= mbcnt;
			cfi_buf->cfi_pending_mbnum -= mbnum;
			return EPIPE;
		}
	}

	cfil_info_buf_verify(cfi_buf);

#if DATA_DEBUG
	CFIL_LOG(LOG_DEBUG, "CFIL: QUEUEING DATA: <so %llx> %s: data %llx len %u flags 0x%x nextpkt %llx - cfi_pending_last %llu cfi_pending_mbcnt %u cfi_pass_offset %llu",
	    (uint64_t)VM_KERNEL_ADDRPERM(so),
	    outgoing ? "OUT" : "IN",
	    (uint64_t)VM_KERNEL_ADDRPERM(data), datalen, data->m_flags,
	    (uint64_t)VM_KERNEL_ADDRPERM(data->m_nextpkt),
	    cfi_buf->cfi_pending_last,
	    cfi_buf->cfi_pending_mbcnt,
	    cfi_buf->cfi_pass_offset);
#endif

	/* Fast path when below pass offset */
	if (cfi_buf->cfi_pending_last <= cfi_buf->cfi_pass_offset) {
		cfil_update_entry_offsets(so, cfil_info, outgoing, datalen);
#if DATA_DEBUG
		CFIL_LOG(LOG_DEBUG, "CFIL: QUEUEING DATA: FAST PATH");
#endif
	} else {
		for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
			// Is cfil attached to this filter?
			if (IS_ENTRY_ATTACHED(cfil_info, kcunit)) {
				if (IS_UDP(so)) {
					/* UDP only:
					 * Chain addr (incoming only TBD), control (optional) and data into one chain.
					 * This full chain will be reinjected into the socket after receiving the verdict.
					 */
					(void) cfil_udp_save_socket_state(cfil_info, data);
					chain = sbconcat_mbufs(NULL, outgoing ? NULL : to, data, control);
					if (chain == NULL) {
						return ENOBUFS;
					}
					data = chain;
				}
				error = cfil_data_filter(so, cfil_info, kcunit, outgoing, data,
				    datalen);
			}
			/* 0 means passed so continue with next filter */
			if (error != 0) {
				break;
			}
		}
	}

	/* Move cursor if no filter claimed the data */
	if (error == 0) {
		cfi_buf->cfi_pending_first += datalen;
		cfi_buf->cfi_pending_mbcnt -= mbcnt;
		cfi_buf->cfi_pending_mbnum -= mbnum;
		cfil_info_buf_verify(cfi_buf);
	}
done:
	CFIL_INFO_VERIFY(cfil_info);

	return error;
}
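
/*
 * Fast path example with illustrative numbers: if the agents have
 * already passed everything up to offset 2000 (cfi_pass_offset == 2000)
 * and only 1500 bytes have been seen so far, a new 200-byte chain makes
 * cfi_pending_last 1700 <= 2000, so the chain skips the filters and only
 * the per-entry offsets are advanced by cfil_update_entry_offsets().
 * Otherwise the chain is dispatched to each attached filter in kcunit
 * order until one of them claims it (returns EJUSTRETURN).
 */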

/*
 * Callback from socket layer sosendxxx()
 */
int
cfil_sock_data_out(struct socket *so, struct sockaddr *to,
    struct mbuf *data, struct mbuf *control, uint32_t flags)
{
	int error = 0;

	if (IS_UDP(so)) {
		return cfil_sock_udp_handle_data(TRUE, so, NULL, to, data, control, flags);
	}

	if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL) {
		return 0;
	}

	socket_lock_assert_owned(so);

	if (so->so_cfil->cfi_flags & CFIF_DROP) {
		CFIL_LOG(LOG_ERR, "so %llx drop set",
		    (uint64_t)VM_KERNEL_ADDRPERM(so));
		return EPIPE;
	}
	if (control != NULL) {
		CFIL_LOG(LOG_ERR, "so %llx control",
		    (uint64_t)VM_KERNEL_ADDRPERM(so));
		OSIncrementAtomic(&cfil_stats.cfs_data_out_control);
	}
	if ((flags & MSG_OOB)) {
		CFIL_LOG(LOG_ERR, "so %llx MSG_OOB",
		    (uint64_t)VM_KERNEL_ADDRPERM(so));
		OSIncrementAtomic(&cfil_stats.cfs_data_out_oob);
	}
	if ((so->so_snd.sb_flags & SB_LOCK) == 0) {
		panic("so %p SB_LOCK not set", so);
	}

	if (so->so_snd.sb_cfil_thread != NULL) {
		panic("%s sb_cfil_thread %p not NULL", __func__,
		    so->so_snd.sb_cfil_thread);
	}

	error = cfil_data_common(so, so->so_cfil, 1, to, data, control, flags);

	return error;
}

/*
 * Callback from socket layer sbappendxxx()
 */
int
cfil_sock_data_in(struct socket *so, struct sockaddr *from,
    struct mbuf *data, struct mbuf *control, uint32_t flags)
{
	int error = 0;

	if (IS_UDP(so)) {
		return cfil_sock_udp_handle_data(FALSE, so, NULL, from, data, control, flags);
	}

	if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL) {
		return 0;
	}

	socket_lock_assert_owned(so);

	if (so->so_cfil->cfi_flags & CFIF_DROP) {
		CFIL_LOG(LOG_ERR, "so %llx drop set",
		    (uint64_t)VM_KERNEL_ADDRPERM(so));
		return EPIPE;
	}
	if (control != NULL) {
		CFIL_LOG(LOG_ERR, "so %llx control",
		    (uint64_t)VM_KERNEL_ADDRPERM(so));
		OSIncrementAtomic(&cfil_stats.cfs_data_in_control);
	}
	if (data->m_type == MT_OOBDATA) {
		CFIL_LOG(LOG_ERR, "so %llx MSG_OOB",
		    (uint64_t)VM_KERNEL_ADDRPERM(so));
		OSIncrementAtomic(&cfil_stats.cfs_data_in_oob);
	}
	error = cfil_data_common(so, so->so_cfil, 0, from, data, control, flags);

	return error;
}

/*
 * Callback from socket layer soshutdownxxx()
 *
 * We may delay the shutdown write if there is outgoing data in progress.
 *
 * There is no point in delaying the shutdown read because the process
 * indicated that it does not want to read any more data.
 */
int
cfil_sock_shutdown(struct socket *so, int *how)
{
	int error = 0;

	if (IS_UDP(so)) {
		return cfil_sock_udp_shutdown(so, how);
	}

	if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL) {
		goto done;
	}

	socket_lock_assert_owned(so);

	CFIL_LOG(LOG_INFO, "so %llx how %d",
	    (uint64_t)VM_KERNEL_ADDRPERM(so), *how);

	/*
	 * Check the state of the socket before the content filter
	 */
	if (*how != SHUT_WR && (so->so_state & SS_CANTRCVMORE) != 0) {
		/* read already shut down */
		error = ENOTCONN;
		goto done;
	}
	if (*how != SHUT_RD && (so->so_state & SS_CANTSENDMORE) != 0) {
		/* write already shut down */
		error = ENOTCONN;
		goto done;
	}

	if ((so->so_cfil->cfi_flags & CFIF_DROP) != 0) {
		CFIL_LOG(LOG_ERR, "so %llx drop set",
		    (uint64_t)VM_KERNEL_ADDRPERM(so));
		goto done;
	}

	/*
	 * shutdown read: SHUT_RD or SHUT_RDWR
	 */
	if (*how != SHUT_WR) {
		if (so->so_cfil->cfi_flags & CFIF_SHUT_RD) {
			error = ENOTCONN;
			goto done;
		}
		so->so_cfil->cfi_flags |= CFIF_SHUT_RD;
		cfil_sock_notify_shutdown(so, SHUT_RD);
	}
	/*
	 * shutdown write: SHUT_WR or SHUT_RDWR
	 */
	if (*how != SHUT_RD) {
		if (so->so_cfil->cfi_flags & CFIF_SHUT_WR) {
			error = ENOTCONN;
			goto done;
		}
		so->so_cfil->cfi_flags |= CFIF_SHUT_WR;
		cfil_sock_notify_shutdown(so, SHUT_WR);
		/*
		 * When outgoing data is pending, we delay the shutdown at the
		 * protocol level until the content filters give the final
		 * verdict on the pending data.
		 */
		if (cfil_sock_data_pending(&so->so_snd) != 0) {
			/*
			 * When shutting down the read and write sides at once
			 * we can proceed to the final shutdown of the read
			 * side. Otherwise, we just return.
			 */
			if (*how == SHUT_WR) {
				error = EJUSTRETURN;
			} else if (*how == SHUT_RDWR) {
				*how = SHUT_RD;
			}
		}
	}
done:
	return error;
}
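
/*
 * Summary of the delayed shutdown cases above (illustrative):
 *
 *	*how		pending outgoing data	outcome
 *	SHUT_RD		ignored			shutdown proceeds
 *	SHUT_WR		yes			EJUSTRETURN (deferred)
 *	SHUT_RDWR	yes			*how demoted to SHUT_RD
 *
 * A deferred write-side shutdown completes in cfil_service_inject_queue()
 * once cfil_sock_data_pending() drops to zero and soshutdownlock_final()
 * is called.
 */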

/*
 * This is called when the socket is closed and there is no more
 * opportunity for filtering
 */
void
cfil_sock_is_closed(struct socket *so)
{
	errno_t error = 0;
	int kcunit;

	if (IS_UDP(so)) {
		cfil_sock_udp_is_closed(so);
		return;
	}

	if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL) {
		return;
	}

	CFIL_LOG(LOG_INFO, "so %llx", (uint64_t)VM_KERNEL_ADDRPERM(so));

	socket_lock_assert_owned(so);

	for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
		/* Let the filters know of the closing */
		error = cfil_dispatch_closed_event(so, so->so_cfil, kcunit);
	}

	/* Last chance to push passed data out */
	error = cfil_acquire_sockbuf(so, so->so_cfil, 1);
	if (error == 0) {
		cfil_service_inject_queue(so, so->so_cfil, 1);
	}
	cfil_release_sockbuf(so, 1);

	so->so_cfil->cfi_flags |= CFIF_SOCK_CLOSED;

	/* Pending data needs to go */
	cfil_flush_queues(so, so->so_cfil);

	CFIL_INFO_VERIFY(so->so_cfil);
}

/*
 * This is called when the socket is disconnected so let the filters
 * know about the disconnection and that no more data will come
 *
 * The how parameter has the same values as soshutdown()
 */
void
cfil_sock_notify_shutdown(struct socket *so, int how)
{
	errno_t error = 0;
	int kcunit;

	if (IS_UDP(so)) {
		cfil_sock_udp_notify_shutdown(so, how, 0, 0);
		return;
	}

	if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL) {
		return;
	}

	CFIL_LOG(LOG_INFO, "so %llx how %d",
	    (uint64_t)VM_KERNEL_ADDRPERM(so), how);

	socket_lock_assert_owned(so);

	for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
		/* Disconnect incoming side */
		if (how != SHUT_WR) {
			error = cfil_dispatch_disconnect_event(so, so->so_cfil, kcunit, 0);
		}
		/* Disconnect outgoing side */
		if (how != SHUT_RD) {
			error = cfil_dispatch_disconnect_event(so, so->so_cfil, kcunit, 1);
		}
	}
}

static int
cfil_filters_attached(struct socket *so)
{
	struct cfil_entry *entry;
	uint32_t kcunit;
	int attached = 0;

	if (IS_UDP(so)) {
		return cfil_filters_udp_attached(so, FALSE);
	}

	if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL) {
		return 0;
	}

	socket_lock_assert_owned(so);

	for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
		entry = &so->so_cfil->cfi_entries[kcunit - 1];

		/* Are we attached to the filter? */
		if (entry->cfe_filter == NULL) {
			continue;
		}
		if ((entry->cfe_flags & CFEF_SENT_SOCK_ATTACHED) == 0) {
			continue;
		}
		if ((entry->cfe_flags & CFEF_CFIL_DETACHED) != 0) {
			continue;
		}
		attached = 1;
		break;
	}

	return attached;
}

/*
 * This is called when the socket is closed and we are waiting for
 * the filters to give the final pass or drop
 */
void
cfil_sock_close_wait(struct socket *so)
{
	lck_mtx_t *mutex_held;
	struct timespec ts;
	int error;

	if (IS_UDP(so)) {
		cfil_sock_udp_close_wait(so);
		return;
	}

	if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL) {
		return;
	}

	CFIL_LOG(LOG_INFO, "so %llx", (uint64_t)VM_KERNEL_ADDRPERM(so));

	if (so->so_proto->pr_getlock != NULL) {
		mutex_held = (*so->so_proto->pr_getlock)(so, PR_F_WILLUNLOCK);
	} else {
		mutex_held = so->so_proto->pr_domain->dom_mtx;
	}
	LCK_MTX_ASSERT(mutex_held, LCK_MTX_ASSERT_OWNED);

	while (cfil_filters_attached(so)) {
		/*
		 * Notify the filters we are going away so they can detach
		 */
		cfil_sock_notify_shutdown(so, SHUT_RDWR);

		/*
		 * Make sure we need to wait after the filters are notified
		 * of the disconnection
		 */
		if (cfil_filters_attached(so) == 0) {
			break;
		}

		CFIL_LOG(LOG_INFO, "so %llx waiting",
		    (uint64_t)VM_KERNEL_ADDRPERM(so));

		ts.tv_sec = cfil_close_wait_timeout / 1000;
		ts.tv_nsec = (cfil_close_wait_timeout % 1000) *
		    NSEC_PER_USEC * 1000;

		OSIncrementAtomic(&cfil_stats.cfs_close_wait);
		so->so_cfil->cfi_flags |= CFIF_CLOSE_WAIT;
		error = msleep((caddr_t)so->so_cfil, mutex_held,
		    PSOCK | PCATCH, "cfil_sock_close_wait", &ts);
		so->so_cfil->cfi_flags &= ~CFIF_CLOSE_WAIT;

		CFIL_LOG(LOG_NOTICE, "so %llx timed out %d",
		    (uint64_t)VM_KERNEL_ADDRPERM(so), (error != 0));

		/*
		 * Force close in case of timeout
		 */
		if (error != 0) {
			OSIncrementAtomic(&cfil_stats.cfs_close_wait_timeout);
			break;
		}
	}
}
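
/*
 * Timing note (illustrative): cfil_close_wait_timeout is expressed in
 * milliseconds, as the ts conversion above shows. On msleep() timeout
 * the close is forced and cfs_close_wait_timeout is incremented. The
 * wait channel is the cfil_info itself, which is what
 * cfil_update_data_offsets(), cfil_action_drop() and
 * cfil_service_inject_queue() wakeup() once the last filter detaches.
 */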

/*
 * Returns the number of bytes held back by the content filters
 * for the given socket buffer
 */
int32_t
cfil_sock_data_pending(struct sockbuf *sb)
{
	struct socket *so = sb->sb_so;
	uint64_t pending = 0;

	if (IS_UDP(so)) {
		return cfil_sock_udp_data_pending(sb, FALSE);
	}

	if ((so->so_flags & SOF_CONTENT_FILTER) != 0 && so->so_cfil != NULL) {
		struct cfi_buf *cfi_buf;

		socket_lock_assert_owned(so);

		if ((sb->sb_flags & SB_RECV) == 0) {
			cfi_buf = &so->so_cfil->cfi_snd;
		} else {
			cfi_buf = &so->so_cfil->cfi_rcv;
		}

		pending = cfi_buf->cfi_pending_last -
		    cfi_buf->cfi_pending_first;

		/*
		 * If we are limited by the "chars of mbufs used" roughly
		 * adjust so we won't overcommit
		 */
		if (pending > (uint64_t)cfi_buf->cfi_pending_mbcnt) {
			pending = cfi_buf->cfi_pending_mbcnt;
		}
	}

	VERIFY(pending < INT32_MAX);

	return (int32_t)(pending);
}

/*
 * Return the socket buffer space used by data being held by content filters
 * so processes won't clog the socket buffer
 */
int32_t
cfil_sock_data_space(struct sockbuf *sb)
{
	struct socket *so = sb->sb_so;
	uint64_t pending = 0;

	if (IS_UDP(so)) {
		return cfil_sock_udp_data_pending(sb, TRUE);
	}

	if ((so->so_flags & SOF_CONTENT_FILTER) != 0 && so->so_cfil != NULL &&
	    so->so_snd.sb_cfil_thread != current_thread()) {
		struct cfi_buf *cfi_buf;

		socket_lock_assert_owned(so);

		if ((sb->sb_flags & SB_RECV) == 0) {
			cfi_buf = &so->so_cfil->cfi_snd;
		} else {
			cfi_buf = &so->so_cfil->cfi_rcv;
		}

		pending = cfi_buf->cfi_pending_last -
		    cfi_buf->cfi_pending_first;

		/*
		 * If we are limited by the "chars of mbufs used" roughly
		 * adjust so we won't overcommit
		 */
		if ((uint64_t)cfi_buf->cfi_pending_mbcnt > pending) {
			pending = cfi_buf->cfi_pending_mbcnt;
		}
	}

	VERIFY(pending < INT32_MAX);

	return (int32_t)(pending);
}
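
/*
 * Note (illustrative): cfil_sock_data_pending() and cfil_sock_data_space()
 * compute the same byte count; the difference is the caller. The socket
 * layer subtracts the _space variant from the available socket buffer
 * room so that data parked in the filter queues still counts against the
 * buffer limits, except when called from the re-injection thread itself
 * (sb_cfil_thread), which must be exempt to make forward progress.
 */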

/*
 * A callback from the socket and protocol layer when data becomes
 * available in the socket buffer to give a chance for the content filter
 * to re-inject data that was held back
 */
void
cfil_sock_buf_update(struct sockbuf *sb)
{
	int outgoing;
	int error;
	struct socket *so = sb->sb_so;

	if (IS_UDP(so)) {
		cfil_sock_udp_buf_update(sb);
		return;
	}

	if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL) {
		return;
	}

	if (!cfil_sbtrim) {
		return;
	}

	socket_lock_assert_owned(so);

	if ((sb->sb_flags & SB_RECV) == 0) {
		if ((so->so_cfil->cfi_flags & CFIF_RETRY_INJECT_OUT) == 0) {
			return;
		}
		outgoing = 1;
		OSIncrementAtomic(&cfil_stats.cfs_inject_q_out_retry);
	} else {
		if ((so->so_cfil->cfi_flags & CFIF_RETRY_INJECT_IN) == 0) {
			return;
		}
		outgoing = 0;
		OSIncrementAtomic(&cfil_stats.cfs_inject_q_in_retry);
	}

	CFIL_LOG(LOG_NOTICE, "so %llx outgoing %d",
	    (uint64_t)VM_KERNEL_ADDRPERM(so), outgoing);

	error = cfil_acquire_sockbuf(so, so->so_cfil, outgoing);
	if (error == 0) {
		cfil_service_inject_queue(so, so->so_cfil, outgoing);
	}
	cfil_release_sockbuf(so, outgoing);
}

int
sysctl_cfil_filter_list(struct sysctl_oid *oidp, void *arg1, int arg2,
    struct sysctl_req *req)
{
#pragma unused(oidp, arg1, arg2)
	int error = 0;
	size_t len = 0;
	u_int32_t i;

	/* Read only */
	if (req->newptr != USER_ADDR_NULL) {
		return EPERM;
	}

	cfil_rw_lock_shared(&cfil_lck_rw);

	for (i = 0; content_filters != NULL && i < MAX_CONTENT_FILTER; i++) {
		struct cfil_filter_stat filter_stat;
		struct content_filter *cfc = content_filters[i];

		if (cfc == NULL) {
			continue;
		}

		/* If just asking for the size */
		if (req->oldptr == USER_ADDR_NULL) {
			len += sizeof(struct cfil_filter_stat);
			continue;
		}

		bzero(&filter_stat, sizeof(struct cfil_filter_stat));
		filter_stat.cfs_len = sizeof(struct cfil_filter_stat);
		filter_stat.cfs_filter_id = cfc->cf_kcunit;
		filter_stat.cfs_flags = cfc->cf_flags;
		filter_stat.cfs_sock_count = cfc->cf_sock_count;
		filter_stat.cfs_necp_control_unit = cfc->cf_necp_control_unit;

		error = SYSCTL_OUT(req, &filter_stat,
		    sizeof(struct cfil_filter_stat));
		if (error != 0) {
			break;
		}
	}
	/* If just asking for the size */
	if (req->oldptr == USER_ADDR_NULL) {
		req->oldidx = len;
	}

	cfil_rw_unlock_shared(&cfil_lck_rw);

#if SHOW_DEBUG
	if (req->oldptr != USER_ADDR_NULL) {
		for (i = 1; content_filters != NULL && i <= MAX_CONTENT_FILTER; i++) {
			cfil_filter_show(i);
		}
	}
#endif

	return error;
}
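
/*
 * User space usage sketch (illustrative; assumes this handler is
 * registered under the sysctl name "net.cfil.filter_list", which is done
 * elsewhere in this file):
 *
 *	size_t len = 0;
 *	if (sysctlbyname("net.cfil.filter_list", NULL, &len, NULL, 0) == 0 &&
 *	    len >= sizeof(struct cfil_filter_stat)) {
 *		struct cfil_filter_stat *stats = malloc(len);
 *		if (stats != NULL &&
 *		    sysctlbyname("net.cfil.filter_list", stats, &len, NULL, 0) == 0) {
 *			// one cfil_filter_stat record per attached filter
 *		}
 *		free(stats);
 *	}
 *
 * The two-call pattern mirrors the handler above: a NULL oldptr request
 * returns only the required buffer length.
 */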

static int
sysctl_cfil_sock_list(struct sysctl_oid *oidp, void *arg1, int arg2,
    struct sysctl_req *req)
{
#pragma unused(oidp, arg1, arg2)
	int error = 0;
	u_int32_t i;
	struct cfil_info *cfi;

	/* Read only */
	if (req->newptr != USER_ADDR_NULL) {
		return EPERM;
	}

	cfil_rw_lock_shared(&cfil_lck_rw);

	/*
	 * If just asking for the size
	 */
	if (req->oldptr == USER_ADDR_NULL) {
		req->oldidx = cfil_sock_attached_count *
		    sizeof(struct cfil_sock_stat);
		/* Bump the length in case new sockets get attached */
		req->oldidx += req->oldidx >> 3;
		goto done;
	}

	TAILQ_FOREACH(cfi, &cfil_sock_head, cfi_link) {
		struct cfil_entry *entry;
		struct cfil_sock_stat stat;
		struct socket *so = cfi->cfi_so;

		bzero(&stat, sizeof(struct cfil_sock_stat));
		stat.cfs_len = sizeof(struct cfil_sock_stat);
		stat.cfs_sock_id = cfi->cfi_sock_id;
		stat.cfs_flags = cfi->cfi_flags;

		if (so != NULL) {
			stat.cfs_pid = so->last_pid;
			memcpy(stat.cfs_uuid, so->last_uuid,
			    sizeof(uuid_t));
			if (so->so_flags & SOF_DELEGATED) {
				stat.cfs_e_pid = so->e_pid;
				memcpy(stat.cfs_e_uuid, so->e_uuid,
				    sizeof(uuid_t));
			} else {
				stat.cfs_e_pid = so->last_pid;
				memcpy(stat.cfs_e_uuid, so->last_uuid,
				    sizeof(uuid_t));
			}

			stat.cfs_sock_family = so->so_proto->pr_domain->dom_family;
			stat.cfs_sock_type = so->so_proto->pr_type;
			stat.cfs_sock_protocol = so->so_proto->pr_protocol;
		}

		stat.cfs_snd.cbs_pending_first =
		    cfi->cfi_snd.cfi_pending_first;
		stat.cfs_snd.cbs_pending_last =
		    cfi->cfi_snd.cfi_pending_last;
		stat.cfs_snd.cbs_inject_q_len =
		    cfil_queue_len(&cfi->cfi_snd.cfi_inject_q);
		stat.cfs_snd.cbs_pass_offset =
		    cfi->cfi_snd.cfi_pass_offset;

		stat.cfs_rcv.cbs_pending_first =
		    cfi->cfi_rcv.cfi_pending_first;
		stat.cfs_rcv.cbs_pending_last =
		    cfi->cfi_rcv.cfi_pending_last;
		stat.cfs_rcv.cbs_inject_q_len =
		    cfil_queue_len(&cfi->cfi_rcv.cfi_inject_q);
		stat.cfs_rcv.cbs_pass_offset =
		    cfi->cfi_rcv.cfi_pass_offset;

		for (i = 0; i < MAX_CONTENT_FILTER; i++) {
			struct cfil_entry_stat *estat;
			struct cfe_buf *ebuf;
			struct cfe_buf_stat *sbuf;

			entry = &cfi->cfi_entries[i];

			estat = &stat.ces_entries[i];

			estat->ces_len = sizeof(struct cfil_entry_stat);
			estat->ces_filter_id = entry->cfe_filter ?
			    entry->cfe_filter->cf_kcunit : 0;
			estat->ces_flags = entry->cfe_flags;
			estat->ces_necp_control_unit =
			    entry->cfe_necp_control_unit;

			estat->ces_last_event.tv_sec =
			    (int64_t)entry->cfe_last_event.tv_sec;
			estat->ces_last_event.tv_usec =
			    (int64_t)entry->cfe_last_event.tv_usec;

			estat->ces_last_action.tv_sec =
			    (int64_t)entry->cfe_last_action.tv_sec;
			estat->ces_last_action.tv_usec =
			    (int64_t)entry->cfe_last_action.tv_usec;

			ebuf = &entry->cfe_snd;
			sbuf = &estat->ces_snd;
			sbuf->cbs_pending_first =
			    cfil_queue_offset_first(&ebuf->cfe_pending_q);
			sbuf->cbs_pending_last =
			    cfil_queue_offset_last(&ebuf->cfe_pending_q);
			sbuf->cbs_ctl_first =
			    cfil_queue_offset_first(&ebuf->cfe_ctl_q);
			sbuf->cbs_ctl_last =
			    cfil_queue_offset_last(&ebuf->cfe_ctl_q);
			sbuf->cbs_pass_offset = ebuf->cfe_pass_offset;
			sbuf->cbs_peek_offset = ebuf->cfe_peek_offset;
			sbuf->cbs_peeked = ebuf->cfe_peeked;

			ebuf = &entry->cfe_rcv;
			sbuf = &estat->ces_rcv;
			sbuf->cbs_pending_first =
			    cfil_queue_offset_first(&ebuf->cfe_pending_q);
			sbuf->cbs_pending_last =
			    cfil_queue_offset_last(&ebuf->cfe_pending_q);
			sbuf->cbs_ctl_first =
			    cfil_queue_offset_first(&ebuf->cfe_ctl_q);
			sbuf->cbs_ctl_last =
			    cfil_queue_offset_last(&ebuf->cfe_ctl_q);
			sbuf->cbs_pass_offset = ebuf->cfe_pass_offset;
			sbuf->cbs_peek_offset = ebuf->cfe_peek_offset;
			sbuf->cbs_peeked = ebuf->cfe_peeked;
		}
		error = SYSCTL_OUT(req, &stat,
		    sizeof(struct cfil_sock_stat));
		if (error != 0) {
			break;
		}
	}
done:
	cfil_rw_unlock_shared(&cfil_lck_rw);

#if SHOW_DEBUG
	if (req->oldptr != USER_ADDR_NULL) {
		cfil_info_show();
	}
#endif

	return error;
}

/*
 * UDP Socket Support
 */
static void
cfil_hash_entry_log(int level, struct socket *so, struct cfil_hash_entry *entry, uint64_t sockId, const char* msg)
{
	char local[MAX_IPv6_STR_LEN + 6];
	char remote[MAX_IPv6_STR_LEN + 6];
	const void *addr;

	// No sock or no entry, no-op
	if (so == NULL || entry == NULL) {
		return;
	}

	local[0] = remote[0] = 0x0;

	switch (entry->cfentry_family) {
	case AF_INET6:
		addr = &entry->cfentry_laddr.addr6;
		inet_ntop(AF_INET6, addr, local, sizeof(local));
		addr = &entry->cfentry_faddr.addr6;
		inet_ntop(AF_INET6, addr, remote, sizeof(remote));
		break;
	case AF_INET:
		addr = &entry->cfentry_laddr.addr46.ia46_addr4.s_addr;
		inet_ntop(AF_INET, addr, local, sizeof(local));
		addr = &entry->cfentry_faddr.addr46.ia46_addr4.s_addr;
		inet_ntop(AF_INET, addr, remote, sizeof(remote));
		break;
	default:
		return;
	}

	CFIL_LOG(level, "<%s>: <UDP so %llx, entry %p, sockID %llu> lport %d fport %d laddr %s faddr %s",
	    msg,
	    (uint64_t)VM_KERNEL_ADDRPERM(so), entry, sockId,
	    ntohs(entry->cfentry_lport), ntohs(entry->cfentry_fport), local, remote);
}

static void
cfil_inp_log(int level, struct socket *so, const char* msg)
{
	struct inpcb *inp = NULL;
	char local[MAX_IPv6_STR_LEN + 6];
	char remote[MAX_IPv6_STR_LEN + 6];
	const void *addr;

	if (so == NULL) {
		return;
	}

	inp = sotoinpcb(so);
	if (inp == NULL) {
		return;
	}

	local[0] = remote[0] = 0x0;

#if INET6
	if (inp->inp_vflag & INP_IPV6) {
		addr = &inp->in6p_laddr.s6_addr32;
		inet_ntop(AF_INET6, addr, local, sizeof(local));
		addr = &inp->in6p_faddr.s6_addr32;
		inet_ntop(AF_INET6, addr, remote, sizeof(remote));
	} else
#endif /* INET6 */
	{
		addr = &inp->inp_laddr.s_addr;
		inet_ntop(AF_INET, addr, local, sizeof(local));
		addr = &inp->inp_faddr.s_addr;
		inet_ntop(AF_INET, addr, remote, sizeof(remote));
	}

	if (so->so_cfil != NULL) {
		CFIL_LOG(level, "<%s>: <%s so %llx - flags 0x%x 0x%x, sockID %llu> lport %d fport %d laddr %s faddr %s",
		    msg, IS_UDP(so) ? "UDP" : "TCP",
		    (uint64_t)VM_KERNEL_ADDRPERM(so), inp->inp_flags, inp->inp_socket->so_flags, so->so_cfil->cfi_sock_id,
		    ntohs(inp->inp_lport), ntohs(inp->inp_fport), local, remote);
	} else {
		CFIL_LOG(level, "<%s>: <%s so %llx - flags 0x%x 0x%x> lport %d fport %d laddr %s faddr %s",
		    msg, IS_UDP(so) ? "UDP" : "TCP",
		    (uint64_t)VM_KERNEL_ADDRPERM(so), inp->inp_flags, inp->inp_socket->so_flags,
		    ntohs(inp->inp_lport), ntohs(inp->inp_fport), local, remote);
	}
}

static void
cfil_info_log(int level, struct cfil_info *cfil_info, const char* msg)
{
	if (cfil_info == NULL) {
		return;
	}

	if (cfil_info->cfi_hash_entry != NULL) {
		cfil_hash_entry_log(level, cfil_info->cfi_so, cfil_info->cfi_hash_entry, cfil_info->cfi_sock_id, msg);
	} else {
		cfil_inp_log(level, cfil_info->cfi_so, msg);
	}
}

errno_t
cfil_db_init(struct socket *so)
{
	errno_t error = 0;
	struct cfil_db *db = NULL;

	CFIL_LOG(LOG_INFO, "");

	db = zalloc(cfil_db_zone);
	if (db == NULL) {
		error = ENOMEM;
		goto done;
	}
	bzero(db, sizeof(struct cfil_db));
	db->cfdb_so = so;
	db->cfdb_hashbase = hashinit(CFILHASHSIZE, M_CFIL, &db->cfdb_hashmask);
	if (db->cfdb_hashbase == NULL) {
		zfree(cfil_db_zone, db);
		db = NULL;
		error = ENOMEM;
		goto done;
	}

	so->so_cfil_db = db;

done:
	return error;
}

void
cfil_db_free(struct socket *so)
{
	struct cfil_hash_entry *entry = NULL;
	struct cfil_hash_entry *temp_entry = NULL;
	struct cfilhashhead *cfilhash = NULL;
	struct cfil_db *db = NULL;

	CFIL_LOG(LOG_INFO, "");

	if (so == NULL || so->so_cfil_db == NULL) {
		return;
	}
	db = so->so_cfil_db;

#if LIFECYCLE_DEBUG
	CFIL_LOG(LOG_ERR, "CFIL: LIFECYCLE: <so %llx, db %p> freeing db (count == %d)",
	    (uint64_t)VM_KERNEL_ADDRPERM(so), db, db->cfdb_count);
#endif

	for (int i = 0; i < CFILHASHSIZE; i++) {
		cfilhash = &db->cfdb_hashbase[i];
		LIST_FOREACH_SAFE(entry, cfilhash, cfentry_link, temp_entry) {
			if (entry->cfentry_cfil != NULL) {
#if LIFECYCLE_DEBUG
				cfil_info_log(LOG_ERR, entry->cfentry_cfil, "CFIL: LIFECYCLE: DB FREE CLEAN UP");
#endif
				cfil_info_free(entry->cfentry_cfil);
				OSIncrementAtomic(&cfil_stats.cfs_sock_detached);
				entry->cfentry_cfil = NULL;
			}

			cfil_db_delete_entry(db, entry);
			if (so->so_flags & SOF_CONTENT_FILTER) {
				if (db->cfdb_count == 0) {
					so->so_flags &= ~SOF_CONTENT_FILTER;
				}
				VERIFY(so->so_usecount > 0);
				so->so_usecount--;
			}
		}
	}

	// Make sure all entries are cleaned up!
	VERIFY(db->cfdb_count == 0);
#if LIFECYCLE_DEBUG
	CFIL_LOG(LOG_ERR, "CFIL: LIFECYCLE: so usecount %d", so->so_usecount);
#endif

	FREE(db->cfdb_hashbase, M_CFIL);
	zfree(cfil_db_zone, db);
	so->so_cfil_db = NULL;
}

static bool
fill_cfil_hash_entry_from_address(struct cfil_hash_entry *entry, bool isLocal, struct sockaddr *addr)
{
	struct sockaddr_in *sin = NULL;
	struct sockaddr_in6 *sin6 = NULL;

	if (entry == NULL || addr == NULL) {
		return FALSE;
	}

	switch (addr->sa_family) {
	case AF_INET:
		sin = satosin(addr);
		if (sin->sin_len != sizeof(*sin)) {
			return FALSE;
		}
		if (isLocal == TRUE) {
			entry->cfentry_lport = sin->sin_port;
			entry->cfentry_laddr.addr46.ia46_addr4.s_addr = sin->sin_addr.s_addr;
		} else {
			entry->cfentry_fport = sin->sin_port;
			entry->cfentry_faddr.addr46.ia46_addr4.s_addr = sin->sin_addr.s_addr;
		}
		entry->cfentry_family = AF_INET;
		return TRUE;
	case AF_INET6:
		sin6 = satosin6(addr);
		if (sin6->sin6_len != sizeof(*sin6)) {
			return FALSE;
		}
		if (isLocal == TRUE) {
			entry->cfentry_lport = sin6->sin6_port;
			entry->cfentry_laddr.addr6 = sin6->sin6_addr;
		} else {
			entry->cfentry_fport = sin6->sin6_port;
			entry->cfentry_faddr.addr6 = sin6->sin6_addr;
		}
		entry->cfentry_family = AF_INET6;
		return TRUE;
	default:
		return FALSE;
	}
}
5058
5059static bool
5060fill_cfil_hash_entry_from_inp(struct cfil_hash_entry *entry, bool isLocal, struct inpcb *inp)
5061{
0a7de745
A
5062 if (entry == NULL || inp == NULL) {
5063 return FALSE;
5064 }
5065
5066 if (inp->inp_vflag & INP_IPV4) {
5067 if (isLocal == TRUE) {
5068 entry->cfentry_lport = inp->inp_lport;
5069 entry->cfentry_laddr.addr46.ia46_addr4.s_addr = inp->inp_laddr.s_addr;
5070 } else {
5071 entry->cfentry_fport = inp->inp_fport;
5072 entry->cfentry_faddr.addr46.ia46_addr4.s_addr = inp->inp_faddr.s_addr;
5073 }
5074 entry->cfentry_family = AF_INET;
5075 return TRUE;
5076 } else if (inp->inp_vflag & INP_IPV6) {
5077 if (isLocal == TRUE) {
5078 entry->cfentry_lport = inp->inp_lport;
5079 entry->cfentry_laddr.addr6 = inp->in6p_laddr;
5080 } else {
5081 entry->cfentry_fport = inp->inp_fport;
5082 entry->cfentry_faddr.addr6 = inp->in6p_faddr;
5083 }
5084 entry->cfentry_family = AF_INET6;
5085 return TRUE;
5086 }
5087 return FALSE;
d9a64523
A
5088}
5089
5090bool
5091check_port(struct sockaddr *addr, u_short port)
5092{
5093 struct sockaddr_in *sin = NULL;
5094 struct sockaddr_in6 *sin6 = NULL;
5095
5096 if (addr == NULL || port == 0) {
5097 return FALSE;
5098 }
5099
5100 switch (addr->sa_family) {
0a7de745
A
5101 case AF_INET:
5102 sin = satosin(addr);
5103 if (sin->sin_len != sizeof(*sin)) {
5104 return FALSE;
5105 }
5106 if (port == ntohs(sin->sin_port)) {
5107 return TRUE;
5108 }
5109 break;
5110 case AF_INET6:
5111 sin6 = satosin6(addr);
5112 if (sin6->sin6_len != sizeof(*sin6)) {
5113 return FALSE;
5114 }
5115 if (port == ntohs(sin6->sin6_port)) {
5116 return TRUE;
5117 }
5118 break;
5119 default:
5120 break;
d9a64523
A
5121 }
5122 return FALSE;
5123}
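
/*
 * Editor's note -- not part of the original source: check_port() takes the
 * port argument in host byte order and compares it against the network-order
 * sin_port/sin6_port via ntohs(). A hedged caller-side sketch ("addr" is
 * assumed to be a sockaddr supplied by the caller):
 *
 *	if (check_port(addr, 53)) {
 *		// flow uses the DNS port
 *	}
 */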

struct cfil_hash_entry *
cfil_db_lookup_entry_with_sockid(struct cfil_db *db, u_int64_t sock_id)
{
    struct cfilhashhead *cfilhash = NULL;
    u_int32_t flowhash = (u_int32_t)(sock_id & 0x0ffffffff);
    struct cfil_hash_entry *nextentry;

    if (db == NULL || db->cfdb_hashbase == NULL || sock_id == 0) {
        return NULL;
    }

    flowhash &= db->cfdb_hashmask;
    cfilhash = &db->cfdb_hashbase[flowhash];

    LIST_FOREACH(nextentry, cfilhash, cfentry_link) {
        if (nextentry->cfentry_cfil != NULL &&
            nextentry->cfentry_cfil->cfi_sock_id == sock_id) {
            CFIL_LOG(LOG_DEBUG, "CFIL: UDP <so %llx> matched <id %llu, hash %u>",
                (uint64_t)VM_KERNEL_ADDRPERM(db->cfdb_so), nextentry->cfentry_cfil->cfi_sock_id, flowhash);
            cfil_hash_entry_log(LOG_DEBUG, db->cfdb_so, nextentry, 0, "CFIL: UDP found entry");
            return nextentry;
        }
    }

    CFIL_LOG(LOG_DEBUG, "CFIL: UDP <so %llx> NOT matched <id %llu, hash %u>",
        (uint64_t)VM_KERNEL_ADDRPERM(db->cfdb_so), sock_id, flowhash);
    return NULL;
}
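
/*
 * Editor's note -- not part of the original source: the lookup above works
 * because the low 32 bits of the 64-bit sock id carry the flow hash, so the
 * bucket can be re-derived from the id alone. A hedged sketch of that
 * encode/decode convention; the exact layout of cfi_sock_id is an assumption
 * inferred from the mask used above, not a statement of the real encoding.
 */
#if 0 /* illustration only */
static inline u_int64_t
toy_sock_id_make(u_int32_t gencnt, u_int32_t flowhash)
{
    /* high 32 bits: generation count; low 32 bits: flow hash */
    return ((u_int64_t)gencnt << 32) | flowhash;
}

static inline u_int32_t
toy_sock_id_flowhash(u_int64_t sock_id)
{
    return (u_int32_t)(sock_id & 0x0ffffffff);  /* same mask as above */
}
#endif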

struct cfil_hash_entry *
cfil_db_lookup_entry(struct cfil_db *db, struct sockaddr *local, struct sockaddr *remote)
{
    struct cfil_hash_entry matchentry;
    struct cfil_hash_entry *nextentry = NULL;
    struct inpcb *inp = sotoinpcb(db->cfdb_so);
    u_int32_t hashkey_faddr = 0, hashkey_laddr = 0;
    int inp_hash_element = 0;
    struct cfilhashhead *cfilhash = NULL;

    CFIL_LOG(LOG_INFO, "");

    if (inp == NULL) {
        goto done;
    }

    if (local != NULL) {
        fill_cfil_hash_entry_from_address(&matchentry, TRUE, local);
    } else {
        fill_cfil_hash_entry_from_inp(&matchentry, TRUE, inp);
    }
    if (remote != NULL) {
        fill_cfil_hash_entry_from_address(&matchentry, FALSE, remote);
    } else {
        fill_cfil_hash_entry_from_inp(&matchentry, FALSE, inp);
    }

#if INET6
    if (inp->inp_vflag & INP_IPV6) {
        hashkey_faddr = matchentry.cfentry_faddr.addr6.s6_addr32[3];
        hashkey_laddr = matchentry.cfentry_laddr.addr6.s6_addr32[3];
    } else
#endif /* INET6 */
    {
        hashkey_faddr = matchentry.cfentry_faddr.addr46.ia46_addr4.s_addr;
        hashkey_laddr = matchentry.cfentry_laddr.addr46.ia46_addr4.s_addr;
    }

    inp_hash_element = CFIL_HASH(hashkey_laddr, hashkey_faddr,
        matchentry.cfentry_lport, matchentry.cfentry_fport);
    inp_hash_element &= db->cfdb_hashmask;

    cfilhash = &db->cfdb_hashbase[inp_hash_element];

    LIST_FOREACH(nextentry, cfilhash, cfentry_link) {
#if INET6
        if ((inp->inp_vflag & INP_IPV6) &&
            nextentry->cfentry_lport == matchentry.cfentry_lport &&
            nextentry->cfentry_fport == matchentry.cfentry_fport &&
            IN6_ARE_ADDR_EQUAL(&nextentry->cfentry_laddr.addr6, &matchentry.cfentry_laddr.addr6) &&
            IN6_ARE_ADDR_EQUAL(&nextentry->cfentry_faddr.addr6, &matchentry.cfentry_faddr.addr6)) {
#if DATA_DEBUG
            cfil_hash_entry_log(LOG_DEBUG, db->cfdb_so, &matchentry, 0, "CFIL LOOKUP ENTRY: UDP V6 found entry");
#endif
            return nextentry;
        } else
#endif /* INET6 */
        if (nextentry->cfentry_lport == matchentry.cfentry_lport &&
            nextentry->cfentry_fport == matchentry.cfentry_fport &&
            nextentry->cfentry_laddr.addr46.ia46_addr4.s_addr == matchentry.cfentry_laddr.addr46.ia46_addr4.s_addr &&
            nextentry->cfentry_faddr.addr46.ia46_addr4.s_addr == matchentry.cfentry_faddr.addr46.ia46_addr4.s_addr) {
#if DATA_DEBUG
            cfil_hash_entry_log(LOG_DEBUG, db->cfdb_so, &matchentry, 0, "CFIL LOOKUP ENTRY: UDP V4 found entry");
#endif
            return nextentry;
        }
    }

done:
#if DATA_DEBUG
    cfil_hash_entry_log(LOG_DEBUG, db->cfdb_so, &matchentry, 0, "CFIL LOOKUP ENTRY: UDP no entry found");
#endif
    return NULL;
}

void
cfil_db_delete_entry(struct cfil_db *db, struct cfil_hash_entry *hash_entry)
{
    if (hash_entry == NULL) {
        return;
    }
    if (db == NULL || db->cfdb_count == 0) {
        return;
    }
    db->cfdb_count--;
    if (db->cfdb_only_entry == hash_entry) {
        db->cfdb_only_entry = NULL;
    }
    LIST_REMOVE(hash_entry, cfentry_link);
    zfree(cfil_hash_entry_zone, hash_entry);
}

struct cfil_hash_entry *
cfil_db_add_entry(struct cfil_db *db, struct sockaddr *local, struct sockaddr *remote)
{
    struct cfil_hash_entry *entry = NULL;
    struct inpcb *inp = sotoinpcb(db->cfdb_so);
    u_int32_t hashkey_faddr = 0, hashkey_laddr = 0;
    int inp_hash_element = 0;
    struct cfilhashhead *cfilhash = NULL;

    CFIL_LOG(LOG_INFO, "");

    if (inp == NULL) {
        goto done;
    }

    entry = zalloc(cfil_hash_entry_zone);
    if (entry == NULL) {
        goto done;
    }
    bzero(entry, sizeof(struct cfil_hash_entry));

    if (local != NULL) {
        fill_cfil_hash_entry_from_address(entry, TRUE, local);
    } else {
        fill_cfil_hash_entry_from_inp(entry, TRUE, inp);
    }
    if (remote != NULL) {
        fill_cfil_hash_entry_from_address(entry, FALSE, remote);
    } else {
        fill_cfil_hash_entry_from_inp(entry, FALSE, inp);
    }
    entry->cfentry_lastused = net_uptime();

#if INET6
    if (inp->inp_vflag & INP_IPV6) {
        hashkey_faddr = entry->cfentry_faddr.addr6.s6_addr32[3];
        hashkey_laddr = entry->cfentry_laddr.addr6.s6_addr32[3];
    } else
#endif /* INET6 */
    {
        hashkey_faddr = entry->cfentry_faddr.addr46.ia46_addr4.s_addr;
        hashkey_laddr = entry->cfentry_laddr.addr46.ia46_addr4.s_addr;
    }
    entry->cfentry_flowhash = CFIL_HASH(hashkey_laddr, hashkey_faddr,
        entry->cfentry_lport, entry->cfentry_fport);
    inp_hash_element = entry->cfentry_flowhash & db->cfdb_hashmask;

    cfilhash = &db->cfdb_hashbase[inp_hash_element];

    LIST_INSERT_HEAD(cfilhash, entry, cfentry_link);
    db->cfdb_count++;
    db->cfdb_only_entry = entry;
    cfil_hash_entry_log(LOG_DEBUG, db->cfdb_so, entry, 0, "CFIL: cfil_db_add_entry: ADDED");

done:
    CFIL_LOG(LOG_DEBUG, "CFIL: UDP <so %llx> total count %d", (uint64_t)VM_KERNEL_ADDRPERM(db->cfdb_so), db->cfdb_count);
    return entry;
}
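
/*
 * Editor's note -- not part of the original source: cfil_db_add_entry() keys
 * each flow on the 4-tuple through CFIL_HASH(laddr, faddr, lport, fport),
 * defined elsewhere in this file. A stand-in with the same shape, assuming
 * only that any reasonable mixing of the four fields will do; toy_flow_hash
 * is hypothetical and not the real macro.
 */
#if 0 /* illustration only */
static inline u_int32_t
toy_flow_hash(u_int32_t laddr, u_int32_t faddr, u_int16_t lport, u_int16_t fport)
{
    u_int32_t h = laddr ^ faddr ^ (((u_int32_t)lport << 16) | fport);

    /* finalize so similar tuples do not cluster in one bucket */
    h ^= h >> 16;
    h *= 0x7feb352dU;
    h ^= h >> 15;
    return h;
}
#endif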

struct cfil_info *
cfil_db_get_cfil_info(struct cfil_db *db, cfil_sock_id_t id)
{
    struct cfil_hash_entry *hash_entry = NULL;

    CFIL_LOG(LOG_INFO, "");

    if (db == NULL || id == 0) {
        // Note: do not dereference db in this log; it may be NULL here
        CFIL_LOG(LOG_DEBUG, "CFIL: UDP NULL DB <id %llu>", id);
        return NULL;
    }

    // Optimization for a connected UDP socket, which only has one flow:
    // no need to do the hash lookup.
    if (db->cfdb_count == 1) {
        if (db->cfdb_only_entry && db->cfdb_only_entry->cfentry_cfil &&
            db->cfdb_only_entry->cfentry_cfil->cfi_sock_id == id) {
            return db->cfdb_only_entry->cfentry_cfil;
        }
    }

    hash_entry = cfil_db_lookup_entry_with_sockid(db, id);
    return hash_entry != NULL ? hash_entry->cfentry_cfil : NULL;
}
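
/*
 * Editor's note -- not part of the original source: the cfdb_count == 1 fast
 * path above exists because a connected UDP socket typically carries exactly
 * one flow, so per-datagram lookups would otherwise always pay for a hash
 * walk. Hedged caller-side sketch:
 *
 *	struct cfil_info *info = cfil_db_get_cfil_info(so->so_cfil_db, id);
 *	if (info == NULL) {
 *		// flow already expired, detached, or never existed
 *	}
 */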

struct cfil_hash_entry *
cfil_sock_udp_get_flow(struct socket *so, uint32_t filter_control_unit, bool outgoing, struct sockaddr *local, struct sockaddr *remote)
{
#pragma unused(so, filter_control_unit, outgoing, local, remote)
    struct cfil_hash_entry *hash_entry = NULL;

    errno_t error = 0;
    socket_lock_assert_owned(so);

    // If new socket, allocate cfil db
    if (so->so_cfil_db == NULL) {
        if (cfil_db_init(so) != 0) {
            return NULL;
        }
    }

    // See if flow already exists.
    hash_entry = cfil_db_lookup_entry(so->so_cfil_db, local, remote);
    if (hash_entry != NULL) {
        return hash_entry;
    }

    hash_entry = cfil_db_add_entry(so->so_cfil_db, local, remote);
    if (hash_entry == NULL) {
        OSIncrementAtomic(&cfil_stats.cfs_sock_attach_no_mem);
        CFIL_LOG(LOG_ERR, "CFIL: UDP failed to add entry");
        return NULL;
    }

    if (cfil_info_alloc(so, hash_entry) == NULL ||
        hash_entry->cfentry_cfil == NULL) {
        cfil_db_delete_entry(so->so_cfil_db, hash_entry);
        CFIL_LOG(LOG_ERR, "CFIL: UDP failed to alloc cfil_info");
        OSIncrementAtomic(&cfil_stats.cfs_sock_attach_no_mem);
        return NULL;
    }

#if LIFECYCLE_DEBUG
    cfil_info_log(LOG_ERR, hash_entry->cfentry_cfil, "CFIL: LIFECYCLE: ADDED");
#endif

    if (cfil_info_attach_unit(so, filter_control_unit, hash_entry->cfentry_cfil) == 0) {
        cfil_info_free(hash_entry->cfentry_cfil);
        cfil_db_delete_entry(so->so_cfil_db, hash_entry);
        CFIL_LOG(LOG_ERR, "CFIL: UDP cfil_info_attach_unit(%u) failed",
            filter_control_unit);
        OSIncrementAtomic(&cfil_stats.cfs_sock_attach_failed);
        return NULL;
    }
    CFIL_LOG(LOG_DEBUG, "CFIL: UDP <so %llx> filter_control_unit %u sockID %llu attached",
        (uint64_t)VM_KERNEL_ADDRPERM(so),
        filter_control_unit, hash_entry->cfentry_cfil->cfi_sock_id);

    so->so_flags |= SOF_CONTENT_FILTER;
    OSIncrementAtomic(&cfil_stats.cfs_sock_attached);

    /* Hold a reference on the socket for each flow */
    so->so_usecount++;

    error = cfil_dispatch_attach_event(so, hash_entry->cfentry_cfil, filter_control_unit);
    /* We can recover from flow control or out of memory errors */
    if (error != 0 && error != ENOBUFS && error != ENOMEM) {
        return NULL;
    }

    CFIL_INFO_VERIFY(hash_entry->cfentry_cfil);
    return hash_entry;
}

errno_t
cfil_sock_udp_handle_data(bool outgoing, struct socket *so,
    struct sockaddr *local, struct sockaddr *remote,
    struct mbuf *data, struct mbuf *control, uint32_t flags)
{
#pragma unused(outgoing, so, local, remote, data, control, flags)
    errno_t error = 0;
    uint32_t filter_control_unit;
    struct cfil_hash_entry *hash_entry = NULL;
    struct cfil_info *cfil_info = NULL;

    socket_lock_assert_owned(so);

    if (cfil_active_count == 0) {
        CFIL_LOG(LOG_DEBUG, "CFIL: UDP no active filter");
        OSIncrementAtomic(&cfil_stats.cfs_sock_attach_in_vain);
        return error;
    }

    filter_control_unit = necp_socket_get_content_filter_control_unit(so);
    if (filter_control_unit == 0) {
        CFIL_LOG(LOG_DEBUG, "CFIL: UDP failed to get control unit");
        return error;
    }

    if ((filter_control_unit & NECP_MASK_USERSPACE_ONLY) != 0) {
        CFIL_LOG(LOG_DEBUG, "CFIL: UDP user space only");
        OSIncrementAtomic(&cfil_stats.cfs_sock_userspace_only);
        return error;
    }

    hash_entry = cfil_sock_udp_get_flow(so, filter_control_unit, outgoing, local, remote);
    if (hash_entry == NULL || hash_entry->cfentry_cfil == NULL) {
        CFIL_LOG(LOG_ERR, "CFIL: Failed to create UDP flow");
        return EPIPE;
    }
    // Update the last-used timestamp, used for the flow idle timeout
    hash_entry->cfentry_lastused = net_uptime();
    cfil_info = hash_entry->cfentry_cfil;

    if (cfil_info->cfi_flags & CFIF_DROP) {
#if DATA_DEBUG
        cfil_hash_entry_log(LOG_DEBUG, so, hash_entry, 0, "CFIL: UDP DROP");
#endif
        return EPIPE;
    }
    if (control != NULL) {
        OSIncrementAtomic(&cfil_stats.cfs_data_in_control);
    }
    if (data->m_type == MT_OOBDATA) {
        CFIL_LOG(LOG_ERR, "so %llx MSG_OOB",
            (uint64_t)VM_KERNEL_ADDRPERM(so));
        OSIncrementAtomic(&cfil_stats.cfs_data_in_oob);
    }

    error = cfil_data_common(so, cfil_info, outgoing, remote, data, control, flags);

    return error;
}
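
/*
 * Editor's note -- not part of the original source: a hedged sketch of how a
 * datagram path would hand traffic to cfil_sock_udp_handle_data(). The
 * surrounding names (remote_addr, data_mbuf, control_mbuf) are hypothetical,
 * not the actual protocol call sites:
 *
 *	// outgoing datagram, socket lock already held
 *	error = cfil_sock_udp_handle_data(TRUE, so, NULL, remote_addr,
 *	    data_mbuf, control_mbuf, 0);
 *	if (error == EPIPE) {
 *		// a filter dropped the flow; abort the send
 *	}
 */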

/*
 * Go through all UDP flows for the specified socket and return TRUE if
 * any flow is still attached. If need_wait is TRUE, wait on the first
 * attached flow.
 */
static int
cfil_filters_udp_attached(struct socket *so, bool need_wait)
{
    struct timespec ts;
    lck_mtx_t *mutex_held;
    struct cfilhashhead *cfilhash = NULL;
    struct cfil_db *db = NULL;
    struct cfil_hash_entry *hash_entry = NULL;
    struct cfil_hash_entry *temp_hash_entry = NULL;
    struct cfil_info *cfil_info = NULL;
    struct cfil_entry *entry = NULL;
    errno_t error = 0;
    int kcunit;
    int attached = 0;
    uint64_t sock_flow_id = 0;

    socket_lock_assert_owned(so);

    if ((so->so_flags & SOF_CONTENT_FILTER) != 0 && so->so_cfil_db != NULL) {
        if (so->so_proto->pr_getlock != NULL) {
            mutex_held = (*so->so_proto->pr_getlock)(so, PR_F_WILLUNLOCK);
        } else {
            mutex_held = so->so_proto->pr_domain->dom_mtx;
        }
        LCK_MTX_ASSERT(mutex_held, LCK_MTX_ASSERT_OWNED);

        db = so->so_cfil_db;

        for (int i = 0; i < CFILHASHSIZE; i++) {
            cfilhash = &db->cfdb_hashbase[i];

            LIST_FOREACH_SAFE(hash_entry, cfilhash, cfentry_link, temp_hash_entry) {
                if (hash_entry->cfentry_cfil != NULL) {
                    cfil_info = hash_entry->cfentry_cfil;
                    for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
                        entry = &cfil_info->cfi_entries[kcunit - 1];

                        /* Are we attached to the filter? */
                        if (entry->cfe_filter == NULL) {
                            continue;
                        }

                        if ((entry->cfe_flags & CFEF_SENT_SOCK_ATTACHED) == 0) {
                            continue;
                        }
                        if ((entry->cfe_flags & CFEF_CFIL_DETACHED) != 0) {
                            continue;
                        }

                        attached = 1;

                        if (need_wait == TRUE) {
#if LIFECYCLE_DEBUG
                            cfil_info_log(LOG_ERR, cfil_info, "CFIL: LIFECYCLE: WAIT FOR FLOW TO FINISH");
#endif

                            ts.tv_sec = cfil_close_wait_timeout / 1000;
                            ts.tv_nsec = (cfil_close_wait_timeout % 1000) *
                                NSEC_PER_USEC * 1000;

                            OSIncrementAtomic(&cfil_stats.cfs_close_wait);
                            cfil_info->cfi_flags |= CFIF_CLOSE_WAIT;
                            sock_flow_id = cfil_info->cfi_sock_id;

                            error = msleep((caddr_t)cfil_info, mutex_held,
                                PSOCK | PCATCH, "cfil_filters_udp_attached", &ts);

                            // Woke up from sleep, validate if cfil_info is still valid
                            if (so->so_cfil_db == NULL ||
                                (cfil_info != cfil_db_get_cfil_info(so->so_cfil_db, sock_flow_id))) {
                                // cfil_info is not valid, do not continue
                                goto done;
                            }

                            cfil_info->cfi_flags &= ~CFIF_CLOSE_WAIT;

#if LIFECYCLE_DEBUG
                            cfil_info_log(LOG_ERR, cfil_info, "CFIL: LIFECYCLE: WAIT FOR FLOW DONE");
#endif

                            /*
                             * Force close in case of timeout
                             */
                            if (error != 0) {
                                OSIncrementAtomic(&cfil_stats.cfs_close_wait_timeout);
#if LIFECYCLE_DEBUG
                                cfil_info_log(LOG_ERR, cfil_info, "CFIL: LIFECYCLE: WAIT FOR FLOW TIMED OUT, FORCE DETACH");
#endif
                                entry->cfe_flags |= CFEF_CFIL_DETACHED;
                            }
                        }
                        goto done;
                    }
                }
            }
        }
    }

done:
    return attached;
}
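
/*
 * Editor's note -- not part of the original source: the msleep() above bounds
 * the close wait using cfil_close_wait_timeout expressed in milliseconds;
 * NSEC_PER_USEC * 1000 is simply nanoseconds per millisecond. A minimal
 * sketch of the same conversion (toy_ms_to_timespec is hypothetical):
 */
#if 0 /* illustration only */
static inline void
toy_ms_to_timespec(uint32_t ms, struct timespec *ts)
{
    ts->tv_sec = ms / 1000;                 /* whole seconds */
    ts->tv_nsec = (ms % 1000) * 1000000;    /* leftover ms as ns */
}
#endif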

int32_t
cfil_sock_udp_data_pending(struct sockbuf *sb, bool check_thread)
{
    struct socket *so = sb->sb_so;
    struct cfi_buf *cfi_buf;
    uint64_t pending = 0;
    uint64_t total_pending = 0;
    struct cfilhashhead *cfilhash = NULL;
    struct cfil_db *db = NULL;
    struct cfil_hash_entry *hash_entry = NULL;
    struct cfil_hash_entry *temp_hash_entry = NULL;

    socket_lock_assert_owned(so);

    if ((so->so_flags & SOF_CONTENT_FILTER) != 0 && so->so_cfil_db != NULL &&
        (check_thread == FALSE || so->so_snd.sb_cfil_thread != current_thread())) {
        db = so->so_cfil_db;

        for (int i = 0; i < CFILHASHSIZE; i++) {
            cfilhash = &db->cfdb_hashbase[i];

            LIST_FOREACH_SAFE(hash_entry, cfilhash, cfentry_link, temp_hash_entry) {
                if (hash_entry->cfentry_cfil != NULL) {
                    if ((sb->sb_flags & SB_RECV) == 0) {
                        cfi_buf = &hash_entry->cfentry_cfil->cfi_snd;
                    } else {
                        cfi_buf = &hash_entry->cfentry_cfil->cfi_rcv;
                    }

                    pending = cfi_buf->cfi_pending_last - cfi_buf->cfi_pending_first;
                    /*
                     * If we are limited by the "chars of mbufs used" roughly
                     * adjust so we won't overcommit
                     */
                    if ((uint64_t)cfi_buf->cfi_pending_mbcnt > pending) {
                        pending = cfi_buf->cfi_pending_mbcnt;
                    }

                    total_pending += pending;
                }
            }
        }

        VERIFY(total_pending < INT32_MAX);
#if DATA_DEBUG
        CFIL_LOG(LOG_DEBUG, "CFIL: <so %llx> total pending %llu <check_thread %d>",
            (uint64_t)VM_KERNEL_ADDRPERM(so),
            total_pending, check_thread);
#endif
    }

    return (int32_t)(total_pending);
}

int
cfil_sock_udp_notify_shutdown(struct socket *so, int how, int drop_flag, int shut_flag)
{
    struct cfil_info *cfil_info = NULL;
    struct cfilhashhead *cfilhash = NULL;
    struct cfil_db *db = NULL;
    struct cfil_hash_entry *hash_entry = NULL;
    struct cfil_hash_entry *temp_hash_entry = NULL;
    errno_t error = 0;
    int done_count = 0;
    int kcunit;

    socket_lock_assert_owned(so);

    if ((so->so_flags & SOF_CONTENT_FILTER) != 0 && so->so_cfil_db != NULL) {
        db = so->so_cfil_db;

        for (int i = 0; i < CFILHASHSIZE; i++) {
            cfilhash = &db->cfdb_hashbase[i];

            LIST_FOREACH_SAFE(hash_entry, cfilhash, cfentry_link, temp_hash_entry) {
                if (hash_entry->cfentry_cfil != NULL) {
                    cfil_info = hash_entry->cfentry_cfil;

                    // This flow is marked as DROP
                    if (cfil_info->cfi_flags & drop_flag) {
                        done_count++;
                        continue;
                    }

                    // This flow has been shut already, skip
                    if (cfil_info->cfi_flags & shut_flag) {
                        continue;
                    }
                    // Mark flow as shut
                    cfil_info->cfi_flags |= shut_flag;
                    done_count++;

                    for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
                        /* Disconnect incoming side */
                        if (how != SHUT_WR) {
                            error = cfil_dispatch_disconnect_event(so, cfil_info, kcunit, 0);
                        }
                        /* Disconnect outgoing side */
                        if (how != SHUT_RD) {
                            error = cfil_dispatch_disconnect_event(so, cfil_info, kcunit, 1);
                        }
                    }
                }
            }
        }
    }

    if (done_count == 0) {
        error = ENOTCONN;
    }
    return error;
}

int
cfil_sock_udp_shutdown(struct socket *so, int *how)
{
    int error = 0;

    if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || (so->so_cfil_db == NULL)) {
        goto done;
    }

    socket_lock_assert_owned(so);

    CFIL_LOG(LOG_INFO, "so %llx how %d",
        (uint64_t)VM_KERNEL_ADDRPERM(so), *how);

    /*
     * Check the state of the socket before the content filter
     */
    if (*how != SHUT_WR && (so->so_state & SS_CANTRCVMORE) != 0) {
        /* read already shut down */
        error = ENOTCONN;
        goto done;
    }
    if (*how != SHUT_RD && (so->so_state & SS_CANTSENDMORE) != 0) {
        /* write already shut down */
        error = ENOTCONN;
        goto done;
    }

    /*
     * shutdown read: SHUT_RD or SHUT_RDWR
     */
    if (*how != SHUT_WR) {
        error = cfil_sock_udp_notify_shutdown(so, SHUT_RD, CFIF_DROP, CFIF_SHUT_RD);
        if (error != 0) {
            goto done;
        }
    }
    /*
     * shutdown write: SHUT_WR or SHUT_RDWR
     */
    if (*how != SHUT_RD) {
        error = cfil_sock_udp_notify_shutdown(so, SHUT_WR, CFIF_DROP, CFIF_SHUT_WR);
        if (error != 0) {
            goto done;
        }

        /*
         * When outgoing data is pending, we delay the shutdown at the
         * protocol level until the content filters give the final
         * verdict on the pending data.
         */
        if (cfil_sock_data_pending(&so->so_snd) != 0) {
            /*
             * When shutting down the read and write sides at once
             * we can proceed to the final shutdown of the read
             * side. Otherwise, we just return.
             */
            if (*how == SHUT_WR) {
                error = EJUSTRETURN;
            } else if (*how == SHUT_RDWR) {
                *how = SHUT_RD;
            }
        }
    }
done:
    return error;
}

void
cfil_sock_udp_close_wait(struct socket *so)
{
    socket_lock_assert_owned(so);

    while (cfil_filters_udp_attached(so, FALSE)) {
        /*
         * Notify the filters we are going away so they can detach
         */
        cfil_sock_udp_notify_shutdown(so, SHUT_RDWR, 0, 0);

        /*
         * Make sure we still need to wait after the filters are
         * notified of the disconnection
         */
        if (cfil_filters_udp_attached(so, TRUE) == 0) {
            break;
        }
    }
}

void
cfil_sock_udp_is_closed(struct socket *so)
{
    struct cfil_info *cfil_info = NULL;
    struct cfilhashhead *cfilhash = NULL;
    struct cfil_db *db = NULL;
    struct cfil_hash_entry *hash_entry = NULL;
    struct cfil_hash_entry *temp_hash_entry = NULL;
    errno_t error = 0;
    int kcunit;

    socket_lock_assert_owned(so);

    if ((so->so_flags & SOF_CONTENT_FILTER) != 0 && so->so_cfil_db != NULL) {
        db = so->so_cfil_db;

        for (int i = 0; i < CFILHASHSIZE; i++) {
            cfilhash = &db->cfdb_hashbase[i];

            LIST_FOREACH_SAFE(hash_entry, cfilhash, cfentry_link, temp_hash_entry) {
                if (hash_entry->cfentry_cfil != NULL) {
                    cfil_info = hash_entry->cfentry_cfil;

                    for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
                        /* Let the filters know of the closing */
                        error = cfil_dispatch_closed_event(so, cfil_info, kcunit);
                    }

                    /* Last chance to push passed data out */
                    error = cfil_acquire_sockbuf(so, cfil_info, 1);
                    if (error == 0) {
                        cfil_service_inject_queue(so, cfil_info, 1);
                    }
                    cfil_release_sockbuf(so, 1);

                    cfil_info->cfi_flags |= CFIF_SOCK_CLOSED;

                    /* Pending data needs to go */
                    cfil_flush_queues(so, cfil_info);

                    CFIL_INFO_VERIFY(cfil_info);
                }
            }
        }
    }
}

void
cfil_sock_udp_buf_update(struct sockbuf *sb)
{
    struct cfil_info *cfil_info = NULL;
    struct cfilhashhead *cfilhash = NULL;
    struct cfil_db *db = NULL;
    struct cfil_hash_entry *hash_entry = NULL;
    struct cfil_hash_entry *temp_hash_entry = NULL;
    errno_t error = 0;
    int outgoing;
    struct socket *so = sb->sb_so;

    socket_lock_assert_owned(so);

    if ((so->so_flags & SOF_CONTENT_FILTER) != 0 && so->so_cfil_db != NULL) {
        if (!cfil_sbtrim) {
            return;
        }

        db = so->so_cfil_db;

        for (int i = 0; i < CFILHASHSIZE; i++) {
            cfilhash = &db->cfdb_hashbase[i];

            LIST_FOREACH_SAFE(hash_entry, cfilhash, cfentry_link, temp_hash_entry) {
                if (hash_entry->cfentry_cfil != NULL) {
                    cfil_info = hash_entry->cfentry_cfil;

                    if ((sb->sb_flags & SB_RECV) == 0) {
                        if ((cfil_info->cfi_flags & CFIF_RETRY_INJECT_OUT) == 0) {
                            return;
                        }
                        outgoing = 1;
                        OSIncrementAtomic(&cfil_stats.cfs_inject_q_out_retry);
                    } else {
                        if ((cfil_info->cfi_flags & CFIF_RETRY_INJECT_IN) == 0) {
                            return;
                        }
                        outgoing = 0;
                        OSIncrementAtomic(&cfil_stats.cfs_inject_q_in_retry);
                    }

                    CFIL_LOG(LOG_NOTICE, "so %llx outgoing %d",
                        (uint64_t)VM_KERNEL_ADDRPERM(so), outgoing);

                    error = cfil_acquire_sockbuf(so, cfil_info, outgoing);
                    if (error == 0) {
                        cfil_service_inject_queue(so, cfil_info, outgoing);
                    }
                    cfil_release_sockbuf(so, outgoing);
                }
            }
        }
    }
}

void
cfil_filter_show(u_int32_t kcunit)
{
    struct content_filter *cfc = NULL;
    struct cfil_entry *entry;
    int count = 0;

    if (content_filters == NULL) {
        return;
    }
    if (kcunit > MAX_CONTENT_FILTER) {
        return;
    }

    cfil_rw_lock_shared(&cfil_lck_rw);

    if (content_filters[kcunit - 1] == NULL) {
        cfil_rw_unlock_shared(&cfil_lck_rw);
        return;
    }
    cfc = content_filters[kcunit - 1];

    CFIL_LOG(LOG_ERR, "CFIL: FILTER SHOW: Filter <unit %d, entry count %d> flags <%lx>:",
        kcunit, cfc->cf_sock_count, (unsigned long)cfc->cf_flags);
    if (cfc->cf_flags & CFF_DETACHING) {
        CFIL_LOG(LOG_ERR, "CFIL: FILTER SHOW: - DETACHING");
    }
    if (cfc->cf_flags & CFF_ACTIVE) {
        CFIL_LOG(LOG_ERR, "CFIL: FILTER SHOW: - ACTIVE");
    }
    if (cfc->cf_flags & CFF_FLOW_CONTROLLED) {
        CFIL_LOG(LOG_ERR, "CFIL: FILTER SHOW: - FLOW CONTROLLED");
    }

    TAILQ_FOREACH(entry, &cfc->cf_sock_entries, cfe_link) {
        if (entry->cfe_cfil_info && entry->cfe_cfil_info->cfi_so) {
            struct cfil_info *cfil_info = entry->cfe_cfil_info;

            count++;

            if (entry->cfe_flags & CFEF_CFIL_DETACHED) {
                cfil_info_log(LOG_ERR, cfil_info, "CFIL: FILTER SHOW: - DETACHED");
            } else {
                cfil_info_log(LOG_ERR, cfil_info, "CFIL: FILTER SHOW: - ATTACHED");
            }
        }
    }

    CFIL_LOG(LOG_ERR, "CFIL: FILTER SHOW: Filter - total entries shown: %d", count);

    cfil_rw_unlock_shared(&cfil_lck_rw);
}

void
cfil_info_show(void)
{
    struct cfil_info *cfil_info;
    int count = 0;

    cfil_rw_lock_shared(&cfil_lck_rw);

    CFIL_LOG(LOG_ERR, "CFIL: INFO SHOW: count %d", cfil_sock_attached_count);

    TAILQ_FOREACH(cfil_info, &cfil_sock_head, cfi_link) {
        count++;

        cfil_info_log(LOG_ERR, cfil_info, "CFIL: INFO SHOW");

        if (cfil_info->cfi_flags & CFIF_DROP) {
            CFIL_LOG(LOG_ERR, "CFIL: INFO FLAG - DROP");
        }
        if (cfil_info->cfi_flags & CFIF_CLOSE_WAIT) {
            CFIL_LOG(LOG_ERR, "CFIL: INFO FLAG - CLOSE_WAIT");
        }
        if (cfil_info->cfi_flags & CFIF_SOCK_CLOSED) {
            CFIL_LOG(LOG_ERR, "CFIL: INFO FLAG - SOCK_CLOSED");
        }
        if (cfil_info->cfi_flags & CFIF_RETRY_INJECT_IN) {
            CFIL_LOG(LOG_ERR, "CFIL: INFO FLAG - RETRY_INJECT_IN");
        }
        if (cfil_info->cfi_flags & CFIF_RETRY_INJECT_OUT) {
            CFIL_LOG(LOG_ERR, "CFIL: INFO FLAG - RETRY_INJECT_OUT");
        }
        if (cfil_info->cfi_flags & CFIF_SHUT_WR) {
            CFIL_LOG(LOG_ERR, "CFIL: INFO FLAG - SHUT_WR");
        }
        if (cfil_info->cfi_flags & CFIF_SHUT_RD) {
            CFIL_LOG(LOG_ERR, "CFIL: INFO FLAG - SHUT_RD");
        }
    }

    CFIL_LOG(LOG_ERR, "CFIL: INFO SHOW: total cfil_info shown: %d", count);

    cfil_rw_unlock_shared(&cfil_lck_rw);
}

bool
cfil_info_idle_timed_out(struct cfil_info *cfil_info, int timeout, u_int32_t current_time)
{
    if (cfil_info && cfil_info->cfi_hash_entry &&
        (current_time - cfil_info->cfi_hash_entry->cfentry_lastused >= (u_int32_t)timeout)) {
#if GC_DEBUG
        cfil_info_log(LOG_ERR, cfil_info, "CFIL: flow IDLE timeout expired");
#endif
        return true;
    }
    return false;
}

bool
cfil_info_action_timed_out(struct cfil_info *cfil_info, int timeout)
{
    struct cfil_entry *entry;
    struct timeval current_tv;
    struct timeval diff_time;

    if (cfil_info == NULL) {
        return false;
    }

    /*
     * If we have queued up more data than the passed offset and we haven't
     * received an action from user space for a while (the user space filter
     * might have crashed), return action timed out.
     */
    if (cfil_info->cfi_snd.cfi_pending_last > cfil_info->cfi_snd.cfi_pass_offset ||
        cfil_info->cfi_rcv.cfi_pending_last > cfil_info->cfi_rcv.cfi_pass_offset) {
        microuptime(&current_tv);

        for (int kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
            entry = &cfil_info->cfi_entries[kcunit - 1];

            if (entry->cfe_filter == NULL) {
                continue;
            }

            if (cfil_info->cfi_snd.cfi_pending_last > entry->cfe_snd.cfe_pass_offset ||
                cfil_info->cfi_rcv.cfi_pending_last > entry->cfe_rcv.cfe_pass_offset) {
                // We haven't gotten an action from this filter; check the timeout
                timersub(&current_tv, &entry->cfe_last_action, &diff_time);
                if (diff_time.tv_sec >= timeout) {
#if GC_DEBUG
                    cfil_info_log(LOG_ERR, cfil_info, "CFIL: flow ACTION timeout expired");
#endif
                    return true;
                }
            }
        }
    }
    return false;
}

bool
cfil_info_buffer_threshold_exceeded(struct cfil_info *cfil_info)
{
    if (cfil_info == NULL) {
        return false;
    }

    /*
     * Clean up the flow if it exceeded queue thresholds
     */
    if (cfil_info->cfi_snd.cfi_tail_drop_cnt ||
        cfil_info->cfi_rcv.cfi_tail_drop_cnt) {
#if GC_DEBUG
        CFIL_LOG(LOG_ERR, "CFIL: queue threshold exceeded: mbuf max <count: %d bytes: %d> tail drop count <OUT: %d IN: %d>",
            cfil_udp_gc_mbuf_num_max,
            cfil_udp_gc_mbuf_cnt_max,
            cfil_info->cfi_snd.cfi_tail_drop_cnt,
            cfil_info->cfi_rcv.cfi_tail_drop_cnt);
        cfil_info_log(LOG_ERR, cfil_info, "CFIL: queue threshold exceeded");
#endif
        return true;
    }

    return false;
}

static void
cfil_udp_gc_thread_sleep(bool forever)
{
    if (forever) {
        (void) assert_wait((event_t) &cfil_sock_udp_attached_count,
            THREAD_INTERRUPTIBLE);
    } else {
        uint64_t deadline = 0;
        nanoseconds_to_absolutetime(UDP_FLOW_GC_RUN_INTERVAL_NSEC, &deadline);
        clock_absolutetime_interval_to_deadline(deadline, &deadline);

        (void) assert_wait_deadline(&cfil_sock_udp_attached_count,
            THREAD_INTERRUPTIBLE, deadline);
    }
}

static void
cfil_udp_gc_thread_func(void *v, wait_result_t w)
{
#pragma unused(v, w)

    ASSERT(cfil_udp_gc_thread == current_thread());
    thread_set_thread_name(current_thread(), "CFIL_UDP_GC");

    // Kick off gc shortly
    cfil_udp_gc_thread_sleep(false);
    thread_block_parameter((thread_continue_t) cfil_info_udp_expire, NULL);
    /* NOTREACHED */
}
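
/*
 * Editor's note -- not part of the original source: the GC thread above uses
 * the classic assert_wait()/thread_block_parameter() continuation idiom: the
 * function never returns; it re-arms a wait on an event and names the
 * continuation to run on wakeup. A shape-only sketch (toy_* names are
 * hypothetical):
 *
 *	static void
 *	toy_gc_loop(void *v, wait_result_t w)
 *	{
 *		toy_do_one_gc_pass();
 *		(void) assert_wait((event_t)&toy_gc_event, THREAD_INTERRUPTIBLE);
 *		thread_block_parameter((thread_continue_t)toy_gc_loop, NULL);
 *		// NOTREACHED
 *	}
 */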

static void
cfil_info_udp_expire(void *v, wait_result_t w)
{
#pragma unused(v, w)

    static uint64_t expired_array[UDP_FLOW_GC_MAX_COUNT];
    static uint32_t expired_count = 0;

    struct cfil_info *cfil_info;
    struct cfil_hash_entry *hash_entry;
    struct cfil_db *db;
    struct socket *so;
    u_int32_t current_time = 0;

    current_time = net_uptime();

    // Get all expired UDP flow ids
    cfil_rw_lock_shared(&cfil_lck_rw);

    if (cfil_sock_udp_attached_count == 0) {
        cfil_rw_unlock_shared(&cfil_lck_rw);
        goto go_sleep;
    }

    TAILQ_FOREACH(cfil_info, &cfil_sock_head, cfi_link) {
        if (expired_count >= UDP_FLOW_GC_MAX_COUNT) {
            break;
        }

        if (IS_UDP(cfil_info->cfi_so)) {
            if (cfil_info_idle_timed_out(cfil_info, UDP_FLOW_GC_IDLE_TO, current_time) ||
                cfil_info_action_timed_out(cfil_info, UDP_FLOW_GC_ACTION_TO) ||
                cfil_info_buffer_threshold_exceeded(cfil_info)) {
                expired_array[expired_count] = cfil_info->cfi_sock_id;
                expired_count++;
            }
        }
    }
    cfil_rw_unlock_shared(&cfil_lck_rw);

    if (expired_count == 0) {
        goto go_sleep;
    }

    for (uint32_t i = 0; i < expired_count; i++) {
        // Search for socket (UDP only and lock so)
        so = cfil_socket_from_sock_id(expired_array[i], true);
        if (so == NULL) {
            continue;
        }

        cfil_info = cfil_db_get_cfil_info(so->so_cfil_db, expired_array[i]);
        if (cfil_info == NULL) {
            goto unlock;
        }

        db = so->so_cfil_db;
        hash_entry = cfil_info->cfi_hash_entry;

        if (db == NULL || hash_entry == NULL) {
            goto unlock;
        }

#if GC_DEBUG || LIFECYCLE_DEBUG
        cfil_info_log(LOG_ERR, cfil_info, "CFIL: LIFECYCLE: GC CLEAN UP");
#endif

        cfil_db_delete_entry(db, hash_entry);
        cfil_info_free(cfil_info);
        OSIncrementAtomic(&cfil_stats.cfs_sock_detached);

        if (so->so_flags & SOF_CONTENT_FILTER) {
            if (db->cfdb_count == 0) {
                so->so_flags &= ~SOF_CONTENT_FILTER;
            }
            VERIFY(so->so_usecount > 0);
            so->so_usecount--;
        }
unlock:
        socket_unlock(so, 1);
    }

#if GC_DEBUG
    CFIL_LOG(LOG_ERR, "CFIL: UDP flow idle timeout check: expired %d idle flows", expired_count);
#endif
    expired_count = 0;

go_sleep:

    // Sleep forever (until woken up) if there are no more UDP flows to clean
    cfil_rw_lock_shared(&cfil_lck_rw);
    cfil_udp_gc_thread_sleep(cfil_sock_udp_attached_count == 0 ? true : false);
    cfil_rw_unlock_shared(&cfil_lck_rw);
    thread_block_parameter((thread_continue_t)cfil_info_udp_expire, NULL);
    /* NOTREACHED */
}

struct m_tag *
cfil_udp_save_socket_state(struct cfil_info *cfil_info, struct mbuf *m)
{
    struct m_tag *tag = NULL;
    struct cfil_tag *ctag = NULL;
    struct cfil_hash_entry *hash_entry = NULL;

    if (cfil_info == NULL || cfil_info->cfi_so == NULL ||
        cfil_info->cfi_hash_entry == NULL || m == NULL || !(m->m_flags & M_PKTHDR)) {
        return NULL;
    }

    /* Allocate a tag */
    tag = m_tag_create(KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_CFIL_UDP,
        sizeof(struct cfil_tag), M_DONTWAIT, m);

    if (tag) {
        ctag = (struct cfil_tag*)(tag + 1);
        ctag->cfil_so_state_change_cnt = cfil_info->cfi_so->so_state_change_cnt;
        ctag->cfil_so_options = cfil_info->cfi_so->so_options;

        hash_entry = cfil_info->cfi_hash_entry;
        if (hash_entry->cfentry_family == AF_INET6) {
            fill_ip6_sockaddr_4_6(&ctag->cfil_faddr,
                &hash_entry->cfentry_faddr.addr6,
                hash_entry->cfentry_fport);
        } else if (hash_entry->cfentry_family == AF_INET) {
            fill_ip_sockaddr_4_6(&ctag->cfil_faddr,
                hash_entry->cfentry_faddr.addr46.ia46_addr4,
                hash_entry->cfentry_fport);
        }
        m_tag_prepend(m, tag);
        return tag;
    }
    return NULL;
}

struct m_tag *
cfil_udp_get_socket_state(struct mbuf *m, uint32_t *state_change_cnt, short *options,
    struct sockaddr **faddr)
{
    struct m_tag *tag = NULL;
    struct cfil_tag *ctag = NULL;

    tag = m_tag_locate(m, KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_CFIL_UDP, NULL);
    if (tag) {
        ctag = (struct cfil_tag *)(tag + 1);
        if (state_change_cnt) {
            *state_change_cnt = ctag->cfil_so_state_change_cnt;
        }
        if (options) {
            *options = ctag->cfil_so_options;
        }
        if (faddr) {
            *faddr = (struct sockaddr *) &ctag->cfil_faddr;
        }

        /*
         * Unlink the tag and hand it over to the caller.
         * Note that the caller is responsible for freeing it.
         */
        m_tag_unlink(m, tag);
        return tag;
    }
    return NULL;
}
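
/*
 * Editor's note -- not part of the original source: save/get above form a
 * round trip: the save side tags an mbuf with the socket state and faddr,
 * and a later consumer unlinks the tag, uses it, and frees it. A hedged
 * usage sketch; per the comment above, the caller owns the returned tag:
 *
 *	uint32_t state_cnt = 0;
 *	short options = 0;
 *	struct sockaddr *faddr = NULL;
 *	struct m_tag *tag;
 *
 *	tag = cfil_udp_get_socket_state(m, &state_cnt, &options, &faddr);
 *	if (tag != NULL) {
 *		// ... consume state_cnt / options / faddr ...
 *		m_tag_free(tag);	// caller must free the unlinked tag
 *	}
 */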