1 /*
2 * Copyright (c) 2013-2020 Apple Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. Please obtain a copy of the License at
10 * http://www.opensource.apple.com/apsl/ and read it before using this
11 * file.
12 *
13 * The Original Code and all software distributed under the License are
14 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
18 * Please see the License for the specific language governing rights and
19 * limitations under the License.
20 *
21 * @APPLE_LICENSE_HEADER_END@
22 */
23
24 /*
25 * THEORY OF OPERATION
26 *
27 * The socket content filter subsystem provides a way for user space agents to
28 * make filtering decisions based on the content of the data being sent and
29 * received by INET/INET6 sockets.
30 *
31  * A content filter user space agent gets a copy of the data, and the data is
32  * also kept in a kernel buffer until the user space agent makes a pass or drop
33 * decision. This unidirectional flow of content avoids unnecessary data copies
34 * back to the kernel.
35 *
36 * A user space filter agent opens a kernel control socket with the name
37 * CONTENT_FILTER_CONTROL_NAME to attach to the socket content filter subsystem.
38 * When connected, a "struct content_filter" is created and set as the
39 * "unitinfo" of the corresponding kernel control socket instance.
40 *
41 * The socket content filter subsystem exchanges messages with the user space
42 * filter agent until an ultimate pass or drop decision is made by the
43 * user space filter agent.
44 *
45 * It should be noted that messages about many INET/INET6 sockets can be multiplexed
46 * over a single kernel control socket.
47 *
48 * Notes:
49  * - The current implementation supports all INET/INET6 sockets (e.g. TCP,
50 * UDP, ICMP, etc).
51 * - The current implementation supports up to two simultaneous content filters
52 * for iOS devices and eight simultaneous content filters for OSX.
53 *
54 *
55 * NECP FILTER CONTROL UNIT
56 *
57 * A user space filter agent uses the Network Extension Control Policy (NECP)
58 * database to specify which INET/INET6 sockets need to be filtered. The NECP
59 * criteria may be based on a variety of properties like user ID or proc UUID.
60 *
61 * The NECP "filter control unit" is used by the socket content filter subsystem
62 * to deliver the relevant INET/INET6 content information to the appropriate
63 * user space filter agent via its kernel control socket instance.
64 * This works as follows:
65 *
66 * 1) The user space filter agent specifies an NECP filter control unit when
67  * it adds its filtering rules to the NECP database.
68 *
69 * 2) The user space filter agent also sets its NECP filter control unit on the
70 * content filter kernel control socket via the socket option
71 * CFIL_OPT_NECP_CONTROL_UNIT.
72 *
73 * 3) The NECP database is consulted to find out if a given INET/INET6 socket
74 * needs to be subjected to content filtering and returns the corresponding
75 * NECP filter control unit -- the NECP filter control unit is actually
76 * stored in the INET/INET6 socket structure so the NECP lookup is really simple.
77 *
78 * 4) The NECP filter control unit is then used to find the corresponding
79 * kernel control socket instance.
80 *
81 * Note: NECP currently supports a single filter control unit per INET/INET6 socket
82  * but this restriction may soon be lifted.
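 *
 * For illustration, a user space filter agent typically attaches to the
 * subsystem roughly as follows. This is a hedged sketch using the standard
 * kernel control socket APIs from <sys/kern_control.h> and <sys/sys_domain.h>;
 * error handling, the required entitlement and the installation of the
 * matching NECP rules are omitted, and the control unit value 12 is only an
 * example:
 *
 *      struct ctl_info info = { 0 };
 *      struct sockaddr_ctl addr = { 0 };
 *      uint32_t control_unit = 12;   // must match the NECP filter rules
 *
 *      int fd = socket(PF_SYSTEM, SOCK_DGRAM, SYSPROTO_CONTROL);
 *      strlcpy(info.ctl_name, CONTENT_FILTER_CONTROL_NAME, sizeof(info.ctl_name));
 *      ioctl(fd, CTLIOCGINFO, &info);
 *      addr.sc_len = sizeof(addr);
 *      addr.sc_family = AF_SYSTEM;
 *      addr.ss_sysaddr = AF_SYS_CONTROL;
 *      addr.sc_id = info.ctl_id;
 *      addr.sc_unit = 0;             // let the kernel pick a free kcunit
 *      connect(fd, (struct sockaddr *)&addr, sizeof(addr));
 *      setsockopt(fd, SYSPROTO_CONTROL, CFIL_OPT_NECP_CONTROL_UNIT,
 *          &control_unit, sizeof(control_unit));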
83 *
84 *
85 * THE MESSAGING PROTOCOL
86 *
87 * The socket content filter subsystem and a user space filter agent
88 * communicate over the kernel control socket via an asynchronous
89 * messaging protocol (this is not a request-response protocol).
90 * The socket content filter subsystem sends event messages to the user
91  * space filter agent about the INET/INET6 sockets it is interested in filtering.
92 * The user space filter agent sends action messages to either allow
93 * data to pass or to disallow the data flow (and drop the connection).
94 *
95 * All messages over a content filter kernel control socket share the same
96 * common header of type "struct cfil_msg_hdr". The message type tells if
97  * it's an event message "CFM_TYPE_EVENT" or an action message "CFM_TYPE_ACTION".
98 * The message header field "cfm_sock_id" identifies a given INET/INET6 flow.
99  * For TCP, flows are per-socket. For UDP and other datagram protocols, there
100 * could be multiple flows per socket.
101 *
102 * Note the message header length field may be padded for alignment and can
103 * be larger than the actual content of the message.
104  * The field "cfm_op" describes the kind of event or action.
105 *
106 * Here are the kinds of content filter events:
107 * - CFM_OP_SOCKET_ATTACHED: a new INET/INET6 socket is being filtered
108  * - CFM_OP_SOCKET_CLOSED: An INET/INET6 socket is closed
109  * - CFM_OP_DATA_OUT: A span of data is being sent on an INET/INET6 socket
110  * - CFM_OP_DATA_IN: A span of data is being received on an INET/INET6 socket
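 *
 * As a rough user space sketch of how these messages are consumed and
 * answered (field names as defined in net/content_filter.h; this is
 * illustrative only and skips length and version checks), the agent reads an
 * event and replies with an action message on the same kernel control socket:
 *
 *      uint8_t buf[8192];
 *      ssize_t n = recv(fd, buf, sizeof(buf), 0);
 *      struct cfil_msg_hdr *hdr = (struct cfil_msg_hdr *)buf;
 *      if (n > 0 && hdr->cfm_type == CFM_TYPE_EVENT &&
 *          hdr->cfm_op == CFM_OP_DATA_OUT) {
 *              // Inspect the payload, then allow everything seen so far
 *              struct cfil_msg_action action = { 0 };
 *              action.cfa_msghdr.cfm_len = sizeof(action);
 *              action.cfa_msghdr.cfm_version = CFM_VERSION_CURRENT;
 *              action.cfa_msghdr.cfm_type = CFM_TYPE_ACTION;
 *              action.cfa_msghdr.cfm_op = CFM_OP_DATA_UPDATE;
 *              action.cfa_msghdr.cfm_sock_id = hdr->cfm_sock_id;
 *              action.cfa_out_pass_offset = CFM_MAX_OFFSET;
 *              action.cfa_out_peek_offset = CFM_MAX_OFFSET;
 *              send(fd, &action, sizeof(action), 0);
 *      }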
111 *
112 *
113 * EVENT MESSAGES
114 *
115  * The CFM_OP_DATA_OUT and CFM_OP_DATA_IN event messages contain a span of
116 * data that is being sent or received. The position of this span of data
117 * in the data flow is described by a set of start and end offsets. These
118 * are absolute 64 bits offsets. The first byte sent (or received) starts
119 * at offset 0 and ends at offset 1. The length of the content data
120 * is given by the difference between the end offset and the start offset.
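 * For example, an event whose start offset is 1000 and whose end offset is
 * 1500 describes bytes 1000 through 1499 of the flow, i.e. 500 bytes of content.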
121 *
122 * After a CFM_OP_SOCKET_ATTACHED is delivered, CFM_OP_DATA_OUT and
123  * CFM_OP_DATA_IN events are not delivered until a CFM_OP_DATA_UPDATE
124 * action message is sent by the user space filter agent.
125 *
126  * Note: absolute 64-bit offsets should be large enough for the foreseeable
127  * future. A 64-bit counter will wrap after 468 years at 10 Gbit/sec:
128  * 2^64 / ((10^10 / 8) * 60 * 60 * 24 * 365.25) = 467.63
129 *
130  * There are two kinds of primary content filter actions:
131 * - CFM_OP_DATA_UPDATE: to update pass or peek offsets for each direction.
132  * - CFM_OP_DROP: to shut down the socket and disallow further data flow
133 *
134 * There is also an action to mark a given client flow as already filtered
135 * at a higher level, CFM_OP_BLESS_CLIENT.
136 *
137 *
138 * ACTION MESSAGES
139 *
140 * The CFM_OP_DATA_UPDATE action messages let the user space filter
141 * agent allow data to flow up to the specified pass offset -- there
142 * is a pass offset for outgoing data and a pass offset for incoming data.
143 * When a new INET/INET6 socket is attached to the content filter and a flow is
144 * created, each pass offset is initially set to 0 so no data is allowed to pass by
145 * default. When the pass offset is set to CFM_MAX_OFFSET via a CFM_OP_DATA_UPDATE
146 * then the data flow becomes unrestricted.
147 *
148 * Note that pass offsets can only be incremented. A CFM_OP_DATA_UPDATE message
149 * with a pass offset smaller than the pass offset of a previous
150 * CFM_OP_DATA_UPDATE message is silently ignored.
151 *
152 * A user space filter agent also uses CFM_OP_DATA_UPDATE action messages
153 * to tell the kernel how much data it wants to see by using the peek offsets.
154 * Just like pass offsets, there is a peek offset for each direction.
155 * When a new INET/INET6 flow is created, each peek offset is initially set to 0
156 * so no CFM_OP_DATA_OUT and CFM_OP_DATA_IN event messages are dispatched by default
157  * until a CFM_OP_DATA_UPDATE action message with a peek offset greater than 0 is sent
158 * by the user space filter agent. When the peek offset is set to CFM_MAX_OFFSET via
159  * a CFM_OP_DATA_UPDATE then the flow of data events becomes unrestricted.
160 *
161  * Note that a peek offset cannot be smaller than the corresponding pass offset.
162  * Also a peek offset cannot be smaller than the corresponding end offset
163  * of the last CFM_OP_DATA_OUT/CFM_OP_DATA_IN message dispatched. Trying
164  * to set a peek value that is too small is silently ignored.
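 *
 * As a worked example: suppose CFM_OP_DATA_OUT events have been delivered for
 * bytes 0..1023 in the outgoing direction. A CFM_OP_DATA_UPDATE carrying an
 * outgoing pass offset of 512 and an outgoing peek offset of 4096 releases
 * bytes 0..511 for transmission, leaves bytes 512..1023 pending a later
 * decision, and lets the kernel keep dispatching data events up to offset 4096.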
165 *
166 *
167 * PER FLOW "struct cfil_info"
168 *
169  * As soon as an INET/INET6 socket gets attached to a content filter, a
170 * "struct cfil_info" is created to hold the content filtering state for this
171 * socket. For UDP and other datagram protocols, as soon as traffic is seen for
172 * each new flow identified by its 4-tuple of source address/port and destination
173 * address/port, a "struct cfil_info" is created. Each datagram socket may
174 * have multiple flows maintained in a hash table of "struct cfil_info" entries.
175 *
176 * The content filtering state is made of the following information
177 * for each direction:
178 * - The current pass offset;
179 * - The first and last offsets of the data pending, waiting for a filtering
180 * decision;
181 * - The inject queue for data that passed the filters and that needs
182 * to be re-injected;
183 * - A content filter specific state in a set of "struct cfil_entry"
184 *
185 *
186 * CONTENT FILTER STATE "struct cfil_entry"
187 *
188 * The "struct cfil_entry" maintains the information most relevant to the
189 * message handling over a kernel control socket with a user space filter agent.
190 *
191 * The "struct cfil_entry" holds the NECP filter control unit that corresponds
192 * to the kernel control socket unit it corresponds to and also has a pointer
193 * to the corresponding "struct content_filter".
194 *
195 * For each direction, "struct cfil_entry" maintains the following information:
196 * - The pass offset
197 * - The peek offset
198 * - The offset of the last data peeked at by the filter
199 * - A queue of data that's waiting to be delivered to the user space filter
200 * agent on the kernel control socket
201 * - A queue of data for which event messages have been sent on the kernel
202 * control socket and are pending for a filtering decision.
203 *
204 *
205 * CONTENT FILTER QUEUES
206 *
207 * Data that is being filtered is steered away from the INET/INET6 socket buffer
208 * and instead will sit in one of three content filter queues until the data
209 * can be re-injected into the INET/INET6 socket buffer.
210 *
211 * A content filter queue is represented by "struct cfil_queue" that contains
212 * a list of mbufs and the start and end offset of the data span of
213 * the list of mbufs.
214 *
215 * The data moves into the three content filter queues according to this
216 * sequence:
217 * a) The "cfe_ctl_q" of "struct cfil_entry"
218 * b) The "cfe_pending_q" of "struct cfil_entry"
219 * c) The "cfi_inject_q" of "struct cfil_info"
220 *
221 * Note: The sequence (a),(b) may be repeated several times if there is more
222 * than one content filter attached to the INET/INET6 socket.
223 *
224 * The "cfe_ctl_q" queue holds data than cannot be delivered to the
225 * kernel conntrol socket for two reasons:
226 * - The peek offset is less that the end offset of the mbuf data
227 * - The kernel control socket is flow controlled
228 *
229 * The "cfe_pending_q" queue holds data for which CFM_OP_DATA_OUT or
230 * CFM_OP_DATA_IN have been successfully dispatched to the kernel control
231  * socket and are waiting for a pass action message from the user space
232  * filter agent. An mbuf's data must be fully allowed to pass before it is
233  * removed from the cfe_pending_q.
234 *
235 * The "cfi_inject_q" queue holds data that has been fully allowed to pass
236 * by the user space filter agent and that needs to be re-injected into the
237 * INET/INET6 socket.
238 *
239 *
240 * IMPACT ON FLOW CONTROL
241 *
242  * An essential aspect of the content filter subsystem is to minimize the
243 * impact on flow control of the INET/INET6 sockets being filtered.
244 *
245 * The processing overhead of the content filtering may have an effect on
246 * flow control by adding noticeable delays and cannot be eliminated --
247 * care must be taken by the user space filter agent to minimize the
248 * processing delays.
249 *
250 * The amount of data being filtered is kept in buffers while waiting for
251 * a decision by the user space filter agent. This amount of data pending
252 * needs to be subtracted from the amount of data available in the
253 * corresponding INET/INET6 socket buffer. This is done by modifying
254  * sbspace() and tcp_sbspace() to account for the amount of data pending
255 * in the content filter.
256 *
257 *
258 * LOCKING STRATEGY
259 *
260  * The global state of the content filter subsystem is protected by a single
261  * read-write lock "cfil_lck_rw". The data flow paths can run with the
262  * cfil read-write lock held shared so they can be entered from multiple
263  * threads.
264 *
265  * The per INET/INET6 socket content filter state -- "struct cfil_info" -- is
266 * protected by the socket lock.
267 *
268  * An INET/INET6 socket lock cannot be taken while the cfil read-write lock
269  * is held. That's why we have some sequences where we drop the cfil read-write
270  * lock before taking the INET/INET6 socket lock.
271 *
272 * It is also important to lock the INET/INET6 socket buffer while the content
273 * filter is modifying the amount of pending data. Otherwise the calculations
274 * in sbspace() and tcp_sbspace() could be wrong.
275 *
276 * The "cfil_lck_rw" protects "struct content_filter" and also the fields
277 * "cfe_link" and "cfe_filter" of "struct cfil_entry".
278 *
279  * Actually "cfe_link" and "cfe_filter" are protected both by
280 * "cfil_lck_rw" and the socket lock: they may be modified only when
281 * "cfil_lck_rw" is exclusive and the socket is locked.
282 *
283 * To read the other fields of "struct content_filter" we have to take
284 * "cfil_lck_rw" in shared mode.
285 *
286 * DATAGRAM SPECIFICS:
287 *
288 * The socket content filter supports all INET/INET6 protocols. However
289 * the treatments for TCP sockets and for datagram (UDP, ICMP, etc) sockets
290 * are slightly different.
291 *
292 * Each datagram socket may have multiple flows. Each flow is identified
293 * by the flow's source address/port and destination address/port tuple
294 * and is represented as a "struct cfil_info" entry. For each socket,
295 * a hash table is used to maintain the collection of flows under that socket.
296 *
297  * Each datagram flow is uniquely identified by its "struct cfil_info" cfi_sock_id.
298  * The highest 32 bits of the cfi_sock_id contain the socket's so_gencnt. This portion
299  * of the cfi_sock_id is used to locate the socket during socket lookup. The lowest 32 bits
300  * of the cfi_sock_id contain a hash of the flow's 4-tuple. This portion of the cfi_sock_id
301 * is used as the hash value for the flow hash table lookup within the parent socket.
302 *
303  * Since datagram sockets may not be connected, flow states cannot be maintained in the
304 * socket structures and thus have to be saved for each packet. These saved states will be
305 * used for both outgoing and incoming reinjections. For outgoing packets, destination
306 * address/port as well as the current socket states will be saved. During reinjection,
307 * these saved states will be used instead. For incoming packets, control and address
308 * mbufs will be chained to the data. During reinjection, the whole chain will be queued
309 * onto the incoming socket buffer.
310 *
311 * LIMITATIONS
312 *
313  * - Supports all INET/INET6 sockets, such as TCP, UDP, ICMP, etc.
314 *
315 * - Does not support TCP unordered messages
316 */
317
318 /*
319 * TO DO LIST
320 *
321 * Deal with OOB
322 *
323 */
324
325 #include <sys/types.h>
326 #include <sys/kern_control.h>
327 #include <sys/queue.h>
328 #include <sys/domain.h>
329 #include <sys/protosw.h>
330 #include <sys/syslog.h>
331 #include <sys/systm.h>
332 #include <sys/param.h>
333 #include <sys/mbuf.h>
334
335 #include <kern/locks.h>
336 #include <kern/zalloc.h>
337 #include <kern/debug.h>
338
339 #include <net/content_filter.h>
340 #include <net/content_filter_crypto.h>
341
342 #define _IP_VHL
343 #include <netinet/ip.h>
344 #include <netinet/in_pcb.h>
345 #include <netinet/tcp.h>
346 #include <netinet/tcp_var.h>
347 #include <netinet/udp.h>
348 #include <netinet/udp_var.h>
349
350 #include <string.h>
351 #include <libkern/libkern.h>
352 #include <kern/sched_prim.h>
353 #include <kern/task.h>
354 #include <mach/task_info.h>
355
356 #if !TARGET_OS_OSX && !defined(XNU_TARGET_OS_OSX)
357 #define MAX_CONTENT_FILTER 2
358 #else
359 #define MAX_CONTENT_FILTER 8
360 #endif
361
362 extern struct inpcbinfo ripcbinfo;
363 struct cfil_entry;
364
365 /*
366  * The structure content_filter represents a user space content filter.
367  * It's created and associated with a kernel control socket instance.
368 */
369 struct content_filter {
370 kern_ctl_ref cf_kcref;
371 u_int32_t cf_kcunit;
372 u_int32_t cf_flags;
373
374 uint32_t cf_necp_control_unit;
375
376 uint32_t cf_sock_count;
377 TAILQ_HEAD(, cfil_entry) cf_sock_entries;
378
379 cfil_crypto_state_t cf_crypto_state;
380 };
381
382 #define CFF_ACTIVE 0x01
383 #define CFF_DETACHING 0x02
384 #define CFF_FLOW_CONTROLLED 0x04
385
386 struct content_filter **content_filters = NULL;
387 uint32_t cfil_active_count = 0; /* Number of active content filters */
388 uint32_t cfil_sock_attached_count = 0; /* Number of socket attachments */
389 uint32_t cfil_sock_udp_attached_count = 0; /* Number of UDP socket attachments */
390 uint32_t cfil_sock_attached_stats_count = 0; /* Number of sockets that requested periodic stats reports */
391 uint32_t cfil_close_wait_timeout = 1000; /* in milliseconds */
392
393 static kern_ctl_ref cfil_kctlref = NULL;
394
395 static lck_grp_attr_t *cfil_lck_grp_attr = NULL;
396 static lck_attr_t *cfil_lck_attr = NULL;
397 static lck_grp_t *cfil_lck_grp = NULL;
398 decl_lck_rw_data(static, cfil_lck_rw);
399
400 #define CFIL_RW_LCK_MAX 8
401
402 int cfil_rw_nxt_lck = 0;
403 void* cfil_rw_lock_history[CFIL_RW_LCK_MAX];
404
405 int cfil_rw_nxt_unlck = 0;
406 void* cfil_rw_unlock_history[CFIL_RW_LCK_MAX];
407
408 static ZONE_DECLARE(content_filter_zone, "content_filter",
409 sizeof(struct content_filter), ZC_NONE);
410
411 MBUFQ_HEAD(cfil_mqhead);
412
413 struct cfil_queue {
414 uint64_t q_start; /* offset of first byte in queue */
415 uint64_t q_end; /* offset of last byte in queue */
416 struct cfil_mqhead q_mq;
417 };
418
419 /*
420 * struct cfil_entry
421 *
422  * There is one entry per content filter
423 */
424 struct cfil_entry {
425 TAILQ_ENTRY(cfil_entry) cfe_link;
426 SLIST_ENTRY(cfil_entry) cfe_order_link;
427 struct content_filter *cfe_filter;
428
429 struct cfil_info *cfe_cfil_info;
430 uint32_t cfe_flags;
431 uint32_t cfe_necp_control_unit;
432 struct timeval cfe_last_event; /* To user space */
433 struct timeval cfe_last_action; /* From user space */
434 uint64_t cfe_byte_inbound_count_reported; /* stats already reported */
435 uint64_t cfe_byte_outbound_count_reported; /* stats already reported */
436 struct timeval cfe_stats_report_ts; /* Timestamp for last stats report */
437 uint32_t cfe_stats_report_frequency; /* Interval for stats report in msecs */
438 boolean_t cfe_laddr_sent;
439
440 struct cfe_buf {
441 /*
442 * cfe_pending_q holds data that has been delivered to
443 * the filter and for which we are waiting for an action
444 */
445 struct cfil_queue cfe_pending_q;
446 /*
447  * This queue is for data that has not been delivered to
448 * the content filter (new data, pass peek or flow control)
449 */
450 struct cfil_queue cfe_ctl_q;
451
452 uint64_t cfe_pass_offset;
453 uint64_t cfe_peek_offset;
454 uint64_t cfe_peeked;
455 } cfe_snd, cfe_rcv;
456 };
457
458 #define CFEF_CFIL_ATTACHED 0x0001 /* was attached to filter */
459 #define CFEF_SENT_SOCK_ATTACHED 0x0002 /* sock attach event was sent */
460 #define CFEF_DATA_START 0x0004 /* can send data event */
461 #define CFEF_FLOW_CONTROLLED 0x0008 /* wait for flow control lift */
462 #define CFEF_SENT_DISCONNECT_IN 0x0010 /* event was sent */
463 #define CFEF_SENT_DISCONNECT_OUT 0x0020 /* event was sent */
464 #define CFEF_SENT_SOCK_CLOSED 0x0040 /* closed event was sent */
465 #define CFEF_CFIL_DETACHED 0x0080 /* filter was detached */
466
467
468 #define CFI_ADD_TIME_LOG(cfil, t1, t0, op) \
469 struct timeval64 _tdiff; \
470 if ((cfil)->cfi_op_list_ctr < CFI_MAX_TIME_LOG_ENTRY) { \
471 timersub(t1, t0, &_tdiff); \
472 (cfil)->cfi_op_time[(cfil)->cfi_op_list_ctr] = (uint32_t)(_tdiff.tv_sec * 1000 + _tdiff.tv_usec / 1000);\
473 (cfil)->cfi_op_list[(cfil)->cfi_op_list_ctr] = (unsigned char)op; \
474 (cfil)->cfi_op_list_ctr ++; \
475 }
476
477 struct cfil_hash_entry;
478
479 /*
480 * struct cfil_info
481 *
482 * There is a struct cfil_info per socket
483 */
484 struct cfil_info {
485 TAILQ_ENTRY(cfil_info) cfi_link;
486 TAILQ_ENTRY(cfil_info) cfi_link_stats;
487 struct socket *cfi_so;
488 uint64_t cfi_flags;
489 uint64_t cfi_sock_id;
490 struct timeval64 cfi_first_event;
491 uint32_t cfi_op_list_ctr;
492 uint32_t cfi_op_time[CFI_MAX_TIME_LOG_ENTRY]; /* time interval in milliseconds since first event */
493 unsigned char cfi_op_list[CFI_MAX_TIME_LOG_ENTRY];
494 union sockaddr_in_4_6 cfi_so_attach_faddr; /* faddr at the time of attach */
495 union sockaddr_in_4_6 cfi_so_attach_laddr; /* laddr at the time of attach */
496
497 int cfi_dir;
498 uint64_t cfi_byte_inbound_count;
499 uint64_t cfi_byte_outbound_count;
500
501 boolean_t cfi_isSignatureLatest; /* Indicates if signature covers latest flow attributes */
502 u_int32_t cfi_filter_control_unit;
503 u_int32_t cfi_debug;
504 struct cfi_buf {
505 /*
506 * cfi_pending_first and cfi_pending_last describe the total
507 * amount of data outstanding for all the filters on
508 * this socket and data in the flow queue
509 * cfi_pending_mbcnt counts in sballoc() "chars of mbufs used"
510 */
511 uint64_t cfi_pending_first;
512 uint64_t cfi_pending_last;
513 uint32_t cfi_pending_mbcnt;
514 uint32_t cfi_pending_mbnum;
515 uint32_t cfi_tail_drop_cnt;
516 /*
517 * cfi_pass_offset is the minimum of all the filters
518 */
519 uint64_t cfi_pass_offset;
520 /*
521 * cfi_inject_q holds data that needs to be re-injected
522 * into the socket after filtering and that can
523 * be queued because of flow control
524 */
525 struct cfil_queue cfi_inject_q;
526 } cfi_snd, cfi_rcv;
527
528 struct cfil_entry cfi_entries[MAX_CONTENT_FILTER];
529 struct cfil_hash_entry *cfi_hash_entry;
530 SLIST_HEAD(, cfil_entry) cfi_ordered_entries;
531 os_refcnt_t cfi_ref_count;
532 } __attribute__((aligned(8)));
533
534 #define CFIF_DROP 0x0001 /* drop action applied */
535 #define CFIF_CLOSE_WAIT 0x0002 /* waiting for filter to close */
536 #define CFIF_SOCK_CLOSED 0x0004 /* socket is closed */
537 #define CFIF_RETRY_INJECT_IN 0x0010 /* inject in failed */
538 #define CFIF_RETRY_INJECT_OUT 0x0020 /* inject out failed */
539 #define CFIF_SHUT_WR 0x0040 /* shutdown write */
540 #define CFIF_SHUT_RD 0x0080 /* shutdown read */
541 #define CFIF_SOCKET_CONNECTED 0x0100 /* socket is connected */
542 #define CFIF_INITIAL_VERDICT 0x0200 /* received initial verdict */
543
544 #define CFI_MASK_GENCNT 0xFFFFFFFF00000000 /* upper 32 bits */
545 #define CFI_SHIFT_GENCNT 32
546 #define CFI_MASK_FLOWHASH 0x00000000FFFFFFFF /* lower 32 bits */
547 #define CFI_SHIFT_FLOWHASH 0
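
/*
 * Illustration of how a datagram flow's cfi_sock_id splits into its two
 * halves (a sketch; cfil_socket_from_sock_id() below performs the equivalent
 * extraction when resolving a filter agent's verdict back to a socket):
 *
 *      uint64_t sock_id = cfil_info->cfi_sock_id;
 *      u_int32_t gencnt   = (u_int32_t)((sock_id & CFI_MASK_GENCNT) >> CFI_SHIFT_GENCNT);
 *      u_int32_t flowhash = (u_int32_t)((sock_id & CFI_MASK_FLOWHASH) >> CFI_SHIFT_FLOWHASH);
 *
 * The gencnt half is matched against the owning socket's so_gencnt and the
 * flowhash half is the key into the per-socket flow hash table.
 */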
548
549 #define CFI_ENTRY_KCUNIT(i, e) ((uint32_t)(((e) - &((i)->cfi_entries[0])) + 1))
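
/*
 * For example, &cfi_entries[0] maps to kcunit 1 and
 * &cfi_entries[MAX_CONTENT_FILTER - 1] maps to kcunit MAX_CONTENT_FILTER:
 * kernel control unit numbers are 1-based while the entry array is 0-based,
 * hence the "kcunit - 1" indexing used throughout this file.
 */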
550
551 static ZONE_DECLARE(cfil_info_zone, "cfil_info",
552 sizeof(struct cfil_info), ZC_NONE);
553
554 TAILQ_HEAD(cfil_sock_head, cfil_info) cfil_sock_head;
555 TAILQ_HEAD(cfil_sock_head_stats, cfil_info) cfil_sock_head_stats;
556
557 #define CFIL_QUEUE_VERIFY(x) if (cfil_debug) cfil_queue_verify(x)
558 #define CFIL_INFO_VERIFY(x) if (cfil_debug) cfil_info_verify(x)
559
560 /*
561 * UDP Socket Support
562 */
563 LIST_HEAD(cfilhashhead, cfil_hash_entry);
564 #define CFILHASHSIZE 16
565 #define CFIL_HASH(laddr, faddr, lport, fport) ((faddr) ^ ((laddr) >> 16) ^ (fport) ^ (lport))
566
567 #define IS_INET(so) (so && so->so_proto && so->so_proto->pr_domain && (so->so_proto->pr_domain->dom_family == AF_INET || so->so_proto->pr_domain->dom_family == AF_INET6))
568 #define IS_TCP(so) (so && so->so_proto && so->so_proto->pr_type == SOCK_STREAM && so->so_proto->pr_protocol == IPPROTO_TCP)
569 #define IS_UDP(so) (so && so->so_proto && so->so_proto->pr_type == SOCK_DGRAM && so->so_proto->pr_protocol == IPPROTO_UDP)
570 #define IS_ICMP(so) (so && so->so_proto && (so->so_proto->pr_type == SOCK_RAW || so->so_proto->pr_type == SOCK_DGRAM) && \
571 (so->so_proto->pr_protocol == IPPROTO_ICMP || so->so_proto->pr_protocol == IPPROTO_ICMPV6))
572 #define IS_RAW(so) (so && so->so_proto && so->so_proto->pr_type == SOCK_RAW && so->so_proto->pr_protocol == IPPROTO_RAW)
573
574 #if !TARGET_OS_OSX && !defined(XNU_TARGET_OS_OSX)
575 #define IS_IP_DGRAM(so) (IS_INET(so) && IS_UDP(so))
576 #else
577 #define IS_IP_DGRAM(so) (IS_INET(so) && !IS_TCP(so))
578 #endif
579
580 #define OPTIONAL_IP_HEADER(so) (!IS_TCP(so) && !IS_UDP(so))
581 #define GET_SO_PROTO(so) ((so && so->so_proto) ? so->so_proto->pr_protocol : IPPROTO_MAX)
582 #define IS_INP_V6(inp) (inp && (inp->inp_vflag & INP_IPV6))
583
584 #define UNCONNECTED(inp) (inp && (((inp->inp_vflag & INP_IPV4) && (inp->inp_faddr.s_addr == INADDR_ANY)) || \
585 ((inp->inp_vflag & INP_IPV6) && IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr))))
587 #define IS_ENTRY_ATTACHED(cfil_info, kcunit) (cfil_info != NULL && (kcunit <= MAX_CONTENT_FILTER) && \
588 cfil_info->cfi_entries[kcunit - 1].cfe_filter != NULL)
589 #define IS_DNS(local, remote) (check_port(local, 53) || check_port(remote, 53) || check_port(local, 5353) || check_port(remote, 5353))
590 #define IS_INITIAL_TFO_DATA(so) (so && (so->so_flags1 & SOF1_PRECONNECT_DATA) && (so->so_state & SS_ISCONNECTING))
591 #define NULLADDRESS(addr) ((addr.sa.sa_len == 0) || \
592 (addr.sa.sa_family == AF_INET && addr.sin.sin_addr.s_addr == 0) || \
593 (addr.sa.sa_family == AF_INET6 && IN6_IS_ADDR_UNSPECIFIED(&addr.sin6.sin6_addr)))
594 #define LOCAL_ADDRESS_NEEDS_UPDATE(entry) \
595 ((entry->cfentry_family == AF_INET && entry->cfentry_laddr.addr46.ia46_addr4.s_addr == 0) || \
596 entry->cfentry_family == AF_INET6 && IN6_IS_ADDR_UNSPECIFIED(&entry->cfentry_laddr.addr6))
597 #define LOCAL_PORT_NEEDS_UPDATE(entry, so) (entry->cfentry_lport == 0 && IS_UDP(so))
598
599 #define SKIP_FILTER_FOR_TCP_SOCKET(so) \
600 (so == NULL || so->so_proto == NULL || so->so_proto->pr_domain == NULL || \
601 (so->so_proto->pr_domain->dom_family != PF_INET && so->so_proto->pr_domain->dom_family != PF_INET6) || \
602 so->so_proto->pr_type != SOCK_STREAM || \
603 so->so_proto->pr_protocol != IPPROTO_TCP || \
604 (so->so_flags & SOF_MP_SUBFLOW) != 0 || \
605 (so->so_flags1 & SOF1_CONTENT_FILTER_SKIP) != 0)
606
607 os_refgrp_decl(static, cfil_refgrp, "CFILRefGroup", NULL);
608
609 #define CFIL_INFO_FREE(cfil_info) \
610 if (cfil_info && (os_ref_release(&cfil_info->cfi_ref_count) == 0)) { \
611 cfil_info_free(cfil_info); \
612 }
613
614 /*
615 * Periodic Statistics Report:
616 */
617 static struct thread *cfil_stats_report_thread;
618 #define CFIL_STATS_REPORT_INTERVAL_MIN_MSEC 500 // Highest report frequency
619 #define CFIL_STATS_REPORT_RUN_INTERVAL_NSEC (CFIL_STATS_REPORT_INTERVAL_MIN_MSEC * NSEC_PER_MSEC)
620 #define CFIL_STATS_REPORT_MAX_COUNT 50 // Max stats to be reported per run
621
622 /* This buffer must have same layout as struct cfil_msg_stats_report */
623 struct cfil_stats_report_buffer {
624 struct cfil_msg_hdr msghdr;
625 uint32_t count;
626 struct cfil_msg_sock_stats stats[CFIL_STATS_REPORT_MAX_COUNT];
627 };
628 static struct cfil_stats_report_buffer *global_cfil_stats_report_buffers[MAX_CONTENT_FILTER];
629 static uint32_t global_cfil_stats_counts[MAX_CONTENT_FILTER];
630
631 /*
632 * UDP Garbage Collection:
633 */
634 static struct thread *cfil_udp_gc_thread;
635 #define UDP_FLOW_GC_IDLE_TO 30 // Flow Idle Timeout in seconds
636 #define UDP_FLOW_GC_ACTION_TO 10 // Flow Action Timeout (no action from user space) in seconds
637 #define UDP_FLOW_GC_MAX_COUNT 100 // Max UDP flows to be handled per run
638 #define UDP_FLOW_GC_RUN_INTERVAL_NSEC (10 * NSEC_PER_SEC) // GC wakes up every 10 seconds
639
640 /*
641 * UDP flow queue thresholds
642 */
643 #define UDP_FLOW_GC_MBUF_CNT_MAX (2 << MBSHIFT) // Max mbuf byte count in flow queue (2MB)
644 #define UDP_FLOW_GC_MBUF_NUM_MAX (UDP_FLOW_GC_MBUF_CNT_MAX >> MCLSHIFT) // Max mbuf count in flow queue (1K)
645 #define UDP_FLOW_GC_MBUF_SHIFT 5 // Shift to get 1/32 of platform limits
646 /*
647 * UDP flow queue threshold globals:
648 */
649 static unsigned int cfil_udp_gc_mbuf_num_max = UDP_FLOW_GC_MBUF_NUM_MAX;
650 static unsigned int cfil_udp_gc_mbuf_cnt_max = UDP_FLOW_GC_MBUF_CNT_MAX;
651
652 /*
653 * struct cfil_hash_entry
654 *
655 * Hash entry for cfil_info
656 */
657 struct cfil_hash_entry {
658 LIST_ENTRY(cfil_hash_entry) cfentry_link;
659 struct cfil_info *cfentry_cfil;
660 u_short cfentry_fport;
661 u_short cfentry_lport;
662 sa_family_t cfentry_family;
663 u_int32_t cfentry_flowhash;
664 u_int64_t cfentry_lastused;
665 union {
666 /* foreign host table entry */
667 struct in_addr_4in6 addr46;
668 struct in6_addr addr6;
669 } cfentry_faddr;
670 union {
671 /* local host table entry */
672 struct in_addr_4in6 addr46;
673 struct in6_addr addr6;
674 } cfentry_laddr;
675 uint8_t cfentry_laddr_updated: 1;
676 uint8_t cfentry_lport_updated: 1;
677 uint8_t cfentry_reserved: 6;
678 };
679
680 /*
681 * struct cfil_db
682 *
683 * For each UDP socket, this is a hash table maintaining all cfil_info structs
684 * keyed by the flow 4-tuples <lport,fport,laddr,faddr>.
685 */
686 struct cfil_db {
687 struct socket *cfdb_so;
688 uint32_t cfdb_count; /* Total number of hash entries in the db */
689 struct cfilhashhead *cfdb_hashbase;
690 u_long cfdb_hashmask;
691 struct cfil_hash_entry *cfdb_only_entry; /* Optimization for connected UDP */
692 };
693
694 /*
695 * CFIL specific mbuf tag:
696 * Save state of socket at the point of data entry into cfil.
697 * Use saved state for reinjection at protocol layer.
698 */
699 struct cfil_tag {
700 union sockaddr_in_4_6 cfil_faddr;
701 uint32_t cfil_so_state_change_cnt;
702 uint32_t cfil_so_options;
703 int cfil_inp_flags;
704 };
705
706 static ZONE_DECLARE(cfil_hash_entry_zone, "cfil_entry_hash",
707 sizeof(struct cfil_hash_entry), ZC_NONE);
708
709 static ZONE_DECLARE(cfil_db_zone, "cfil_db",
710 sizeof(struct cfil_db), ZC_NONE);
711
712 /*
713 * Statistics
714 */
715
716 struct cfil_stats cfil_stats;
717
718 /*
719 * For troubleshooting
720 */
721 int cfil_log_level = LOG_ERR;
722 int cfil_debug = 1;
723
724 // Debug controls added for selective debugging.
725 // Disabled for production. If enabled,
726 // these will have a performance impact.
727 #define LIFECYCLE_DEBUG 0
728 #define VERDICT_DEBUG 0
729 #define DATA_DEBUG 0
730 #define SHOW_DEBUG 0
731 #define GC_DEBUG 0
732 #define STATS_DEBUG 0
733
734 /*
735 * Sysctls for logs and statistics
736 */
737 static int sysctl_cfil_filter_list(struct sysctl_oid *, void *, int,
738 struct sysctl_req *);
739 static int sysctl_cfil_sock_list(struct sysctl_oid *, void *, int,
740 struct sysctl_req *);
741
742 SYSCTL_NODE(_net, OID_AUTO, cfil, CTLFLAG_RW | CTLFLAG_LOCKED, 0, "cfil");
743
744 SYSCTL_INT(_net_cfil, OID_AUTO, log, CTLFLAG_RW | CTLFLAG_LOCKED,
745 &cfil_log_level, 0, "");
746
747 SYSCTL_INT(_net_cfil, OID_AUTO, debug, CTLFLAG_RW | CTLFLAG_LOCKED,
748 &cfil_debug, 0, "");
749
750 SYSCTL_UINT(_net_cfil, OID_AUTO, sock_attached_count, CTLFLAG_RD | CTLFLAG_LOCKED,
751 &cfil_sock_attached_count, 0, "");
752
753 SYSCTL_UINT(_net_cfil, OID_AUTO, active_count, CTLFLAG_RD | CTLFLAG_LOCKED,
754 &cfil_active_count, 0, "");
755
756 SYSCTL_UINT(_net_cfil, OID_AUTO, close_wait_timeout, CTLFLAG_RW | CTLFLAG_LOCKED,
757 &cfil_close_wait_timeout, 0, "");
758
759 static int cfil_sbtrim = 1;
760 SYSCTL_UINT(_net_cfil, OID_AUTO, sbtrim, CTLFLAG_RW | CTLFLAG_LOCKED,
761 &cfil_sbtrim, 0, "");
762
763 SYSCTL_PROC(_net_cfil, OID_AUTO, filter_list, CTLFLAG_RD | CTLFLAG_LOCKED,
764 0, 0, sysctl_cfil_filter_list, "S,cfil_filter_stat", "");
765
766 SYSCTL_PROC(_net_cfil, OID_AUTO, sock_list, CTLFLAG_RD | CTLFLAG_LOCKED,
767 0, 0, sysctl_cfil_sock_list, "S,cfil_sock_stat", "");
768
769 SYSCTL_STRUCT(_net_cfil, OID_AUTO, stats, CTLFLAG_RD | CTLFLAG_LOCKED,
770 &cfil_stats, cfil_stats, "");
771
772 /*
773 * Forward declaration to appease the compiler
774 */
775 static int cfil_action_data_pass(struct socket *, struct cfil_info *, uint32_t, int,
776 uint64_t, uint64_t);
777 static int cfil_action_drop(struct socket *, struct cfil_info *, uint32_t);
778 static int cfil_action_bless_client(uint32_t, struct cfil_msg_hdr *);
779 static int cfil_action_set_crypto_key(uint32_t, struct cfil_msg_hdr *);
780 static int cfil_dispatch_closed_event(struct socket *, struct cfil_info *, int);
781 static int cfil_data_common(struct socket *, struct cfil_info *, int, struct sockaddr *,
782 struct mbuf *, struct mbuf *, uint32_t);
783 static int cfil_data_filter(struct socket *, struct cfil_info *, uint32_t, int,
784 struct mbuf *, uint32_t);
785 static void fill_ip_sockaddr_4_6(union sockaddr_in_4_6 *,
786 struct in_addr, u_int16_t);
787 static void fill_ip6_sockaddr_4_6(union sockaddr_in_4_6 *,
788 struct in6_addr *, u_int16_t);
789
790 static int cfil_dispatch_attach_event(struct socket *, struct cfil_info *, uint32_t, int);
791 static void cfil_info_free(struct cfil_info *);
792 static struct cfil_info * cfil_info_alloc(struct socket *, struct cfil_hash_entry *);
793 static int cfil_info_attach_unit(struct socket *, uint32_t, struct cfil_info *);
794 static struct socket * cfil_socket_from_sock_id(cfil_sock_id_t, bool);
795 static struct socket * cfil_socket_from_client_uuid(uuid_t, bool *);
796 static int cfil_service_pending_queue(struct socket *, struct cfil_info *, uint32_t, int);
797 static int cfil_data_service_ctl_q(struct socket *, struct cfil_info *, uint32_t, int);
798 static void cfil_info_verify(struct cfil_info *);
799 static int cfil_update_data_offsets(struct socket *, struct cfil_info *, uint32_t, int,
800 uint64_t, uint64_t);
801 static int cfil_acquire_sockbuf(struct socket *, struct cfil_info *, int);
802 static void cfil_release_sockbuf(struct socket *, int);
803 static int cfil_filters_attached(struct socket *);
804
805 static void cfil_rw_lock_exclusive(lck_rw_t *);
806 static void cfil_rw_unlock_exclusive(lck_rw_t *);
807 static void cfil_rw_lock_shared(lck_rw_t *);
808 static void cfil_rw_unlock_shared(lck_rw_t *);
809 static boolean_t cfil_rw_lock_shared_to_exclusive(lck_rw_t *);
810 static void cfil_rw_lock_exclusive_to_shared(lck_rw_t *);
811
812 static unsigned int cfil_data_length(struct mbuf *, int *, int *);
813 static errno_t cfil_db_init(struct socket *);
814 static void cfil_db_free(struct socket *so);
815 struct cfil_hash_entry *cfil_db_lookup_entry(struct cfil_db *, struct sockaddr *, struct sockaddr *, boolean_t);
816 struct cfil_hash_entry *cfil_db_lookup_entry_internal(struct cfil_db *, struct sockaddr *, struct sockaddr *, boolean_t, boolean_t);
817 struct cfil_hash_entry *cfil_db_lookup_entry_with_sockid(struct cfil_db *, u_int64_t);
818 struct cfil_hash_entry *cfil_db_add_entry(struct cfil_db *, struct sockaddr *, struct sockaddr *);
819 void cfil_db_update_entry_local(struct cfil_db *, struct cfil_hash_entry *, struct sockaddr *, struct mbuf *);
820 void cfil_db_delete_entry(struct cfil_db *, struct cfil_hash_entry *);
821 struct cfil_hash_entry *cfil_sock_udp_get_flow(struct socket *, uint32_t, bool, struct sockaddr *, struct sockaddr *, struct mbuf *, int);
822 struct cfil_info *cfil_db_get_cfil_info(struct cfil_db *, cfil_sock_id_t);
823 static errno_t cfil_sock_udp_handle_data(bool, struct socket *, struct sockaddr *, struct sockaddr *,
824 struct mbuf *, struct mbuf *, uint32_t);
825 static int cfil_sock_udp_get_address_from_control(sa_family_t, struct mbuf *, uint8_t **);
826 static int32_t cfil_sock_udp_data_pending(struct sockbuf *, bool);
827 static void cfil_sock_udp_is_closed(struct socket *);
828 static int cfil_sock_udp_notify_shutdown(struct socket *, int, int, int);
829 static int cfil_sock_udp_shutdown(struct socket *, int *);
830 static void cfil_sock_udp_close_wait(struct socket *);
831 static void cfil_sock_udp_buf_update(struct sockbuf *);
832 static int cfil_filters_udp_attached(struct socket *, bool);
833 static void cfil_get_flow_address_v6(struct cfil_hash_entry *, struct inpcb *,
834 struct in6_addr **, struct in6_addr **,
835 u_int16_t *, u_int16_t *);
836 static void cfil_get_flow_address(struct cfil_hash_entry *, struct inpcb *,
837 struct in_addr *, struct in_addr *,
838 u_int16_t *, u_int16_t *);
839 static void cfil_info_log(int, struct cfil_info *, const char *);
840 void cfil_filter_show(u_int32_t);
841 void cfil_info_show(void);
842 bool cfil_info_idle_timed_out(struct cfil_info *, int, u_int64_t);
843 bool cfil_info_action_timed_out(struct cfil_info *, int);
844 bool cfil_info_buffer_threshold_exceeded(struct cfil_info *);
845 struct m_tag *cfil_dgram_save_socket_state(struct cfil_info *, struct mbuf *);
846 boolean_t cfil_dgram_peek_socket_state(struct mbuf *m, int *inp_flags);
847 static void cfil_udp_gc_thread_func(void *, wait_result_t);
848 static void cfil_info_udp_expire(void *, wait_result_t);
849 static bool fill_cfil_hash_entry_from_address(struct cfil_hash_entry *, bool, struct sockaddr *, bool);
850 static void cfil_sock_received_verdict(struct socket *so);
851 static void cfil_fill_event_msg_addresses(struct cfil_hash_entry *, struct inpcb *,
852 union sockaddr_in_4_6 *, union sockaddr_in_4_6 *,
853 boolean_t, boolean_t);
854 static void cfil_stats_report_thread_func(void *, wait_result_t);
855 static void cfil_stats_report(void *v, wait_result_t w);
856
857 bool check_port(struct sockaddr *, u_short);
858
859 /*
860 * Content filter global read write lock
861 */
862
863 static void
864 cfil_rw_lock_exclusive(lck_rw_t *lck)
865 {
866 void *lr_saved;
867
868 lr_saved = __builtin_return_address(0);
869
870 lck_rw_lock_exclusive(lck);
871
872 cfil_rw_lock_history[cfil_rw_nxt_lck] = lr_saved;
873 cfil_rw_nxt_lck = (cfil_rw_nxt_lck + 1) % CFIL_RW_LCK_MAX;
874 }
875
876 static void
877 cfil_rw_unlock_exclusive(lck_rw_t *lck)
878 {
879 void *lr_saved;
880
881 lr_saved = __builtin_return_address(0);
882
883 lck_rw_unlock_exclusive(lck);
884
885 cfil_rw_unlock_history[cfil_rw_nxt_unlck] = lr_saved;
886 cfil_rw_nxt_unlck = (cfil_rw_nxt_unlck + 1) % CFIL_RW_LCK_MAX;
887 }
888
889 static void
890 cfil_rw_lock_shared(lck_rw_t *lck)
891 {
892 void *lr_saved;
893
894 lr_saved = __builtin_return_address(0);
895
896 lck_rw_lock_shared(lck);
897
898 cfil_rw_lock_history[cfil_rw_nxt_lck] = lr_saved;
899 cfil_rw_nxt_lck = (cfil_rw_nxt_lck + 1) % CFIL_RW_LCK_MAX;
900 }
901
902 static void
903 cfil_rw_unlock_shared(lck_rw_t *lck)
904 {
905 void *lr_saved;
906
907 lr_saved = __builtin_return_address(0);
908
909 lck_rw_unlock_shared(lck);
910
911 cfil_rw_unlock_history[cfil_rw_nxt_unlck] = lr_saved;
912 cfil_rw_nxt_unlck = (cfil_rw_nxt_unlck + 1) % CFIL_RW_LCK_MAX;
913 }
914
915 static boolean_t
916 cfil_rw_lock_shared_to_exclusive(lck_rw_t *lck)
917 {
918 void *lr_saved;
919 boolean_t upgraded;
920
921 lr_saved = __builtin_return_address(0);
922
923 upgraded = lck_rw_lock_shared_to_exclusive(lck);
924 if (upgraded) {
925 cfil_rw_unlock_history[cfil_rw_nxt_unlck] = lr_saved;
926 cfil_rw_nxt_unlck = (cfil_rw_nxt_unlck + 1) % CFIL_RW_LCK_MAX;
927 }
928 return upgraded;
929 }
930
931 static void
932 cfil_rw_lock_exclusive_to_shared(lck_rw_t *lck)
933 {
934 void *lr_saved;
935
936 lr_saved = __builtin_return_address(0);
937
938 lck_rw_lock_exclusive_to_shared(lck);
939
940 cfil_rw_lock_history[cfil_rw_nxt_lck] = lr_saved;
941 cfil_rw_nxt_lck = (cfil_rw_nxt_lck + 1) % CFIL_RW_LCK_MAX;
942 }
943
944 static void
945 cfil_rw_lock_assert_held(lck_rw_t *lck, int exclusive)
946 {
947 #if !MACH_ASSERT
948 #pragma unused(lck, exclusive)
949 #endif
950 LCK_RW_ASSERT(lck,
951 exclusive ? LCK_RW_ASSERT_EXCLUSIVE : LCK_RW_ASSERT_HELD);
952 }
953
954 /*
955 * Return the number of bytes in the mbuf chain using the same
956 * method as m_length() or sballoc()
957 *
958 * Returns data len - starting from PKT start
959 * - retmbcnt - optional param to get total mbuf bytes in chain
960 * - retmbnum - optional param to get number of mbufs in chain
961 */
962 static unsigned int
963 cfil_data_length(struct mbuf *m, int *retmbcnt, int *retmbnum)
964 {
965 struct mbuf *m0;
966 unsigned int pktlen = 0;
967 int mbcnt;
968 int mbnum;
969
970 // Locate the start of data
971 for (m0 = m; m0 != NULL; m0 = m0->m_next) {
972 if (m0->m_flags & M_PKTHDR) {
973 break;
974 }
975 }
976 if (m0 == NULL) {
977 CFIL_LOG(LOG_ERR, "cfil_data_length: no M_PKTHDR");
978 return 0;
979 }
980 m = m0;
981
982 if (retmbcnt == NULL && retmbnum == NULL) {
983 return m_length(m);
984 }
985
986 pktlen = 0;
987 mbcnt = 0;
988 mbnum = 0;
989 for (m0 = m; m0 != NULL; m0 = m0->m_next) {
990 pktlen += m0->m_len;
991 mbnum++;
992 mbcnt += MSIZE;
993 if (m0->m_flags & M_EXT) {
994 mbcnt += m0->m_ext.ext_size;
995 }
996 }
997 if (retmbcnt) {
998 *retmbcnt = mbcnt;
999 }
1000 if (retmbnum) {
1001 *retmbnum = mbnum;
1002 }
1003 return pktlen;
1004 }
1005
1006 static struct mbuf *
1007 cfil_data_start(struct mbuf *m)
1008 {
1009 struct mbuf *m0;
1010
1011 // Locate the start of data
1012 for (m0 = m; m0 != NULL; m0 = m0->m_next) {
1013 if (m0->m_flags & M_PKTHDR) {
1014 break;
1015 }
1016 }
1017 return m0;
1018 }
1019
1020 /*
1021 * Common mbuf queue utilities
1022 */
1023
1024 static inline void
1025 cfil_queue_init(struct cfil_queue *cfq)
1026 {
1027 cfq->q_start = 0;
1028 cfq->q_end = 0;
1029 MBUFQ_INIT(&cfq->q_mq);
1030 }
1031
1032 static inline uint64_t
1033 cfil_queue_drain(struct cfil_queue *cfq)
1034 {
1035 uint64_t drained = cfq->q_end - cfq->q_start;
1036 cfq->q_start = 0;
1037 cfq->q_end = 0;
1038 MBUFQ_DRAIN(&cfq->q_mq);
1039
1040 return drained;
1041 }
1042
1043 /* Return 1 when empty, 0 otherwise */
1044 static inline int
1045 cfil_queue_empty(struct cfil_queue *cfq)
1046 {
1047 return MBUFQ_EMPTY(&cfq->q_mq);
1048 }
1049
1050 static inline uint64_t
1051 cfil_queue_offset_first(struct cfil_queue *cfq)
1052 {
1053 return cfq->q_start;
1054 }
1055
1056 static inline uint64_t
1057 cfil_queue_offset_last(struct cfil_queue *cfq)
1058 {
1059 return cfq->q_end;
1060 }
1061
1062 static inline uint64_t
1063 cfil_queue_len(struct cfil_queue *cfq)
1064 {
1065 return cfq->q_end - cfq->q_start;
1066 }
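
/*
 * Example of the offset bookkeeping (a sketch): a queue with q_start == 100
 * and q_end == 350 holds 250 bytes covering absolute offsets 100..349.
 * cfil_queue_enqueue() of a 50-byte mbuf chain advances q_end to 400, and
 * cfil_queue_remove() of the first chain advances q_start by that chain's
 * length, so cfil_queue_len() always reflects the bytes still queued.
 */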
1067
1068 /*
1069 * Routines to verify some fundamental assumptions
1070 */
1071
1072 static void
1073 cfil_queue_verify(struct cfil_queue *cfq)
1074 {
1075 mbuf_t chain;
1076 mbuf_t m;
1077 mbuf_t n;
1078 uint64_t queuesize = 0;
1079
1080 /* Verify offsets are ordered */
1081 VERIFY(cfq->q_start <= cfq->q_end);
1082
1083 /*
1084 * When the queue is empty, the offsets are equal; otherwise the offsets
1085 * are different
1086 */
1087 VERIFY((MBUFQ_EMPTY(&cfq->q_mq) && cfq->q_start == cfq->q_end) ||
1088 (!MBUFQ_EMPTY(&cfq->q_mq) &&
1089 cfq->q_start != cfq->q_end));
1090
1091 MBUFQ_FOREACH(chain, &cfq->q_mq) {
1092 size_t chainsize = 0;
1093 m = chain;
1094 unsigned int mlen = cfil_data_length(m, NULL, NULL);
1095 // skip the addr and control stuff if present
1096 m = cfil_data_start(m);
1097
1098 if (m == NULL ||
1099 m == (void *)M_TAG_FREE_PATTERN ||
1100 m->m_next == (void *)M_TAG_FREE_PATTERN ||
1101 m->m_nextpkt == (void *)M_TAG_FREE_PATTERN) {
1102 panic("%s - mq %p is free at %p", __func__,
1103 &cfq->q_mq, m);
1104 }
1105 for (n = m; n != NULL; n = n->m_next) {
1106 if (n->m_type != MT_DATA &&
1107 n->m_type != MT_HEADER &&
1108 n->m_type != MT_OOBDATA) {
1109 panic("%s - %p unsupported type %u", __func__,
1110 n, n->m_type);
1111 }
1112 chainsize += n->m_len;
1113 }
1114 if (mlen != chainsize) {
1115 panic("%s - %p m_length() %u != chainsize %lu",
1116 __func__, m, mlen, chainsize);
1117 }
1118 queuesize += chainsize;
1119 }
1120 if (queuesize != cfq->q_end - cfq->q_start) {
1121 panic("%s - %p queuesize %llu != offsetdiffs %llu", __func__,
1122 m, queuesize, cfq->q_end - cfq->q_start);
1123 }
1124 }
1125
1126 static void
1127 cfil_queue_enqueue(struct cfil_queue *cfq, mbuf_t m, size_t len)
1128 {
1129 CFIL_QUEUE_VERIFY(cfq);
1130
1131 MBUFQ_ENQUEUE(&cfq->q_mq, m);
1132 cfq->q_end += len;
1133
1134 CFIL_QUEUE_VERIFY(cfq);
1135 }
1136
1137 static void
1138 cfil_queue_remove(struct cfil_queue *cfq, mbuf_t m, size_t len)
1139 {
1140 CFIL_QUEUE_VERIFY(cfq);
1141
1142 VERIFY(cfil_data_length(m, NULL, NULL) == len);
1143
1144 MBUFQ_REMOVE(&cfq->q_mq, m);
1145 MBUFQ_NEXT(m) = NULL;
1146 cfq->q_start += len;
1147
1148 CFIL_QUEUE_VERIFY(cfq);
1149 }
1150
1151 static mbuf_t
1152 cfil_queue_first(struct cfil_queue *cfq)
1153 {
1154 return MBUFQ_FIRST(&cfq->q_mq);
1155 }
1156
1157 static mbuf_t
1158 cfil_queue_next(struct cfil_queue *cfq, mbuf_t m)
1159 {
1160 #pragma unused(cfq)
1161 return MBUFQ_NEXT(m);
1162 }
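
/*
 * Typical traversal of a content filter queue, combining the accessors above
 * (a sketch; the ctl and pending queue service routines later in this file
 * follow the same pattern):
 *
 *      mbuf_t m;
 *      for (m = cfil_queue_first(cfq); m != NULL; m = cfil_queue_next(cfq, m)) {
 *              unsigned int datalen = cfil_data_length(m, NULL, NULL);
 *              // decide whether this chain may pass or must keep waiting
 *      }
 */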
1163
1164 static void
1165 cfil_entry_buf_verify(struct cfe_buf *cfe_buf)
1166 {
1167 CFIL_QUEUE_VERIFY(&cfe_buf->cfe_ctl_q);
1168 CFIL_QUEUE_VERIFY(&cfe_buf->cfe_pending_q);
1169
1170 /* Verify the queues are ordered so that pending is before ctl */
1171 VERIFY(cfe_buf->cfe_ctl_q.q_start >= cfe_buf->cfe_pending_q.q_end);
1172
1173 /* The peek offset cannot be less than the pass offset */
1174 VERIFY(cfe_buf->cfe_peek_offset >= cfe_buf->cfe_pass_offset);
1175
1176 /* Make sure we've updated the offset we peeked at */
1177 VERIFY(cfe_buf->cfe_ctl_q.q_start <= cfe_buf->cfe_peeked);
1178 }
1179
1180 static void
1181 cfil_entry_verify(struct cfil_entry *entry)
1182 {
1183 cfil_entry_buf_verify(&entry->cfe_snd);
1184 cfil_entry_buf_verify(&entry->cfe_rcv);
1185 }
1186
1187 static void
1188 cfil_info_buf_verify(struct cfi_buf *cfi_buf)
1189 {
1190 CFIL_QUEUE_VERIFY(&cfi_buf->cfi_inject_q);
1191
1192 VERIFY(cfi_buf->cfi_pending_first <= cfi_buf->cfi_pending_last);
1193 }
1194
1195 static void
1196 cfil_info_verify(struct cfil_info *cfil_info)
1197 {
1198 int i;
1199
1200 if (cfil_info == NULL) {
1201 return;
1202 }
1203
1204 cfil_info_buf_verify(&cfil_info->cfi_snd);
1205 cfil_info_buf_verify(&cfil_info->cfi_rcv);
1206
1207 for (i = 0; i < MAX_CONTENT_FILTER; i++) {
1208 cfil_entry_verify(&cfil_info->cfi_entries[i]);
1209 }
1210 }
1211
1212 static void
1213 verify_content_filter(struct content_filter *cfc)
1214 {
1215 struct cfil_entry *entry;
1216 uint32_t count = 0;
1217
1218 VERIFY(cfc->cf_sock_count >= 0);
1219
1220 TAILQ_FOREACH(entry, &cfc->cf_sock_entries, cfe_link) {
1221 count++;
1222 VERIFY(cfc == entry->cfe_filter);
1223 }
1224 VERIFY(count == cfc->cf_sock_count);
1225 }
1226
1227 /*
1228 * Kernel control socket callbacks
1229 */
1230 static errno_t
1231 cfil_ctl_connect(kern_ctl_ref kctlref, struct sockaddr_ctl *sac,
1232 void **unitinfo)
1233 {
1234 errno_t error = 0;
1235 struct content_filter *cfc = NULL;
1236
1237 CFIL_LOG(LOG_NOTICE, "");
1238
1239 cfc = zalloc(content_filter_zone);
1240 if (cfc == NULL) {
1241 CFIL_LOG(LOG_ERR, "zalloc failed");
1242 error = ENOMEM;
1243 goto done;
1244 }
1245 bzero(cfc, sizeof(struct content_filter));
1246
1247 cfil_rw_lock_exclusive(&cfil_lck_rw);
1248 if (content_filters == NULL) {
1249 struct content_filter **tmp;
1250
1251 cfil_rw_unlock_exclusive(&cfil_lck_rw);
1252
1253 MALLOC(tmp,
1254 struct content_filter **,
1255 MAX_CONTENT_FILTER * sizeof(struct content_filter *),
1256 M_TEMP,
1257 M_WAITOK | M_ZERO);
1258
1259 cfil_rw_lock_exclusive(&cfil_lck_rw);
1260
1261 if (tmp == NULL && content_filters == NULL) {
1262 error = ENOMEM;
1263 cfil_rw_unlock_exclusive(&cfil_lck_rw);
1264 goto done;
1265 }
1266 /* Another thread may have won the race */
1267 if (content_filters != NULL) {
1268 FREE(tmp, M_TEMP);
1269 } else {
1270 content_filters = tmp;
1271 }
1272 }
1273
1274 if (sac->sc_unit == 0 || sac->sc_unit > MAX_CONTENT_FILTER) {
1275 CFIL_LOG(LOG_ERR, "bad sc_unit %u", sac->sc_unit);
1276 error = EINVAL;
1277 } else if (content_filters[sac->sc_unit - 1] != NULL) {
1278 CFIL_LOG(LOG_ERR, "sc_unit %u in use", sac->sc_unit);
1279 error = EADDRINUSE;
1280 } else {
1281 /*
1282 * kernel control socket kcunit numbers start at 1
1283 */
1284 content_filters[sac->sc_unit - 1] = cfc;
1285
1286 cfc->cf_kcref = kctlref;
1287 cfc->cf_kcunit = sac->sc_unit;
1288 TAILQ_INIT(&cfc->cf_sock_entries);
1289
1290 *unitinfo = cfc;
1291 cfil_active_count++;
1292
1293 // Allocate periodic stats buffer for this filter
1294 if (global_cfil_stats_report_buffers[cfc->cf_kcunit - 1] == NULL) {
1295 cfil_rw_unlock_exclusive(&cfil_lck_rw);
1296
1297 struct cfil_stats_report_buffer *buf;
1298
1299 MALLOC(buf,
1300 struct cfil_stats_report_buffer *,
1301 sizeof(struct cfil_stats_report_buffer),
1302 M_TEMP,
1303 M_WAITOK | M_ZERO);
1304
1305 cfil_rw_lock_exclusive(&cfil_lck_rw);
1306
1307 if (buf == NULL) {
1308 error = ENOMEM;
1309 cfil_rw_unlock_exclusive(&cfil_lck_rw);
1310 goto done;
1311 }
1312
1313 /* Another thread may have won the race */
1314 if (global_cfil_stats_report_buffers[cfc->cf_kcunit - 1] != NULL) {
1315 FREE(buf, M_TEMP);
1316 } else {
1317 global_cfil_stats_report_buffers[cfc->cf_kcunit - 1] = buf;
1318 }
1319 }
1320 }
1321 cfil_rw_unlock_exclusive(&cfil_lck_rw);
1322 done:
1323 if (error != 0 && cfc != NULL) {
1324 zfree(content_filter_zone, cfc);
1325 }
1326
1327 if (error == 0) {
1328 OSIncrementAtomic(&cfil_stats.cfs_ctl_connect_ok);
1329 } else {
1330 OSIncrementAtomic(&cfil_stats.cfs_ctl_connect_fail);
1331 }
1332
1333 CFIL_LOG(LOG_INFO, "return %d cfil_active_count %u kcunit %u",
1334 error, cfil_active_count, sac->sc_unit);
1335
1336 return error;
1337 }
1338
1339 static errno_t
1340 cfil_ctl_disconnect(kern_ctl_ref kctlref, u_int32_t kcunit, void *unitinfo)
1341 {
1342 #pragma unused(kctlref)
1343 errno_t error = 0;
1344 struct content_filter *cfc;
1345 struct cfil_entry *entry;
1346 uint64_t sock_flow_id = 0;
1347
1348 CFIL_LOG(LOG_NOTICE, "");
1349
1350 if (content_filters == NULL) {
1351 CFIL_LOG(LOG_ERR, "no content filter");
1352 error = EINVAL;
1353 goto done;
1354 }
1355 if (kcunit > MAX_CONTENT_FILTER) {
1356 CFIL_LOG(LOG_ERR, "kcunit %u > MAX_CONTENT_FILTER (%d)",
1357 kcunit, MAX_CONTENT_FILTER);
1358 error = EINVAL;
1359 goto done;
1360 }
1361
1362 cfc = (struct content_filter *)unitinfo;
1363 if (cfc == NULL) {
1364 goto done;
1365 }
1366
1367 cfil_rw_lock_exclusive(&cfil_lck_rw);
1368 if (content_filters[kcunit - 1] != cfc || cfc->cf_kcunit != kcunit) {
1369 CFIL_LOG(LOG_ERR, "bad unit info %u)",
1370 kcunit);
1371 cfil_rw_unlock_exclusive(&cfil_lck_rw);
1372 goto done;
1373 }
1374 cfc->cf_flags |= CFF_DETACHING;
1375 /*
1376 * Remove all sockets from the filter
1377 */
1378 while ((entry = TAILQ_FIRST(&cfc->cf_sock_entries)) != NULL) {
1379 cfil_rw_lock_assert_held(&cfil_lck_rw, 1);
1380
1381 verify_content_filter(cfc);
1382 /*
1383 * Accept all outstanding data by pushing to next filter
1384 * or back to socket
1385 *
1386 * TBD: Actually we should make sure all data has been pushed
1387 * back to socket
1388 */
1389 if (entry->cfe_cfil_info && entry->cfe_cfil_info->cfi_so) {
1390 struct cfil_info *cfil_info = entry->cfe_cfil_info;
1391 struct socket *so = cfil_info->cfi_so;
1392 sock_flow_id = cfil_info->cfi_sock_id;
1393
1394 /* Need to let data flow immediately */
1395 entry->cfe_flags |= CFEF_SENT_SOCK_ATTACHED |
1396 CFEF_DATA_START;
1397
1398 /*
1399 * Respect locking hierarchy
1400 */
1401 cfil_rw_unlock_exclusive(&cfil_lck_rw);
1402
1403 socket_lock(so, 1);
1404
1405 /*
1406 * When cfe_filter is NULL the filter is detached
1407 * and the entry has been removed from cf_sock_entries
1408 */
1409 if ((so->so_cfil == NULL && so->so_cfil_db == NULL) || entry->cfe_filter == NULL) {
1410 cfil_rw_lock_exclusive(&cfil_lck_rw);
1411 goto release;
1412 }
1413
1414 (void) cfil_action_data_pass(so, cfil_info, kcunit, 1,
1415 CFM_MAX_OFFSET,
1416 CFM_MAX_OFFSET);
1417
1418 (void) cfil_action_data_pass(so, cfil_info, kcunit, 0,
1419 CFM_MAX_OFFSET,
1420 CFM_MAX_OFFSET);
1421
1422 cfil_rw_lock_exclusive(&cfil_lck_rw);
1423
1424 /*
1425 * Check again to make sure the cfil_info is still valid
1426 * as the socket may have been unlocked when calling
1427 * cfil_acquire_sockbuf()
1428 */
1429 if (entry->cfe_filter == NULL ||
1430 (so->so_cfil == NULL && cfil_db_get_cfil_info(so->so_cfil_db, sock_flow_id) == NULL)) {
1431 goto release;
1432 }
1433
1434 /* The filter is now detached */
1435 entry->cfe_flags |= CFEF_CFIL_DETACHED;
1436 #if LIFECYCLE_DEBUG
1437 cfil_info_log(LOG_DEBUG, cfil_info, "CFIL: LIFECYCLE: - FILTER DISCONNECTED");
1438 #endif
1439 CFIL_LOG(LOG_NOTICE, "so %llx detached %u",
1440 (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit);
1441 if ((cfil_info->cfi_flags & CFIF_CLOSE_WAIT) &&
1442 cfil_filters_attached(so) == 0) {
1443 CFIL_LOG(LOG_NOTICE, "so %llx waking",
1444 (uint64_t)VM_KERNEL_ADDRPERM(so));
1445 wakeup((caddr_t)cfil_info);
1446 }
1447
1448 /*
1449 * Remove the filter entry from the content filter
1450 * but leave the rest of the state intact as the queues
1451 * may not be empty yet
1452 */
1453 entry->cfe_filter = NULL;
1454 entry->cfe_necp_control_unit = 0;
1455
1456 TAILQ_REMOVE(&cfc->cf_sock_entries, entry, cfe_link);
1457 cfc->cf_sock_count--;
1458 release:
1459 socket_unlock(so, 1);
1460 }
1461 }
1462 verify_content_filter(cfc);
1463
1464 /* Free the stats buffer for this filter */
1465 if (global_cfil_stats_report_buffers[cfc->cf_kcunit - 1] != NULL) {
1466 FREE(global_cfil_stats_report_buffers[cfc->cf_kcunit - 1], M_TEMP);
1467 global_cfil_stats_report_buffers[cfc->cf_kcunit - 1] = NULL;
1468 }
1469 VERIFY(cfc->cf_sock_count == 0);
1470
1471 /*
1472 * Make filter inactive
1473 */
1474 content_filters[kcunit - 1] = NULL;
1475 cfil_active_count--;
1476 cfil_rw_unlock_exclusive(&cfil_lck_rw);
1477
1478 if (cfc->cf_crypto_state != NULL) {
1479 cfil_crypto_cleanup_state(cfc->cf_crypto_state);
1480 cfc->cf_crypto_state = NULL;
1481 }
1482
1483 zfree(content_filter_zone, cfc);
1484 done:
1485 if (error == 0) {
1486 OSIncrementAtomic(&cfil_stats.cfs_ctl_disconnect_ok);
1487 } else {
1488 OSIncrementAtomic(&cfil_stats.cfs_ctl_disconnect_fail);
1489 }
1490
1491 CFIL_LOG(LOG_INFO, "return %d cfil_active_count %u kcunit %u",
1492 error, cfil_active_count, kcunit);
1493
1494 return error;
1495 }
1496
1497 /*
1498 * cfil_acquire_sockbuf()
1499 *
1500 * Prevent any other thread from acquiring the sockbuf
1501 * We use sb_cfil_thread as a semaphore to prevent other threads from
1502 * messing with the sockbuf -- see sblock()
1503 * Note: We do not set SB_LOCK here because the thread may check or modify
1504 * SB_LOCK several times until it calls cfil_release_sockbuf() -- currently
1505 * sblock(), sbunlock() or sodefunct()
1506 */
1507 static int
1508 cfil_acquire_sockbuf(struct socket *so, struct cfil_info *cfil_info, int outgoing)
1509 {
1510 thread_t tp = current_thread();
1511 struct sockbuf *sb = outgoing ? &so->so_snd : &so->so_rcv;
1512 lck_mtx_t *mutex_held;
1513 int error = 0;
1514
1515 /*
1516 * Wait until no thread is holding the sockbuf and other content
1517 * filter threads have released the sockbuf
1518 */
1519 while ((sb->sb_flags & SB_LOCK) ||
1520 (sb->sb_cfil_thread != NULL && sb->sb_cfil_thread != tp)) {
1521 if (so->so_proto->pr_getlock != NULL) {
1522 mutex_held = (*so->so_proto->pr_getlock)(so, PR_F_WILLUNLOCK);
1523 } else {
1524 mutex_held = so->so_proto->pr_domain->dom_mtx;
1525 }
1526
1527 LCK_MTX_ASSERT(mutex_held, LCK_MTX_ASSERT_OWNED);
1528
1529 sb->sb_wantlock++;
1530 VERIFY(sb->sb_wantlock != 0);
1531
1532 msleep(&sb->sb_flags, mutex_held, PSOCK, "cfil_acquire_sockbuf",
1533 NULL);
1534
1535 VERIFY(sb->sb_wantlock != 0);
1536 sb->sb_wantlock--;
1537 }
1538 /*
1539 * Use reference count for repetitive calls on same thread
1540 */
1541 if (sb->sb_cfil_refs == 0) {
1542 VERIFY(sb->sb_cfil_thread == NULL);
1543 VERIFY((sb->sb_flags & SB_LOCK) == 0);
1544
1545 sb->sb_cfil_thread = tp;
1546 sb->sb_flags |= SB_LOCK;
1547 }
1548 sb->sb_cfil_refs++;
1549
1550 /* We acquire the socket buffer when we need to cleanup */
1551 if (cfil_info == NULL) {
1552 CFIL_LOG(LOG_ERR, "so %llx cfil detached",
1553 (uint64_t)VM_KERNEL_ADDRPERM(so));
1554 error = 0;
1555 } else if (cfil_info->cfi_flags & CFIF_DROP) {
1556 CFIL_LOG(LOG_ERR, "so %llx drop set",
1557 (uint64_t)VM_KERNEL_ADDRPERM(so));
1558 error = EPIPE;
1559 }
1560
1561 return error;
1562 }
1563
1564 static void
1565 cfil_release_sockbuf(struct socket *so, int outgoing)
1566 {
1567 struct sockbuf *sb = outgoing ? &so->so_snd : &so->so_rcv;
1568 thread_t tp = current_thread();
1569
1570 socket_lock_assert_owned(so);
1571
1572 if (sb->sb_cfil_thread != NULL && sb->sb_cfil_thread != tp) {
1573 panic("%s sb_cfil_thread %p not current %p", __func__,
1574 sb->sb_cfil_thread, tp);
1575 }
1576 /*
1577 * Don't panic if we are defunct because SB_LOCK has
1578 * been cleared by sodefunct()
1579 */
1580 if (!(so->so_flags & SOF_DEFUNCT) && !(sb->sb_flags & SB_LOCK)) {
1581 panic("%s SB_LOCK not set on %p", __func__,
1582 sb);
1583 }
1584 /*
1585 * We can unlock when the thread unwinds to the last reference
1586 */
1587 sb->sb_cfil_refs--;
1588 if (sb->sb_cfil_refs == 0) {
1589 sb->sb_cfil_thread = NULL;
1590 sb->sb_flags &= ~SB_LOCK;
1591
1592 if (sb->sb_wantlock > 0) {
1593 wakeup(&sb->sb_flags);
1594 }
1595 }
1596 }
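/*
 * Editorial sketch (not part of the original source): the typical calling
 * convention for the acquire/release pair above, as used later in
 * cfil_ctl_rcvd(), is to take the sockbuf with the socket locked, service
 * the control queue, and always release it, e.g. for the send side:
 *
 *	error = cfil_acquire_sockbuf(so, cfil_info, 1);
 *	if (error == 0) {
 *		error = cfil_data_service_ctl_q(so, cfil_info, kcunit, 1);
 *	}
 *	cfil_release_sockbuf(so, 1);
 */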
1597
1598 cfil_sock_id_t
1599 cfil_sock_id_from_socket(struct socket *so)
1600 {
1601 if ((so->so_flags & SOF_CONTENT_FILTER) && so->so_cfil) {
1602 return so->so_cfil->cfi_sock_id;
1603 } else {
1604 return CFIL_SOCK_ID_NONE;
1605 }
1606 }
1607
1608 /*
1609 * cfil_socket_safe_lock -
1610 * This routine attempts to lock the socket safely.
1611 *
1612 * The passed in pcbinfo is assumed to be locked and must be unlocked once the
1613 * inp state is safeguarded and before we attempt to lock/unlock the socket.
1614 * This is to prevent getting blocked by socket_lock() while holding the pcbinfo
1615 * lock, avoiding potential deadlock with other processes contending for the same
1616 * resources. This also avoids double-locking the pcbinfo for rip sockets,
1617 * since rip_unlock() will lock ripcbinfo if it needs to dispose of the inpcb
1618 * when so_usecount is 0.
1619 */
1620 static bool
1621 cfil_socket_safe_lock(struct inpcb *inp, struct inpcbinfo *pcbinfo)
1622 {
1623 struct socket *so = NULL;
1624
1625 VERIFY(pcbinfo != NULL);
1626
1627 if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) != WNT_STOPUSING) {
1628 // Safeguarded the inp state, unlock pcbinfo before locking socket.
1629 lck_rw_done(pcbinfo->ipi_lock);
1630
1631 so = inp->inp_socket;
1632 socket_lock(so, 1);
1633 if (in_pcb_checkstate(inp, WNT_RELEASE, 1) != WNT_STOPUSING) {
1634 return true;
1635 }
1636 } else {
1637 // Failed to safeguard the inp state; unlock pcbinfo and abort.
1638 lck_rw_done(pcbinfo->ipi_lock);
1639 }
1640
1641 if (so) {
1642 socket_unlock(so, 1);
1643 }
1644 return false;
1645 }
1646
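/*
 * Editorial note: as cfil_info_alloc() further below shows, a cfil_sock_id
 * packs the low 32 bits of the socket generation count into its upper word
 * and the flow hash into its lower word; cfil_socket_from_sock_id() simply
 * reverses that split:
 *
 *	cfil_sock_id = (so->so_gencnt << 32) | flowhash;	// composition
 *	gencnt   = cfil_sock_id >> 32;				// decomposition
 *	flowhash = (u_int32_t)(cfil_sock_id & 0x0ffffffff);
 */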
1647 static struct socket *
1648 cfil_socket_from_sock_id(cfil_sock_id_t cfil_sock_id, bool udp_only)
1649 {
1650 struct socket *so = NULL;
1651 u_int64_t gencnt = cfil_sock_id >> 32;
1652 u_int32_t flowhash = (u_int32_t)(cfil_sock_id & 0x0ffffffff);
1653 struct inpcb *inp = NULL;
1654 struct inpcbinfo *pcbinfo = NULL;
1655
1656 #if VERDICT_DEBUG
1657 CFIL_LOG(LOG_ERR, "CFIL: VERDICT: search for socket: id %llu gencnt %llx flowhash %x", cfil_sock_id, gencnt, flowhash);
1658 #endif
1659
1660 if (udp_only) {
1661 goto find_udp;
1662 }
1663
1664 pcbinfo = &tcbinfo;
1665 lck_rw_lock_shared(pcbinfo->ipi_lock);
1666 LIST_FOREACH(inp, pcbinfo->ipi_listhead, inp_list) {
1667 if (inp->inp_state != INPCB_STATE_DEAD &&
1668 inp->inp_socket != NULL &&
1669 inp->inp_flowhash == flowhash &&
1670 (inp->inp_socket->so_gencnt & 0x0ffffffff) == gencnt &&
1671 inp->inp_socket->so_cfil != NULL) {
1672 if (cfil_socket_safe_lock(inp, pcbinfo)) {
1673 so = inp->inp_socket;
1674 }
1675 /* pcbinfo is already unlocked, we are done. */
1676 goto done;
1677 }
1678 }
1679 lck_rw_done(pcbinfo->ipi_lock);
1680 if (so != NULL) {
1681 goto done;
1682 }
1683
1684 find_udp:
1685
1686 pcbinfo = &udbinfo;
1687 lck_rw_lock_shared(pcbinfo->ipi_lock);
1688 LIST_FOREACH(inp, pcbinfo->ipi_listhead, inp_list) {
1689 if (inp->inp_state != INPCB_STATE_DEAD &&
1690 inp->inp_socket != NULL &&
1691 inp->inp_socket->so_cfil_db != NULL &&
1692 (inp->inp_socket->so_gencnt & 0x0ffffffff) == gencnt) {
1693 if (cfil_socket_safe_lock(inp, pcbinfo)) {
1694 so = inp->inp_socket;
1695 }
1696 /* pcbinfo is already unlocked, we are done. */
1697 goto done;
1698 }
1699 }
1700 lck_rw_done(pcbinfo->ipi_lock);
1701 if (so != NULL) {
1702 goto done;
1703 }
1704
1705 pcbinfo = &ripcbinfo;
1706 lck_rw_lock_shared(pcbinfo->ipi_lock);
1707 LIST_FOREACH(inp, pcbinfo->ipi_listhead, inp_list) {
1708 if (inp->inp_state != INPCB_STATE_DEAD &&
1709 inp->inp_socket != NULL &&
1710 inp->inp_socket->so_cfil_db != NULL &&
1711 (inp->inp_socket->so_gencnt & 0x0ffffffff) == gencnt) {
1712 if (cfil_socket_safe_lock(inp, pcbinfo)) {
1713 so = inp->inp_socket;
1714 }
1715 /* pcbinfo is already unlocked, we are done. */
1716 goto done;
1717 }
1718 }
1719 lck_rw_done(pcbinfo->ipi_lock);
1720
1721 done:
1722 if (so == NULL) {
1723 OSIncrementAtomic(&cfil_stats.cfs_sock_id_not_found);
1724 CFIL_LOG(LOG_DEBUG,
1725 "no socket for sock_id %llx gencnt %llx flowhash %x",
1726 cfil_sock_id, gencnt, flowhash);
1727 }
1728
1729 return so;
1730 }
1731
1732 static struct socket *
1733 cfil_socket_from_client_uuid(uuid_t necp_client_uuid, bool *cfil_attached)
1734 {
1735 struct socket *so = NULL;
1736 struct inpcb *inp = NULL;
1737 struct inpcbinfo *pcbinfo = &tcbinfo;
1738
1739 lck_rw_lock_shared(pcbinfo->ipi_lock);
1740 LIST_FOREACH(inp, pcbinfo->ipi_listhead, inp_list) {
1741 if (inp->inp_state != INPCB_STATE_DEAD &&
1742 inp->inp_socket != NULL &&
1743 uuid_compare(inp->necp_client_uuid, necp_client_uuid) == 0) {
1744 *cfil_attached = (inp->inp_socket->so_cfil != NULL);
1745 if (cfil_socket_safe_lock(inp, pcbinfo)) {
1746 so = inp->inp_socket;
1747 }
1748 /* pcbinfo is already unlocked, we are done. */
1749 goto done;
1750 }
1751 }
1752 lck_rw_done(pcbinfo->ipi_lock);
1753 if (so != NULL) {
1754 goto done;
1755 }
1756
1757 pcbinfo = &udbinfo;
1758 lck_rw_lock_shared(pcbinfo->ipi_lock);
1759 LIST_FOREACH(inp, pcbinfo->ipi_listhead, inp_list) {
1760 if (inp->inp_state != INPCB_STATE_DEAD &&
1761 inp->inp_socket != NULL &&
1762 uuid_compare(inp->necp_client_uuid, necp_client_uuid) == 0) {
1763 *cfil_attached = (inp->inp_socket->so_cfil_db != NULL);
1764 if (cfil_socket_safe_lock(inp, pcbinfo)) {
1765 so = inp->inp_socket;
1766 }
1767 /* pcbinfo is already unlocked, we are done. */
1768 goto done;
1769 }
1770 }
1771 lck_rw_done(pcbinfo->ipi_lock);
1772
1773 done:
1774 return so;
1775 }
1776
1777 static void
1778 cfil_info_stats_toggle(struct cfil_info *cfil_info, struct cfil_entry *entry, uint32_t report_frequency)
1779 {
1780 struct cfil_info *cfil = NULL;
1781 Boolean found = FALSE;
1782 int kcunit;
1783
1784 if (cfil_info == NULL) {
1785 return;
1786 }
1787
1788 if (report_frequency) {
1789 if (entry == NULL) {
1790 return;
1791 }
1792
1793 // Update stats reporting frequency.
1794 if (entry->cfe_stats_report_frequency != report_frequency) {
1795 entry->cfe_stats_report_frequency = report_frequency;
1796 if (entry->cfe_stats_report_frequency < CFIL_STATS_REPORT_INTERVAL_MIN_MSEC) {
1797 entry->cfe_stats_report_frequency = CFIL_STATS_REPORT_INTERVAL_MIN_MSEC;
1798 }
1799 microuptime(&entry->cfe_stats_report_ts);
1800
1801 // Insert cfil_info into the list only if it is not in it yet.
1802 TAILQ_FOREACH(cfil, &cfil_sock_head_stats, cfi_link_stats) {
1803 if (cfil == cfil_info) {
1804 return;
1805 }
1806 }
1807
1808 TAILQ_INSERT_TAIL(&cfil_sock_head_stats, cfil_info, cfi_link_stats);
1809
1810 // Wake up stats thread if this is first flow added
1811 if (cfil_sock_attached_stats_count == 0) {
1812 thread_wakeup((caddr_t)&cfil_sock_attached_stats_count);
1813 }
1814 cfil_sock_attached_stats_count++;
1815 #if STATS_DEBUG
1816 CFIL_LOG(LOG_ERR, "CFIL: VERDICT RECEIVED - STATS FLOW INSERTED: <so %llx sockID %llu> stats frequency %d msecs",
1817 cfil_info->cfi_so ? (uint64_t)VM_KERNEL_ADDRPERM(cfil_info->cfi_so) : 0,
1818 cfil_info->cfi_sock_id,
1819 entry->cfe_stats_report_frequency);
1820 #endif
1821 }
1822 } else {
1823 // Turn off stats reporting for this filter.
1824 if (entry != NULL) {
1825 // Already off, no change.
1826 if (entry->cfe_stats_report_frequency == 0) {
1827 return;
1828 }
1829
1830 entry->cfe_stats_report_frequency = 0;
1831 // If cfil_info still has filter(s) asking for stats, no need to remove from list.
1832 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
1833 if (cfil_info->cfi_entries[kcunit - 1].cfe_stats_report_frequency > 0) {
1834 return;
1835 }
1836 }
1837 }
1838
1839 // No more filters asking for stats for this cfil_info; remove it from the list.
1840 if (!TAILQ_EMPTY(&cfil_sock_head_stats)) {
1841 found = FALSE;
1842 TAILQ_FOREACH(cfil, &cfil_sock_head_stats, cfi_link_stats) {
1843 if (cfil == cfil_info) {
1844 found = TRUE;
1845 break;
1846 }
1847 }
1848 if (found) {
1849 cfil_sock_attached_stats_count--;
1850 TAILQ_REMOVE(&cfil_sock_head_stats, cfil_info, cfi_link_stats);
1851 #if STATS_DEBUG
1852 CFIL_LOG(LOG_ERR, "CFIL: VERDICT RECEIVED - STATS FLOW DELETED: <so %llx sockID %llu> stats frequency reset",
1853 cfil_info->cfi_so ? (uint64_t)VM_KERNEL_ADDRPERM(cfil_info->cfi_so) : 0,
1854 cfil_info->cfi_sock_id);
1855 #endif
1856 }
1857 }
1858 }
1859 }
1860
1861 static errno_t
1862 cfil_ctl_send(kern_ctl_ref kctlref, u_int32_t kcunit, void *unitinfo, mbuf_t m,
1863 int flags)
1864 {
1865 #pragma unused(kctlref, flags)
1866 errno_t error = 0;
1867 struct cfil_msg_hdr *msghdr;
1868 struct content_filter *cfc = (struct content_filter *)unitinfo;
1869 struct socket *so;
1870 struct cfil_msg_action *action_msg;
1871 struct cfil_entry *entry;
1872 struct cfil_info *cfil_info = NULL;
1873 unsigned int data_len = 0;
1874
1875 CFIL_LOG(LOG_INFO, "");
1876
1877 if (content_filters == NULL) {
1878 CFIL_LOG(LOG_ERR, "no content filter");
1879 error = EINVAL;
1880 goto done;
1881 }
1882 if (kcunit > MAX_CONTENT_FILTER) {
1883 CFIL_LOG(LOG_ERR, "kcunit %u > MAX_CONTENT_FILTER (%d)",
1884 kcunit, MAX_CONTENT_FILTER);
1885 error = EINVAL;
1886 goto done;
1887 }
1888 if (m == NULL) {
1889 CFIL_LOG(LOG_ERR, "null mbuf");
1890 error = EINVAL;
1891 goto done;
1892 }
1893 data_len = m_length(m);
1894
1895 if (data_len < sizeof(struct cfil_msg_hdr)) {
1896 CFIL_LOG(LOG_ERR, "too short %u", data_len);
1897 error = EINVAL;
1898 goto done;
1899 }
1900 msghdr = (struct cfil_msg_hdr *)mbuf_data(m);
1901 if (msghdr->cfm_version != CFM_VERSION_CURRENT) {
1902 CFIL_LOG(LOG_ERR, "bad version %u", msghdr->cfm_version);
1903 error = EINVAL;
1904 goto done;
1905 }
1906 if (msghdr->cfm_type != CFM_TYPE_ACTION) {
1907 CFIL_LOG(LOG_ERR, "bad type %u", msghdr->cfm_type);
1908 error = EINVAL;
1909 goto done;
1910 }
1911 if (msghdr->cfm_len > data_len) {
1912 CFIL_LOG(LOG_ERR, "bad length %u", msghdr->cfm_len);
1913 error = EINVAL;
1914 goto done;
1915 }
1916
1917 /* Validate action operation */
1918 switch (msghdr->cfm_op) {
1919 case CFM_OP_DATA_UPDATE:
1920 OSIncrementAtomic(
1921 &cfil_stats.cfs_ctl_action_data_update);
1922 break;
1923 case CFM_OP_DROP:
1924 OSIncrementAtomic(&cfil_stats.cfs_ctl_action_drop);
1925 break;
1926 case CFM_OP_BLESS_CLIENT:
1927 if (msghdr->cfm_len != sizeof(struct cfil_msg_bless_client)) {
1928 OSIncrementAtomic(&cfil_stats.cfs_ctl_action_bad_len);
1929 error = EINVAL;
1930 CFIL_LOG(LOG_ERR, "bad len: %u for op %u",
1931 msghdr->cfm_len,
1932 msghdr->cfm_op);
1933 goto done;
1934 }
1935 error = cfil_action_bless_client(kcunit, msghdr);
1936 goto done;
1937 case CFM_OP_SET_CRYPTO_KEY:
1938 if (msghdr->cfm_len != sizeof(struct cfil_msg_set_crypto_key)) {
1939 OSIncrementAtomic(&cfil_stats.cfs_ctl_action_bad_len);
1940 error = EINVAL;
1941 CFIL_LOG(LOG_ERR, "bad len: %u for op %u",
1942 msghdr->cfm_len,
1943 msghdr->cfm_op);
1944 goto done;
1945 }
1946 error = cfil_action_set_crypto_key(kcunit, msghdr);
1947 goto done;
1948 default:
1949 OSIncrementAtomic(&cfil_stats.cfs_ctl_action_bad_op);
1950 CFIL_LOG(LOG_ERR, "bad op %u", msghdr->cfm_op);
1951 error = EINVAL;
1952 goto done;
1953 }
1954 if (msghdr->cfm_len != sizeof(struct cfil_msg_action)) {
1955 OSIncrementAtomic(&cfil_stats.cfs_ctl_action_bad_len);
1956 error = EINVAL;
1957 CFIL_LOG(LOG_ERR, "bad len: %u for op %u",
1958 msghdr->cfm_len,
1959 msghdr->cfm_op);
1960 goto done;
1961 }
1962 cfil_rw_lock_shared(&cfil_lck_rw);
1963 if (cfc != (void *)content_filters[kcunit - 1]) {
1964 CFIL_LOG(LOG_ERR, "unitinfo does not match for kcunit %u",
1965 kcunit);
1966 error = EINVAL;
1967 cfil_rw_unlock_shared(&cfil_lck_rw);
1968 goto done;
1969 }
1970 cfil_rw_unlock_shared(&cfil_lck_rw);
1971
1972 // Search for socket (TCP+UDP) and lock so
1973 so = cfil_socket_from_sock_id(msghdr->cfm_sock_id, false);
1974 if (so == NULL) {
1975 CFIL_LOG(LOG_NOTICE, "bad sock_id %llx",
1976 msghdr->cfm_sock_id);
1977 error = EINVAL;
1978 goto done;
1979 }
1980
1981 cfil_info = so->so_cfil_db != NULL ?
1982 cfil_db_get_cfil_info(so->so_cfil_db, msghdr->cfm_sock_id) : so->so_cfil;
1983
1984 // We should not take the global lock here, to avoid deadlock further down this path.
1985 // Instead we attempt to retain a valid cfil_info to prevent any deallocation until
1986 // we are done. Abort the retain if cfil_info has already entered the free code path.
1987 if (cfil_info && os_ref_retain_try(&cfil_info->cfi_ref_count) == false) {
1988 socket_unlock(so, 1);
1989 goto done;
1990 }
1991
1992 if (cfil_info == NULL) {
1993 CFIL_LOG(LOG_NOTICE, "so %llx <id %llu> not attached",
1994 (uint64_t)VM_KERNEL_ADDRPERM(so), msghdr->cfm_sock_id);
1995 error = EINVAL;
1996 goto unlock;
1997 } else if (cfil_info->cfi_flags & CFIF_DROP) {
1998 CFIL_LOG(LOG_NOTICE, "so %llx drop set",
1999 (uint64_t)VM_KERNEL_ADDRPERM(so));
2000 error = EINVAL;
2001 goto unlock;
2002 }
2003
2004 if (cfil_info->cfi_debug) {
2005 cfil_info_log(LOG_ERR, cfil_info, "CFIL: RECEIVED MSG FROM FILTER");
2006 }
2007
2008 entry = &cfil_info->cfi_entries[kcunit - 1];
2009 if (entry->cfe_filter == NULL) {
2010 CFIL_LOG(LOG_NOTICE, "so %llx no filter",
2011 (uint64_t)VM_KERNEL_ADDRPERM(so));
2012 error = EINVAL;
2013 goto unlock;
2014 }
2015
2016 if (entry->cfe_flags & CFEF_SENT_SOCK_ATTACHED) {
2017 entry->cfe_flags |= CFEF_DATA_START;
2018 } else {
2019 CFIL_LOG(LOG_ERR,
2020 "so %llx attached not sent for %u",
2021 (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit);
2022 error = EINVAL;
2023 goto unlock;
2024 }
2025
2026 microuptime(&entry->cfe_last_action);
2027 CFI_ADD_TIME_LOG(cfil_info, &entry->cfe_last_action, &cfil_info->cfi_first_event, msghdr->cfm_op);
2028
2029 action_msg = (struct cfil_msg_action *)msghdr;
2030
2031 switch (msghdr->cfm_op) {
2032 case CFM_OP_DATA_UPDATE:
2033
2034 if (cfil_info->cfi_debug) {
2035 cfil_info_log(LOG_ERR, cfil_info, "CFIL: RECEIVED CFM_OP_DATA_UPDATE");
2036 CFIL_LOG(LOG_ERR, "CFIL: VERDICT RECEIVED: <so %llx sockID %llu> <IN peek:%llu pass:%llu, OUT peek:%llu pass:%llu>",
2037 (uint64_t)VM_KERNEL_ADDRPERM(so),
2038 cfil_info->cfi_sock_id,
2039 action_msg->cfa_in_peek_offset, action_msg->cfa_in_pass_offset,
2040 action_msg->cfa_out_peek_offset, action_msg->cfa_out_pass_offset);
2041 }
2042
2043 #if VERDICT_DEBUG
2044 CFIL_LOG(LOG_ERR, "CFIL: VERDICT RECEIVED: <so %llx sockID %llu> <IN peek:%llu pass:%llu, OUT peek:%llu pass:%llu>",
2045 (uint64_t)VM_KERNEL_ADDRPERM(so),
2046 cfil_info->cfi_sock_id,
2047 action_msg->cfa_in_peek_offset, action_msg->cfa_in_pass_offset,
2048 action_msg->cfa_out_peek_offset, action_msg->cfa_out_pass_offset);
2049 #endif
2050 /*
2051 * Received verdict, at this point we know this
2052 * socket connection is allowed. Unblock thread
2053 * immediately before proceeding to process the verdict.
2054 */
2055 cfil_sock_received_verdict(so);
2056
2057 if (action_msg->cfa_out_peek_offset != 0 ||
2058 action_msg->cfa_out_pass_offset != 0) {
2059 error = cfil_action_data_pass(so, cfil_info, kcunit, 1,
2060 action_msg->cfa_out_pass_offset,
2061 action_msg->cfa_out_peek_offset);
2062 }
2063 if (error == EJUSTRETURN) {
2064 error = 0;
2065 }
2066 if (error != 0) {
2067 break;
2068 }
2069 if (action_msg->cfa_in_peek_offset != 0 ||
2070 action_msg->cfa_in_pass_offset != 0) {
2071 error = cfil_action_data_pass(so, cfil_info, kcunit, 0,
2072 action_msg->cfa_in_pass_offset,
2073 action_msg->cfa_in_peek_offset);
2074 }
2075 if (error == EJUSTRETURN) {
2076 error = 0;
2077 }
2078
2079 // Toggle stats reporting according to received verdict.
2080 cfil_rw_lock_exclusive(&cfil_lck_rw);
2081 cfil_info_stats_toggle(cfil_info, entry, action_msg->cfa_stats_frequency);
2082 cfil_rw_unlock_exclusive(&cfil_lck_rw);
2083
2084 break;
2085
2086 case CFM_OP_DROP:
2087 if (cfil_info->cfi_debug) {
2088 cfil_info_log(LOG_ERR, cfil_info, "CFIL: RECEIVED CFM_OP_DROP");
2089 CFIL_LOG(LOG_ERR, "CFIL: VERDICT DROP RECEIVED: <so %llx sockID %llu> <IN peek:%llu pass:%llu, OUT peek:%llu pass:%llu>",
2090 (uint64_t)VM_KERNEL_ADDRPERM(so),
2091 cfil_info->cfi_sock_id,
2092 action_msg->cfa_in_peek_offset, action_msg->cfa_in_pass_offset,
2093 action_msg->cfa_out_peek_offset, action_msg->cfa_out_pass_offset);
2094 }
2095
2096 #if VERDICT_DEBUG
2097 CFIL_LOG(LOG_ERR, "CFIL: VERDICT DROP RECEIVED: <so %llx sockID %llu> <IN peek:%llu pass:%llu, OUT peek:%llu pass:%llu>",
2098 (uint64_t)VM_KERNEL_ADDRPERM(so),
2099 cfil_info->cfi_sock_id,
2100 action_msg->cfa_in_peek_offset, action_msg->cfa_in_pass_offset,
2101 action_msg->cfa_out_peek_offset, action_msg->cfa_out_pass_offset);
2102 #endif
2103 error = cfil_action_drop(so, cfil_info, kcunit);
2104 cfil_sock_received_verdict(so);
2105 break;
2106
2107 default:
2108 error = EINVAL;
2109 break;
2110 }
2111 unlock:
2112 CFIL_INFO_FREE(cfil_info)
2113 socket_unlock(so, 1);
2114 done:
2115 mbuf_freem(m);
2116
2117 if (error == 0) {
2118 OSIncrementAtomic(&cfil_stats.cfs_ctl_send_ok);
2119 } else {
2120 OSIncrementAtomic(&cfil_stats.cfs_ctl_send_bad);
2121 }
2122
2123 return error;
2124 }
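/*
 * Illustrative user-space sketch (not part of the original source): the
 * verdicts validated by cfil_ctl_send() above are plain cfil_msg_action
 * structures written to the connected kernel control socket. The embedded
 * header member name (cfa_msghdr), the ctl_fd/sock_id variables and the
 * offset values are assumptions for illustration only.
 *
 *	struct cfil_msg_action verdict = {
 *		.cfa_msghdr = {
 *			.cfm_len     = sizeof(struct cfil_msg_action),
 *			.cfm_version = CFM_VERSION_CURRENT,
 *			.cfm_type    = CFM_TYPE_ACTION,
 *			.cfm_op      = CFM_OP_DATA_UPDATE,
 *			.cfm_sock_id = sock_id,		// from the attach event
 *		},
 *		// allow the first 1024 bytes each way, ask to peek further
 *		.cfa_in_pass_offset  = 1024,
 *		.cfa_in_peek_offset  = 2048,
 *		.cfa_out_pass_offset = 1024,
 *		.cfa_out_peek_offset = 2048,
 *	};
 *	(void)send(ctl_fd, &verdict, sizeof(verdict), 0);
 */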
2125
2126 static errno_t
2127 cfil_ctl_getopt(kern_ctl_ref kctlref, u_int32_t kcunit, void *unitinfo,
2128 int opt, void *data, size_t *len)
2129 {
2130 #pragma unused(kctlref, opt)
2131 struct cfil_info *cfil_info = NULL;
2132 errno_t error = 0;
2133 struct content_filter *cfc = (struct content_filter *)unitinfo;
2134
2135 CFIL_LOG(LOG_NOTICE, "");
2136
2137 cfil_rw_lock_shared(&cfil_lck_rw);
2138
2139 if (content_filters == NULL) {
2140 CFIL_LOG(LOG_ERR, "no content filter");
2141 error = EINVAL;
2142 goto done;
2143 }
2144 if (kcunit > MAX_CONTENT_FILTER) {
2145 CFIL_LOG(LOG_ERR, "kcunit %u > MAX_CONTENT_FILTER (%d)",
2146 kcunit, MAX_CONTENT_FILTER);
2147 error = EINVAL;
2148 goto done;
2149 }
2150 if (cfc != (void *)content_filters[kcunit - 1]) {
2151 CFIL_LOG(LOG_ERR, "unitinfo does not match for kcunit %u",
2152 kcunit);
2153 error = EINVAL;
2154 goto done;
2155 }
2156 switch (opt) {
2157 case CFIL_OPT_NECP_CONTROL_UNIT:
2158 if (*len < sizeof(uint32_t)) {
2159 CFIL_LOG(LOG_ERR, "len too small %lu", *len);
2160 error = EINVAL;
2161 goto done;
2162 }
2163 if (data != NULL) {
2164 *(uint32_t *)data = cfc->cf_necp_control_unit;
2165 }
2166 break;
2167 case CFIL_OPT_GET_SOCKET_INFO:
2168 if (*len != sizeof(struct cfil_opt_sock_info)) {
2169 CFIL_LOG(LOG_ERR, "len does not match %lu", *len);
2170 error = EINVAL;
2171 goto done;
2172 }
2173 if (data == NULL) {
2174 CFIL_LOG(LOG_ERR, "data not passed");
2175 error = EINVAL;
2176 goto done;
2177 }
2178
2179 struct cfil_opt_sock_info *sock_info =
2180 (struct cfil_opt_sock_info *) data;
2181
2182 // Unlock here so that we never hold both cfil_lck_rw and the
2183 // socket_lock at the same time. Otherwise, this can deadlock
2184 // because soclose() takes the socket_lock and then exclusive
2185 // cfil_lck_rw and we require the opposite order.
2186
2187 // WARNING: Be sure to never use anything protected
2188 // by cfil_lck_rw beyond this point.
2189 // WARNING: Be sure to avoid fallthrough and
2190 // goto return_already_unlocked from this branch.
2191 cfil_rw_unlock_shared(&cfil_lck_rw);
2192
2193 // Search (TCP+UDP) and lock socket
2194 struct socket *sock =
2195 cfil_socket_from_sock_id(sock_info->cfs_sock_id, false);
2196 if (sock == NULL) {
2197 #if LIFECYCLE_DEBUG
2198 CFIL_LOG(LOG_ERR, "CFIL: GET_SOCKET_INFO failed: bad sock_id %llu",
2199 sock_info->cfs_sock_id);
2200 #endif
2201 error = ENOENT;
2202 goto return_already_unlocked;
2203 }
2204
2205 cfil_info = (sock->so_cfil_db != NULL) ?
2206 cfil_db_get_cfil_info(sock->so_cfil_db, sock_info->cfs_sock_id) : sock->so_cfil;
2207
2208 if (cfil_info == NULL) {
2209 #if LIFECYCLE_DEBUG
2210 CFIL_LOG(LOG_ERR, "CFIL: GET_SOCKET_INFO failed: so %llx not attached, cannot fetch info",
2211 (uint64_t)VM_KERNEL_ADDRPERM(sock));
2212 #endif
2213 error = EINVAL;
2214 socket_unlock(sock, 1);
2215 goto return_already_unlocked;
2216 }
2217
2218 // Fill out family, type, and protocol
2219 sock_info->cfs_sock_family = sock->so_proto->pr_domain->dom_family;
2220 sock_info->cfs_sock_type = sock->so_proto->pr_type;
2221 sock_info->cfs_sock_protocol = sock->so_proto->pr_protocol;
2222
2223 // Source and destination addresses
2224 struct inpcb *inp = sotoinpcb(sock);
2225 if (inp->inp_vflag & INP_IPV6) {
2226 struct in6_addr *laddr = NULL, *faddr = NULL;
2227 u_int16_t lport = 0, fport = 0;
2228
2229 cfil_get_flow_address_v6(cfil_info->cfi_hash_entry, inp,
2230 &laddr, &faddr, &lport, &fport);
2231 fill_ip6_sockaddr_4_6(&sock_info->cfs_local, laddr, lport);
2232 fill_ip6_sockaddr_4_6(&sock_info->cfs_remote, faddr, fport);
2233 } else if (inp->inp_vflag & INP_IPV4) {
2234 struct in_addr laddr = {.s_addr = 0}, faddr = {.s_addr = 0};
2235 u_int16_t lport = 0, fport = 0;
2236
2237 cfil_get_flow_address(cfil_info->cfi_hash_entry, inp,
2238 &laddr, &faddr, &lport, &fport);
2239 fill_ip_sockaddr_4_6(&sock_info->cfs_local, laddr, lport);
2240 fill_ip_sockaddr_4_6(&sock_info->cfs_remote, faddr, fport);
2241 }
2242
2243 // Set the pid info
2244 sock_info->cfs_pid = sock->last_pid;
2245 memcpy(sock_info->cfs_uuid, sock->last_uuid, sizeof(uuid_t));
2246
2247 if (sock->so_flags & SOF_DELEGATED) {
2248 sock_info->cfs_e_pid = sock->e_pid;
2249 memcpy(sock_info->cfs_e_uuid, sock->e_uuid, sizeof(uuid_t));
2250 } else {
2251 sock_info->cfs_e_pid = sock->last_pid;
2252 memcpy(sock_info->cfs_e_uuid, sock->last_uuid, sizeof(uuid_t));
2253 }
2254
2255 socket_unlock(sock, 1);
2256
2257 goto return_already_unlocked;
2258 default:
2259 error = ENOPROTOOPT;
2260 break;
2261 }
2262 done:
2263 cfil_rw_unlock_shared(&cfil_lck_rw);
2264
2265 return error;
2266
2267 return_already_unlocked:
2268
2269 return error;
2270 }
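/*
 * Illustrative user-space sketch (editorial): the CFIL_OPT_GET_SOCKET_INFO
 * branch above is reached with getsockopt() at level SYSPROTO_CONTROL on the
 * kernel control socket; the caller seeds cfs_sock_id and the kernel fills in
 * the rest. ctl_fd and sock_id are hypothetical names.
 *
 *	struct cfil_opt_sock_info info = { .cfs_sock_id = sock_id };
 *	socklen_t info_len = sizeof(info);
 *
 *	if (getsockopt(ctl_fd, SYSPROTO_CONTROL, CFIL_OPT_GET_SOCKET_INFO,
 *	    &info, &info_len) == 0) {
 *		// info.cfs_pid, info.cfs_local, info.cfs_remote, ... are now valid
 *	}
 */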
2271
2272 static errno_t
2273 cfil_ctl_setopt(kern_ctl_ref kctlref, u_int32_t kcunit, void *unitinfo,
2274 int opt, void *data, size_t len)
2275 {
2276 #pragma unused(kctlref, opt)
2277 errno_t error = 0;
2278 struct content_filter *cfc = (struct content_filter *)unitinfo;
2279
2280 CFIL_LOG(LOG_NOTICE, "");
2281
2282 cfil_rw_lock_exclusive(&cfil_lck_rw);
2283
2284 if (content_filters == NULL) {
2285 CFIL_LOG(LOG_ERR, "no content filter");
2286 error = EINVAL;
2287 goto done;
2288 }
2289 if (kcunit > MAX_CONTENT_FILTER) {
2290 CFIL_LOG(LOG_ERR, "kcunit %u > MAX_CONTENT_FILTER (%d)",
2291 kcunit, MAX_CONTENT_FILTER);
2292 error = EINVAL;
2293 goto done;
2294 }
2295 if (cfc != (void *)content_filters[kcunit - 1]) {
2296 CFIL_LOG(LOG_ERR, "unitinfo does not match for kcunit %u",
2297 kcunit);
2298 error = EINVAL;
2299 goto done;
2300 }
2301 switch (opt) {
2302 case CFIL_OPT_NECP_CONTROL_UNIT:
2303 if (len < sizeof(uint32_t)) {
2304 CFIL_LOG(LOG_ERR, "CFIL_OPT_NECP_CONTROL_UNIT "
2305 "len too small %lu", len);
2306 error = EINVAL;
2307 goto done;
2308 }
2309 if (cfc->cf_necp_control_unit != 0) {
2310 CFIL_LOG(LOG_ERR, "CFIL_OPT_NECP_CONTROL_UNIT "
2311 "already set %u",
2312 cfc->cf_necp_control_unit);
2313 error = EINVAL;
2314 goto done;
2315 }
2316 cfc->cf_necp_control_unit = *(uint32_t *)data;
2317 break;
2318 default:
2319 error = ENOPROTOOPT;
2320 break;
2321 }
2322 done:
2323 cfil_rw_unlock_exclusive(&cfil_lck_rw);
2324
2325 return error;
2326 }
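/*
 * Illustrative user-space sketch (editorial): a filter agent binds itself to
 * its NECP rules by setting the same filter control unit on the control
 * socket, matching the checks in cfil_ctl_setopt() above (EINVAL if the unit
 * is already set or the option is shorter than a uint32_t). ctl_fd is a
 * hypothetical name.
 *
 *	uint32_t control_unit = 1;	// must match the unit in the agent's NECP rules
 *
 *	if (setsockopt(ctl_fd, SYSPROTO_CONTROL, CFIL_OPT_NECP_CONTROL_UNIT,
 *	    &control_unit, sizeof(control_unit)) != 0) {
 *		// already set, or bad length
 *	}
 */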
2327
2328
2329 static void
2330 cfil_ctl_rcvd(kern_ctl_ref kctlref, u_int32_t kcunit, void *unitinfo, int flags)
2331 {
2332 #pragma unused(kctlref, flags)
2333 struct content_filter *cfc = (struct content_filter *)unitinfo;
2334 struct socket *so = NULL;
2335 int error;
2336 struct cfil_entry *entry;
2337 struct cfil_info *cfil_info = NULL;
2338
2339 CFIL_LOG(LOG_INFO, "");
2340
2341 if (content_filters == NULL) {
2342 CFIL_LOG(LOG_ERR, "no content filter");
2343 OSIncrementAtomic(&cfil_stats.cfs_ctl_rcvd_bad);
2344 return;
2345 }
2346 if (kcunit > MAX_CONTENT_FILTER) {
2347 CFIL_LOG(LOG_ERR, "kcunit %u > MAX_CONTENT_FILTER (%d)",
2348 kcunit, MAX_CONTENT_FILTER);
2349 OSIncrementAtomic(&cfil_stats.cfs_ctl_rcvd_bad);
2350 return;
2351 }
2352 cfil_rw_lock_shared(&cfil_lck_rw);
2353 if (cfc != (void *)content_filters[kcunit - 1]) {
2354 CFIL_LOG(LOG_ERR, "unitinfo does not match for kcunit %u",
2355 kcunit);
2356 OSIncrementAtomic(&cfil_stats.cfs_ctl_rcvd_bad);
2357 goto done;
2358 }
2359 /* Let's assume the flow control is lifted */
2360 if (cfc->cf_flags & CFF_FLOW_CONTROLLED) {
2361 if (!cfil_rw_lock_shared_to_exclusive(&cfil_lck_rw)) {
2362 cfil_rw_lock_exclusive(&cfil_lck_rw);
2363 }
2364
2365 cfc->cf_flags &= ~CFF_FLOW_CONTROLLED;
2366
2367 cfil_rw_lock_exclusive_to_shared(&cfil_lck_rw);
2368 LCK_RW_ASSERT(&cfil_lck_rw, LCK_RW_ASSERT_SHARED);
2369 }
2370 /*
2371 * Flow control will be raised again as soon as an entry cannot enqueue
2372 * to the kernel control socket
2373 */
2374 while ((cfc->cf_flags & CFF_FLOW_CONTROLLED) == 0) {
2375 verify_content_filter(cfc);
2376
2377 cfil_rw_lock_assert_held(&cfil_lck_rw, 0);
2378
2379 /* Find an entry that is flow controlled */
2380 TAILQ_FOREACH(entry, &cfc->cf_sock_entries, cfe_link) {
2381 if (entry->cfe_cfil_info == NULL ||
2382 entry->cfe_cfil_info->cfi_so == NULL) {
2383 continue;
2384 }
2385 			if (entry->cfe_flags & CFEF_FLOW_CONTROLLED) {
2386 				break; /* found a flow-controlled entry to service below */
2387 			}
2388 }
2389 if (entry == NULL) {
2390 break;
2391 }
2392
2393 OSIncrementAtomic(&cfil_stats.cfs_ctl_rcvd_flow_lift);
2394
2395 cfil_info = entry->cfe_cfil_info;
2396 so = cfil_info->cfi_so;
2397
2398 cfil_rw_unlock_shared(&cfil_lck_rw);
2399 socket_lock(so, 1);
2400
2401 do {
2402 error = cfil_acquire_sockbuf(so, cfil_info, 1);
2403 if (error == 0) {
2404 error = cfil_data_service_ctl_q(so, cfil_info, kcunit, 1);
2405 }
2406 cfil_release_sockbuf(so, 1);
2407 if (error != 0) {
2408 break;
2409 }
2410
2411 error = cfil_acquire_sockbuf(so, cfil_info, 0);
2412 if (error == 0) {
2413 error = cfil_data_service_ctl_q(so, cfil_info, kcunit, 0);
2414 }
2415 cfil_release_sockbuf(so, 0);
2416 } while (0);
2417
2418 socket_lock_assert_owned(so);
2419 socket_unlock(so, 1);
2420
2421 cfil_rw_lock_shared(&cfil_lck_rw);
2422 }
2423 done:
2424 cfil_rw_unlock_shared(&cfil_lck_rw);
2425 }
2426
2427 void
2428 cfil_init(void)
2429 {
2430 struct kern_ctl_reg kern_ctl;
2431 errno_t error = 0;
2432 unsigned int mbuf_limit = 0;
2433
2434 CFIL_LOG(LOG_NOTICE, "");
2435
2436 /*
2437 * Compile time verifications
2438 */
2439 _CASSERT(CFIL_MAX_FILTER_COUNT == MAX_CONTENT_FILTER);
2440 _CASSERT(sizeof(struct cfil_filter_stat) % sizeof(uint32_t) == 0);
2441 _CASSERT(sizeof(struct cfil_entry_stat) % sizeof(uint32_t) == 0);
2442 _CASSERT(sizeof(struct cfil_sock_stat) % sizeof(uint32_t) == 0);
2443
2444 /*
2445 * Runtime verifications
2446 */
2447 VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_ctl_q_in_enqueued,
2448 sizeof(uint32_t)));
2449 VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_ctl_q_out_enqueued,
2450 sizeof(uint32_t)));
2451 VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_ctl_q_in_peeked,
2452 sizeof(uint32_t)));
2453 VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_ctl_q_out_peeked,
2454 sizeof(uint32_t)));
2455
2456 VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_pending_q_in_enqueued,
2457 sizeof(uint32_t)));
2458 VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_pending_q_out_enqueued,
2459 sizeof(uint32_t)));
2460
2461 VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_inject_q_in_enqueued,
2462 sizeof(uint32_t)));
2463 VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_inject_q_out_enqueued,
2464 sizeof(uint32_t)));
2465 VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_inject_q_in_passed,
2466 sizeof(uint32_t)));
2467 VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_inject_q_out_passed,
2468 sizeof(uint32_t)));
2469
2470 /*
2471 * Allocate locks
2472 */
2473 cfil_lck_grp_attr = lck_grp_attr_alloc_init();
2474 if (cfil_lck_grp_attr == NULL) {
2475 panic("%s: lck_grp_attr_alloc_init failed", __func__);
2476 /* NOTREACHED */
2477 }
2478 cfil_lck_grp = lck_grp_alloc_init("content filter",
2479 cfil_lck_grp_attr);
2480 if (cfil_lck_grp == NULL) {
2481 panic("%s: lck_grp_alloc_init failed", __func__);
2482 /* NOTREACHED */
2483 }
2484 cfil_lck_attr = lck_attr_alloc_init();
2485 if (cfil_lck_attr == NULL) {
2486 panic("%s: lck_attr_alloc_init failed", __func__);
2487 /* NOTREACHED */
2488 }
2489 lck_rw_init(&cfil_lck_rw, cfil_lck_grp, cfil_lck_attr);
2490
2491 TAILQ_INIT(&cfil_sock_head);
2492 TAILQ_INIT(&cfil_sock_head_stats);
2493
2494 /*
2495 * Register kernel control
2496 */
2497 bzero(&kern_ctl, sizeof(kern_ctl));
2498 strlcpy(kern_ctl.ctl_name, CONTENT_FILTER_CONTROL_NAME,
2499 sizeof(kern_ctl.ctl_name));
2500 kern_ctl.ctl_flags = CTL_FLAG_PRIVILEGED | CTL_FLAG_REG_EXTENDED;
2501 kern_ctl.ctl_sendsize = 512 * 1024; /* enough? */
2502 kern_ctl.ctl_recvsize = 512 * 1024; /* enough? */
2503 kern_ctl.ctl_connect = cfil_ctl_connect;
2504 kern_ctl.ctl_disconnect = cfil_ctl_disconnect;
2505 kern_ctl.ctl_send = cfil_ctl_send;
2506 kern_ctl.ctl_getopt = cfil_ctl_getopt;
2507 kern_ctl.ctl_setopt = cfil_ctl_setopt;
2508 kern_ctl.ctl_rcvd = cfil_ctl_rcvd;
2509 error = ctl_register(&kern_ctl, &cfil_kctlref);
2510 if (error != 0) {
2511 CFIL_LOG(LOG_ERR, "ctl_register failed: %d", error);
2512 return;
2513 }
2514
2515 // Spawn thread for garbage collection
2516 if (kernel_thread_start(cfil_udp_gc_thread_func, NULL,
2517 &cfil_udp_gc_thread) != KERN_SUCCESS) {
2518 panic_plain("%s: Can't create UDP GC thread", __func__);
2519 /* NOTREACHED */
2520 }
2521 /* this must not fail */
2522 VERIFY(cfil_udp_gc_thread != NULL);
2523
2524 // Spawn thread for statistics reporting
2525 if (kernel_thread_start(cfil_stats_report_thread_func, NULL,
2526 &cfil_stats_report_thread) != KERN_SUCCESS) {
2527 panic_plain("%s: Can't create statistics report thread", __func__);
2528 /* NOTREACHED */
2529 }
2530 /* this must not fail */
2531 VERIFY(cfil_stats_report_thread != NULL);
2532
2533 // Set UDP per-flow mbuf thresholds to 1/32 of platform max
2534 mbuf_limit = MAX(UDP_FLOW_GC_MBUF_CNT_MAX, (nmbclusters << MCLSHIFT) >> UDP_FLOW_GC_MBUF_SHIFT);
2535 cfil_udp_gc_mbuf_num_max = (mbuf_limit >> MCLSHIFT);
2536 cfil_udp_gc_mbuf_cnt_max = mbuf_limit;
2537
2538 memset(&global_cfil_stats_report_buffers, 0, sizeof(global_cfil_stats_report_buffers));
2539 }
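/*
 * Illustrative user-space sketch (editorial): the kernel control registered
 * in cfil_init() above is reached with the standard kctl sequence from
 * <sys/kern_control.h> / <sys/sys_domain.h>. Because of CTL_FLAG_PRIVILEGED
 * (and, in practice, an entitlement check), the connect is expected to fail
 * for ordinary processes.
 *
 *	struct ctl_info info = {};
 *	struct sockaddr_ctl addr = { .sc_len = sizeof(addr),
 *	    .sc_family = AF_SYSTEM, .ss_sysaddr = AF_SYS_CONTROL };
 *	int fd = socket(PF_SYSTEM, SOCK_DGRAM, SYSPROTO_CONTROL);
 *
 *	strlcpy(info.ctl_name, CONTENT_FILTER_CONTROL_NAME, sizeof(info.ctl_name));
 *	if (fd >= 0 && ioctl(fd, CTLIOCGINFO, &info) == 0) {
 *		addr.sc_id = info.ctl_id;	// resolved from the registered name
 *		addr.sc_unit = 0;		// let the kernel pick the kcunit
 *		(void)connect(fd, (struct sockaddr *)&addr, sizeof(addr));
 *	}
 */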
2540
2541 struct cfil_info *
2542 cfil_info_alloc(struct socket *so, struct cfil_hash_entry *hash_entry)
2543 {
2544 int kcunit;
2545 struct cfil_info *cfil_info = NULL;
2546 struct inpcb *inp = sotoinpcb(so);
2547
2548 CFIL_LOG(LOG_INFO, "");
2549
2550 socket_lock_assert_owned(so);
2551
2552 cfil_info = zalloc(cfil_info_zone);
2553 if (cfil_info == NULL) {
2554 goto done;
2555 }
2556 bzero(cfil_info, sizeof(struct cfil_info));
2557 os_ref_init(&cfil_info->cfi_ref_count, &cfil_refgrp);
2558
2559 cfil_queue_init(&cfil_info->cfi_snd.cfi_inject_q);
2560 cfil_queue_init(&cfil_info->cfi_rcv.cfi_inject_q);
2561
2562 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
2563 struct cfil_entry *entry;
2564
2565 entry = &cfil_info->cfi_entries[kcunit - 1];
2566 entry->cfe_cfil_info = cfil_info;
2567
2568 /* Initialize the filter entry */
2569 entry->cfe_filter = NULL;
2570 entry->cfe_flags = 0;
2571 entry->cfe_necp_control_unit = 0;
2572 entry->cfe_snd.cfe_pass_offset = 0;
2573 entry->cfe_snd.cfe_peek_offset = 0;
2574 entry->cfe_snd.cfe_peeked = 0;
2575 entry->cfe_rcv.cfe_pass_offset = 0;
2576 entry->cfe_rcv.cfe_peek_offset = 0;
2577 entry->cfe_rcv.cfe_peeked = 0;
2578 /*
2579 * Timestamp the last action to avoid prematurely
2580 * triggering garbage collection
2581 */
2582 microuptime(&entry->cfe_last_action);
2583
2584 cfil_queue_init(&entry->cfe_snd.cfe_pending_q);
2585 cfil_queue_init(&entry->cfe_rcv.cfe_pending_q);
2586 cfil_queue_init(&entry->cfe_snd.cfe_ctl_q);
2587 cfil_queue_init(&entry->cfe_rcv.cfe_ctl_q);
2588 }
2589
2590 cfil_rw_lock_exclusive(&cfil_lck_rw);
2591
2592 /*
2593 * Create a cfi_sock_id that's not the socket pointer!
2594 */
2595
2596 if (hash_entry == NULL) {
2597 // This is the TCP case, cfil_info is tracked per socket
2598 if (inp->inp_flowhash == 0) {
2599 inp->inp_flowhash = inp_calc_flowhash(inp);
2600 }
2601
2602 so->so_cfil = cfil_info;
2603 cfil_info->cfi_so = so;
2604 cfil_info->cfi_sock_id =
2605 ((so->so_gencnt << 32) | inp->inp_flowhash);
2606 } else {
2607 // This is the UDP case, cfil_info is tracked in per-socket hash
2608 cfil_info->cfi_so = so;
2609 hash_entry->cfentry_cfil = cfil_info;
2610 cfil_info->cfi_hash_entry = hash_entry;
2611 cfil_info->cfi_sock_id = ((so->so_gencnt << 32) | (hash_entry->cfentry_flowhash & 0xffffffff));
2612 CFIL_LOG(LOG_DEBUG, "CFIL: UDP inp_flowhash %x so_gencnt %llx entry flowhash %x sockID %llx",
2613 inp->inp_flowhash, so->so_gencnt, hash_entry->cfentry_flowhash, cfil_info->cfi_sock_id);
2614
2615 // Wake up gc thread if this is first flow added
2616 if (cfil_sock_udp_attached_count == 0) {
2617 thread_wakeup((caddr_t)&cfil_sock_udp_attached_count);
2618 }
2619
2620 cfil_sock_udp_attached_count++;
2621 }
2622
2623 TAILQ_INSERT_TAIL(&cfil_sock_head, cfil_info, cfi_link);
2624 SLIST_INIT(&cfil_info->cfi_ordered_entries);
2625
2626 cfil_sock_attached_count++;
2627
2628 cfil_rw_unlock_exclusive(&cfil_lck_rw);
2629
2630 done:
2631 if (cfil_info != NULL) {
2632 OSIncrementAtomic(&cfil_stats.cfs_cfi_alloc_ok);
2633 } else {
2634 OSIncrementAtomic(&cfil_stats.cfs_cfi_alloc_fail);
2635 }
2636
2637 return cfil_info;
2638 }
2639
2640 int
2641 cfil_info_attach_unit(struct socket *so, uint32_t filter_control_unit, struct cfil_info *cfil_info)
2642 {
2643 int kcunit;
2644 int attached = 0;
2645
2646 CFIL_LOG(LOG_INFO, "");
2647
2648 socket_lock_assert_owned(so);
2649
2650 cfil_rw_lock_exclusive(&cfil_lck_rw);
2651
2652 for (kcunit = 1;
2653 content_filters != NULL && kcunit <= MAX_CONTENT_FILTER;
2654 kcunit++) {
2655 struct content_filter *cfc = content_filters[kcunit - 1];
2656 struct cfil_entry *entry;
2657 struct cfil_entry *iter_entry;
2658 struct cfil_entry *iter_prev;
2659
2660 if (cfc == NULL) {
2661 continue;
2662 }
2663 if (!(cfc->cf_necp_control_unit & filter_control_unit)) {
2664 continue;
2665 }
2666
2667 entry = &cfil_info->cfi_entries[kcunit - 1];
2668
2669 entry->cfe_filter = cfc;
2670 entry->cfe_necp_control_unit = cfc->cf_necp_control_unit;
2671 TAILQ_INSERT_TAIL(&cfc->cf_sock_entries, entry, cfe_link);
2672 cfc->cf_sock_count++;
2673
2674 /* Insert the entry into the list ordered by control unit */
2675 iter_prev = NULL;
2676 SLIST_FOREACH(iter_entry, &cfil_info->cfi_ordered_entries, cfe_order_link) {
2677 if (entry->cfe_necp_control_unit < iter_entry->cfe_necp_control_unit) {
2678 break;
2679 }
2680 iter_prev = iter_entry;
2681 }
2682
2683 if (iter_prev == NULL) {
2684 SLIST_INSERT_HEAD(&cfil_info->cfi_ordered_entries, entry, cfe_order_link);
2685 } else {
2686 SLIST_INSERT_AFTER(iter_prev, entry, cfe_order_link);
2687 }
2688
2689 verify_content_filter(cfc);
2690 attached = 1;
2691 entry->cfe_flags |= CFEF_CFIL_ATTACHED;
2692 }
2693
2694 cfil_rw_unlock_exclusive(&cfil_lck_rw);
2695
2696 return attached;
2697 }
2698
2699 static void
2700 cfil_info_free(struct cfil_info *cfil_info)
2701 {
2702 int kcunit;
2703 uint64_t in_drain = 0;
2704 uint64_t out_drained = 0;
2705
2706 if (cfil_info == NULL) {
2707 return;
2708 }
2709
2710 CFIL_LOG(LOG_INFO, "");
2711
2712 cfil_rw_lock_exclusive(&cfil_lck_rw);
2713
2714 for (kcunit = 1;
2715 content_filters != NULL && kcunit <= MAX_CONTENT_FILTER;
2716 kcunit++) {
2717 struct cfil_entry *entry;
2718 struct content_filter *cfc;
2719
2720 entry = &cfil_info->cfi_entries[kcunit - 1];
2721
2722 /* Don't be silly and try to detach twice */
2723 if (entry->cfe_filter == NULL) {
2724 continue;
2725 }
2726
2727 cfc = content_filters[kcunit - 1];
2728
2729 VERIFY(cfc == entry->cfe_filter);
2730
2731 entry->cfe_filter = NULL;
2732 entry->cfe_necp_control_unit = 0;
2733 TAILQ_REMOVE(&cfc->cf_sock_entries, entry, cfe_link);
2734 cfc->cf_sock_count--;
2735
2736 verify_content_filter(cfc);
2737 }
2738 if (cfil_info->cfi_hash_entry != NULL) {
2739 cfil_sock_udp_attached_count--;
2740 }
2741 cfil_sock_attached_count--;
2742 TAILQ_REMOVE(&cfil_sock_head, cfil_info, cfi_link);
2743
2744 // Turn off stats reporting for cfil_info.
2745 cfil_info_stats_toggle(cfil_info, NULL, 0);
2746
2747 out_drained += cfil_queue_drain(&cfil_info->cfi_snd.cfi_inject_q);
2748 in_drain += cfil_queue_drain(&cfil_info->cfi_rcv.cfi_inject_q);
2749
2750 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
2751 struct cfil_entry *entry;
2752
2753 entry = &cfil_info->cfi_entries[kcunit - 1];
2754 out_drained += cfil_queue_drain(&entry->cfe_snd.cfe_pending_q);
2755 in_drain += cfil_queue_drain(&entry->cfe_rcv.cfe_pending_q);
2756 out_drained += cfil_queue_drain(&entry->cfe_snd.cfe_ctl_q);
2757 in_drain += cfil_queue_drain(&entry->cfe_rcv.cfe_ctl_q);
2758 }
2759 cfil_rw_unlock_exclusive(&cfil_lck_rw);
2760
2761 if (out_drained) {
2762 OSIncrementAtomic(&cfil_stats.cfs_flush_out_free);
2763 }
2764 if (in_drain) {
2765 OSIncrementAtomic(&cfil_stats.cfs_flush_in_free);
2766 }
2767
2768 zfree(cfil_info_zone, cfil_info);
2769 }
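/*
 * Editorial note (assumption): CFIL_INFO_FREE(), used by cfil_ctl_send() and
 * cfil_sock_detach(), is not defined in this excerpt. Given the
 * os_ref_init()/os_ref_retain_try() calls on cfi_ref_count above, it
 * presumably drops one reference and only calls cfil_info_free() when the
 * count reaches zero, roughly:
 *
 *	if (cfil_info != NULL &&
 *	    os_ref_release(&cfil_info->cfi_ref_count) == 0) {
 *		cfil_info_free(cfil_info);
 *	}
 */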
2770
2771 /*
2772 * Received a verdict from userspace for a socket.
2773 * Perform any delayed operation if needed.
2774 */
2775 static void
2776 cfil_sock_received_verdict(struct socket *so)
2777 {
2778 if (so == NULL || so->so_cfil == NULL) {
2779 return;
2780 }
2781
2782 so->so_cfil->cfi_flags |= CFIF_INITIAL_VERDICT;
2783
2784 /*
2785 * If socket has already been connected, trigger
2786 * soisconnected now.
2787 */
2788 if (so->so_cfil->cfi_flags & CFIF_SOCKET_CONNECTED) {
2789 so->so_cfil->cfi_flags &= ~CFIF_SOCKET_CONNECTED;
2790 soisconnected(so);
2791 return;
2792 }
2793 }
2794
2795 /*
2796 * Entry point from Sockets layer
2797 * The socket is locked.
2798 *
2799 * Checks if a connected socket is subject to filter and
2800 * pending the initial verdict.
2801 */
2802 boolean_t
2803 cfil_sock_connected_pending_verdict(struct socket *so)
2804 {
2805 if (so == NULL || so->so_cfil == NULL) {
2806 return false;
2807 }
2808
2809 if (so->so_cfil->cfi_flags & CFIF_INITIAL_VERDICT) {
2810 return false;
2811 } else {
2812 /*
2813 * Remember that this protocol is already connected, so
2814 * we will trigger soisconnected() upon receipt of
2815 * initial verdict later.
2816 */
2817 so->so_cfil->cfi_flags |= CFIF_SOCKET_CONNECTED;
2818 return true;
2819 }
2820 }
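/*
 * Editorial sketch of how the two routines above cooperate (the exact call
 * site in the protocol input path is not shown in this excerpt): when a
 * connection completes before the first verdict, the caller is expected to
 * defer soisconnected(); CFIF_SOCKET_CONNECTED then makes
 * cfil_sock_received_verdict() deliver it once the verdict arrives.
 *
 *	if (cfil_sock_connected_pending_verdict(so)) {
 *		// defer; cfil_sock_received_verdict() will call soisconnected()
 *	} else {
 *		soisconnected(so);
 *	}
 */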
2821
2822 boolean_t
2823 cfil_filter_present(void)
2824 {
2825 return cfil_active_count > 0;
2826 }
2827
2828 /*
2829 * Entry point from Sockets layer
2830 * The socket is locked.
2831 */
2832 errno_t
2833 cfil_sock_attach(struct socket *so, struct sockaddr *local, struct sockaddr *remote, int dir)
2834 {
2835 errno_t error = 0;
2836 uint32_t filter_control_unit;
2837
2838 socket_lock_assert_owned(so);
2839
2840 if (so->so_flags1 & SOF1_FLOW_DIVERT_SKIP) {
2841 /*
2842 * This socket has already been evaluated (and ultimately skipped) by
2843 * flow divert, so it has also already been through content filter if there
2844 * is one.
2845 */
2846 goto done;
2847 }
2848
2849 /* Limit ourselves to TCP sockets that are not MPTCP subflows */
2850 if (SKIP_FILTER_FOR_TCP_SOCKET(so)) {
2851 goto done;
2852 }
2853
2854 filter_control_unit = necp_socket_get_content_filter_control_unit(so);
2855 if (filter_control_unit == 0) {
2856 goto done;
2857 }
2858
2859 if (filter_control_unit == NECP_FILTER_UNIT_NO_FILTER) {
2860 goto done;
2861 }
2862 if ((filter_control_unit & NECP_MASK_USERSPACE_ONLY) != 0) {
2863 OSIncrementAtomic(&cfil_stats.cfs_sock_userspace_only);
2864 goto done;
2865 }
2866 if (cfil_active_count == 0) {
2867 OSIncrementAtomic(&cfil_stats.cfs_sock_attach_in_vain);
2868 goto done;
2869 }
2870 if (so->so_cfil != NULL) {
2871 OSIncrementAtomic(&cfil_stats.cfs_sock_attach_already);
2872 CFIL_LOG(LOG_ERR, "already attached");
2873 goto done;
2874 } else {
2875 cfil_info_alloc(so, NULL);
2876 if (so->so_cfil == NULL) {
2877 error = ENOMEM;
2878 OSIncrementAtomic(&cfil_stats.cfs_sock_attach_no_mem);
2879 goto done;
2880 }
2881 so->so_cfil->cfi_dir = dir;
2882 so->so_cfil->cfi_filter_control_unit = filter_control_unit;
2883 }
2884 if (cfil_info_attach_unit(so, filter_control_unit, so->so_cfil) == 0) {
2885 CFIL_LOG(LOG_ERR, "cfil_info_attach_unit(%u) failed",
2886 filter_control_unit);
2887 OSIncrementAtomic(&cfil_stats.cfs_sock_attach_failed);
2888 goto done;
2889 }
2890 CFIL_LOG(LOG_INFO, "so %llx filter_control_unit %u sockID %llx",
2891 (uint64_t)VM_KERNEL_ADDRPERM(so),
2892 filter_control_unit, so->so_cfil->cfi_sock_id);
2893
2894 so->so_flags |= SOF_CONTENT_FILTER;
2895 OSIncrementAtomic(&cfil_stats.cfs_sock_attached);
2896
2897 /* Hold a reference on the socket */
2898 so->so_usecount++;
2899
2900 /*
2901 * Save the passed addresses for the attach event msg (in case a resend
2902 * is needed).
2903 */
2904 if (remote != NULL && (remote->sa_len <= sizeof(union sockaddr_in_4_6))) {
2905 memcpy(&so->so_cfil->cfi_so_attach_faddr, remote, remote->sa_len);
2906 }
2907 if (local != NULL && (local->sa_len <= sizeof(union sockaddr_in_4_6))) {
2908 memcpy(&so->so_cfil->cfi_so_attach_laddr, local, local->sa_len);
2909 }
2910
2911 error = cfil_dispatch_attach_event(so, so->so_cfil, 0, dir);
2912 /* We can recover from flow control or out of memory errors */
2913 if (error == ENOBUFS || error == ENOMEM) {
2914 error = 0;
2915 } else if (error != 0) {
2916 goto done;
2917 }
2918
2919 CFIL_INFO_VERIFY(so->so_cfil);
2920 done:
2921 return error;
2922 }
2923
2924 /*
2925 * Entry point from Sockets layer
2926 * The socket is locked.
2927 */
2928 errno_t
2929 cfil_sock_detach(struct socket *so)
2930 {
2931 if (IS_IP_DGRAM(so)) {
2932 cfil_db_free(so);
2933 return 0;
2934 }
2935
2936 if (so->so_cfil) {
2937 if (so->so_flags & SOF_CONTENT_FILTER) {
2938 so->so_flags &= ~SOF_CONTENT_FILTER;
2939 VERIFY(so->so_usecount > 0);
2940 so->so_usecount--;
2941 }
2942 CFIL_INFO_FREE(so->so_cfil);
2943 so->so_cfil = NULL;
2944 OSIncrementAtomic(&cfil_stats.cfs_sock_detached);
2945 }
2946 return 0;
2947 }
2948
2949 /*
2950 * Fill in the address info of an event message from either
2951 * the socket or passed in address info.
2952 */
2953 static void
2954 cfil_fill_event_msg_addresses(struct cfil_hash_entry *entry, struct inpcb *inp,
2955 union sockaddr_in_4_6 *sin_src, union sockaddr_in_4_6 *sin_dst,
2956 boolean_t isIPv4, boolean_t outgoing)
2957 {
2958 if (isIPv4) {
2959 struct in_addr laddr = {0}, faddr = {0};
2960 u_int16_t lport = 0, fport = 0;
2961
2962 cfil_get_flow_address(entry, inp, &laddr, &faddr, &lport, &fport);
2963
2964 if (outgoing) {
2965 fill_ip_sockaddr_4_6(sin_src, laddr, lport);
2966 fill_ip_sockaddr_4_6(sin_dst, faddr, fport);
2967 } else {
2968 fill_ip_sockaddr_4_6(sin_src, faddr, fport);
2969 fill_ip_sockaddr_4_6(sin_dst, laddr, lport);
2970 }
2971 } else {
2972 struct in6_addr *laddr = NULL, *faddr = NULL;
2973 u_int16_t lport = 0, fport = 0;
2974
2975 cfil_get_flow_address_v6(entry, inp, &laddr, &faddr, &lport, &fport);
2976 if (outgoing) {
2977 fill_ip6_sockaddr_4_6(sin_src, laddr, lport);
2978 fill_ip6_sockaddr_4_6(sin_dst, faddr, fport);
2979 } else {
2980 fill_ip6_sockaddr_4_6(sin_src, faddr, fport);
2981 fill_ip6_sockaddr_4_6(sin_dst, laddr, lport);
2982 }
2983 }
2984 }
2985
2986 static boolean_t
2987 cfil_dispatch_attach_event_sign(cfil_crypto_state_t crypto_state,
2988 struct cfil_info *cfil_info,
2989 struct cfil_msg_sock_attached *msg)
2990 {
2991 struct cfil_crypto_data data = {};
2992
2993 if (crypto_state == NULL || msg == NULL || cfil_info == NULL) {
2994 return false;
2995 }
2996
2997 data.sock_id = msg->cfs_msghdr.cfm_sock_id;
2998 data.direction = msg->cfs_conn_dir;
2999
3000 data.pid = msg->cfs_pid;
3001 data.effective_pid = msg->cfs_e_pid;
3002 uuid_copy(data.uuid, msg->cfs_uuid);
3003 uuid_copy(data.effective_uuid, msg->cfs_e_uuid);
3004 data.socketProtocol = msg->cfs_sock_protocol;
3005 if (data.direction == CFS_CONNECTION_DIR_OUT) {
3006 data.remote.sin6 = msg->cfs_dst.sin6;
3007 data.local.sin6 = msg->cfs_src.sin6;
3008 } else {
3009 data.remote.sin6 = msg->cfs_src.sin6;
3010 data.local.sin6 = msg->cfs_dst.sin6;
3011 }
3012
3013 // At attach, if the local address is already present, there is no need to re-sign subsequent data messages.
3014 if (!NULLADDRESS(data.local)) {
3015 cfil_info->cfi_isSignatureLatest = true;
3016 }
3017
3018 msg->cfs_signature_length = sizeof(cfil_crypto_signature);
3019 if (cfil_crypto_sign_data(crypto_state, &data, msg->cfs_signature, &msg->cfs_signature_length) != 0) {
3020 msg->cfs_signature_length = 0;
3021 CFIL_LOG(LOG_ERR, "CFIL: Failed to sign attached msg <sockID %llu>",
3022 msg->cfs_msghdr.cfm_sock_id);
3023 return false;
3024 }
3025
3026 return true;
3027 }
3028
3029 static boolean_t
3030 cfil_dispatch_data_event_sign(cfil_crypto_state_t crypto_state,
3031 struct socket *so, struct cfil_info *cfil_info,
3032 struct cfil_msg_data_event *msg)
3033 {
3034 struct cfil_crypto_data data = {};
3035
3036 if (crypto_state == NULL || msg == NULL ||
3037 so == NULL || cfil_info == NULL) {
3038 return false;
3039 }
3040
3041 data.sock_id = cfil_info->cfi_sock_id;
3042 data.direction = cfil_info->cfi_dir;
3043 data.pid = so->last_pid;
3044 memcpy(data.uuid, so->last_uuid, sizeof(uuid_t));
3045 if (so->so_flags & SOF_DELEGATED) {
3046 data.effective_pid = so->e_pid;
3047 memcpy(data.effective_uuid, so->e_uuid, sizeof(uuid_t));
3048 } else {
3049 data.effective_pid = so->last_pid;
3050 memcpy(data.effective_uuid, so->last_uuid, sizeof(uuid_t));
3051 }
3052 data.socketProtocol = so->so_proto->pr_protocol;
3053
3054 if (data.direction == CFS_CONNECTION_DIR_OUT) {
3055 data.remote.sin6 = msg->cfc_dst.sin6;
3056 data.local.sin6 = msg->cfc_src.sin6;
3057 } else {
3058 data.remote.sin6 = msg->cfc_src.sin6;
3059 data.local.sin6 = msg->cfc_dst.sin6;
3060 }
3061
3062 // At the first data event, the local address may appear for the first time; update the
3063 // address cache so that subsequent data messages no longer need to be re-signed.
3064 if (!NULLADDRESS(data.local)) {
3065 memcpy(&cfil_info->cfi_so_attach_laddr, &data.local, data.local.sa.sa_len);
3066 cfil_info->cfi_isSignatureLatest = true;
3067 }
3068
3069 msg->cfd_signature_length = sizeof(cfil_crypto_signature);
3070 if (cfil_crypto_sign_data(crypto_state, &data, msg->cfd_signature, &msg->cfd_signature_length) != 0) {
3071 msg->cfd_signature_length = 0;
3072 CFIL_LOG(LOG_ERR, "CFIL: Failed to sign data msg <sockID %llu>",
3073 msg->cfd_msghdr.cfm_sock_id);
3074 return false;
3075 }
3076
3077 return true;
3078 }
3079
3080 static boolean_t
3081 cfil_dispatch_closed_event_sign(cfil_crypto_state_t crypto_state,
3082 struct socket *so, struct cfil_info *cfil_info,
3083 struct cfil_msg_sock_closed *msg)
3084 {
3085 struct cfil_crypto_data data = {};
3086 struct cfil_hash_entry hash_entry = {};
3087 struct cfil_hash_entry *hash_entry_ptr = NULL;
3088 struct inpcb *inp = (struct inpcb *)so->so_pcb;
3089
3090 if (crypto_state == NULL || msg == NULL ||
3091 so == NULL || inp == NULL || cfil_info == NULL) {
3092 return false;
3093 }
3094
3095 data.sock_id = cfil_info->cfi_sock_id;
3096 data.direction = cfil_info->cfi_dir;
3097
3098 data.pid = so->last_pid;
3099 memcpy(data.uuid, so->last_uuid, sizeof(uuid_t));
3100 if (so->so_flags & SOF_DELEGATED) {
3101 data.effective_pid = so->e_pid;
3102 memcpy(data.effective_uuid, so->e_uuid, sizeof(uuid_t));
3103 } else {
3104 data.effective_pid = so->last_pid;
3105 memcpy(data.effective_uuid, so->last_uuid, sizeof(uuid_t));
3106 }
3107 data.socketProtocol = so->so_proto->pr_protocol;
3108
3109 /*
3110 * Fill in address info:
3111 * For UDP, use the cfil_info hash entry directly.
3112 * For TCP, compose a hash entry with the saved addresses.
3113 */
3114 if (cfil_info->cfi_hash_entry != NULL) {
3115 hash_entry_ptr = cfil_info->cfi_hash_entry;
3116 } else if (cfil_info->cfi_so_attach_faddr.sa.sa_len > 0 ||
3117 cfil_info->cfi_so_attach_laddr.sa.sa_len > 0) {
3118 fill_cfil_hash_entry_from_address(&hash_entry, TRUE, &cfil_info->cfi_so_attach_laddr.sa, FALSE);
3119 fill_cfil_hash_entry_from_address(&hash_entry, FALSE, &cfil_info->cfi_so_attach_faddr.sa, FALSE);
3120 hash_entry_ptr = &hash_entry;
3121 }
3122 if (hash_entry_ptr != NULL) {
3123 boolean_t outgoing = (cfil_info->cfi_dir == CFS_CONNECTION_DIR_OUT);
3124 union sockaddr_in_4_6 *src = outgoing ? &data.local : &data.remote;
3125 union sockaddr_in_4_6 *dst = outgoing ? &data.remote : &data.local;
3126 cfil_fill_event_msg_addresses(hash_entry_ptr, inp, src, dst, !IS_INP_V6(inp), outgoing);
3127 }
3128
3129 data.byte_count_in = cfil_info->cfi_byte_inbound_count;
3130 data.byte_count_out = cfil_info->cfi_byte_outbound_count;
3131
3132 msg->cfc_signature_length = sizeof(cfil_crypto_signature);
3133 if (cfil_crypto_sign_data(crypto_state, &data, msg->cfc_signature, &msg->cfc_signature_length) != 0) {
3134 msg->cfc_signature_length = 0;
3135 CFIL_LOG(LOG_ERR, "CFIL: Failed to sign closed msg <sockID %llu>",
3136 msg->cfc_msghdr.cfm_sock_id);
3137 return false;
3138 }
3139
3140 return true;
3141 }
3142
3143 static int
3144 cfil_dispatch_attach_event(struct socket *so, struct cfil_info *cfil_info,
3145 uint32_t kcunit, int conn_dir)
3146 {
3147 errno_t error = 0;
3148 struct cfil_entry *entry = NULL;
3149 struct cfil_msg_sock_attached msg_attached;
3150 struct content_filter *cfc = NULL;
3151 struct inpcb *inp = (struct inpcb *)so->so_pcb;
3152 struct cfil_hash_entry *hash_entry_ptr = NULL;
3153 struct cfil_hash_entry hash_entry;
3154
3155 memset(&hash_entry, 0, sizeof(struct cfil_hash_entry));
3156 proc_t p = PROC_NULL;
3157 task_t t = TASK_NULL;
3158
3159 socket_lock_assert_owned(so);
3160
3161 cfil_rw_lock_shared(&cfil_lck_rw);
3162
3163 if (so->so_proto == NULL || so->so_proto->pr_domain == NULL) {
3164 error = EINVAL;
3165 goto done;
3166 }
3167
3168 if (kcunit == 0) {
3169 entry = SLIST_FIRST(&cfil_info->cfi_ordered_entries);
3170 } else {
3171 entry = &cfil_info->cfi_entries[kcunit - 1];
3172 }
3173
3174 if (entry == NULL) {
3175 goto done;
3176 }
3177
3178 cfc = entry->cfe_filter;
3179 if (cfc == NULL) {
3180 goto done;
3181 }
3182
3183 if ((entry->cfe_flags & CFEF_SENT_SOCK_ATTACHED)) {
3184 goto done;
3185 }
3186
3187 if (kcunit == 0) {
3188 kcunit = CFI_ENTRY_KCUNIT(cfil_info, entry);
3189 }
3190
3191 CFIL_LOG(LOG_INFO, "so %llx filter_control_unit %u kcunit %u",
3192 (uint64_t)VM_KERNEL_ADDRPERM(so), entry->cfe_necp_control_unit, kcunit);
3193
3194 /* Would be wasteful to try when flow controlled */
3195 if (cfc->cf_flags & CFF_FLOW_CONTROLLED) {
3196 error = ENOBUFS;
3197 goto done;
3198 }
3199
3200 bzero(&msg_attached, sizeof(struct cfil_msg_sock_attached));
3201 msg_attached.cfs_msghdr.cfm_len = sizeof(struct cfil_msg_sock_attached);
3202 msg_attached.cfs_msghdr.cfm_version = CFM_VERSION_CURRENT;
3203 msg_attached.cfs_msghdr.cfm_type = CFM_TYPE_EVENT;
3204 msg_attached.cfs_msghdr.cfm_op = CFM_OP_SOCKET_ATTACHED;
3205 msg_attached.cfs_msghdr.cfm_sock_id = entry->cfe_cfil_info->cfi_sock_id;
3206
3207 msg_attached.cfs_sock_family = so->so_proto->pr_domain->dom_family;
3208 msg_attached.cfs_sock_type = so->so_proto->pr_type;
3209 msg_attached.cfs_sock_protocol = so->so_proto->pr_protocol;
3210 msg_attached.cfs_pid = so->last_pid;
3211 memcpy(msg_attached.cfs_uuid, so->last_uuid, sizeof(uuid_t));
3212 if (so->so_flags & SOF_DELEGATED) {
3213 msg_attached.cfs_e_pid = so->e_pid;
3214 memcpy(msg_attached.cfs_e_uuid, so->e_uuid, sizeof(uuid_t));
3215 } else {
3216 msg_attached.cfs_e_pid = so->last_pid;
3217 memcpy(msg_attached.cfs_e_uuid, so->last_uuid, sizeof(uuid_t));
3218 }
3219
3220 /*
3221 * Fill in address info:
3222 * For UDP, use the cfil_info hash entry directly.
3223 * For TCP, compose a hash entry with the saved addresses.
3224 */
3225 if (cfil_info->cfi_hash_entry != NULL) {
3226 hash_entry_ptr = cfil_info->cfi_hash_entry;
3227 } else if (cfil_info->cfi_so_attach_faddr.sa.sa_len > 0 ||
3228 cfil_info->cfi_so_attach_laddr.sa.sa_len > 0) {
3229 fill_cfil_hash_entry_from_address(&hash_entry, TRUE, &cfil_info->cfi_so_attach_laddr.sa, FALSE);
3230 fill_cfil_hash_entry_from_address(&hash_entry, FALSE, &cfil_info->cfi_so_attach_faddr.sa, FALSE);
3231 hash_entry_ptr = &hash_entry;
3232 }
3233 if (hash_entry_ptr != NULL) {
3234 cfil_fill_event_msg_addresses(hash_entry_ptr, inp,
3235 &msg_attached.cfs_src, &msg_attached.cfs_dst,
3236 !IS_INP_V6(inp), conn_dir == CFS_CONNECTION_DIR_OUT);
3237 }
3238 msg_attached.cfs_conn_dir = conn_dir;
3239
3240 if (msg_attached.cfs_e_pid != 0) {
3241 p = proc_find(msg_attached.cfs_e_pid);
3242 if (p != PROC_NULL) {
3243 t = proc_task(p);
3244 if (t != TASK_NULL) {
3245 audit_token_t audit_token;
3246 mach_msg_type_number_t count = TASK_AUDIT_TOKEN_COUNT;
3247 if (task_info(t, TASK_AUDIT_TOKEN, (task_info_t)&audit_token, &count) == KERN_SUCCESS) {
3248 memcpy(&msg_attached.cfs_audit_token, &audit_token, sizeof(msg_attached.cfs_audit_token));
3249 } else {
3250 CFIL_LOG(LOG_ERR, "CFIL: Failed to get process audit token <sockID %llu> ",
3251 entry->cfe_cfil_info->cfi_sock_id);
3252 }
3253 }
3254 proc_rele(p);
3255 }
3256 }
3257
3258 if (cfil_info->cfi_debug) {
3259 cfil_info_log(LOG_ERR, cfil_info, "CFIL: SENDING ATTACH UP");
3260 }
3261
3262 cfil_dispatch_attach_event_sign(entry->cfe_filter->cf_crypto_state, cfil_info, &msg_attached);
3263
3264 #if LIFECYCLE_DEBUG
3265 CFIL_LOG(LOG_DEBUG, "CFIL: LIFECYCLE: SENDING ATTACH UP <sockID %llu> ",
3266 entry->cfe_cfil_info->cfi_sock_id);
3267 #endif
3268
3269 error = ctl_enqueuedata(entry->cfe_filter->cf_kcref,
3270 entry->cfe_filter->cf_kcunit,
3271 &msg_attached,
3272 sizeof(struct cfil_msg_sock_attached),
3273 CTL_DATA_EOR);
3274 if (error != 0) {
3275 CFIL_LOG(LOG_ERR, "ctl_enqueuedata() failed: %d", error);
3276 goto done;
3277 }
3278 microuptime(&entry->cfe_last_event);
3279 cfil_info->cfi_first_event.tv_sec = entry->cfe_last_event.tv_sec;
3280 cfil_info->cfi_first_event.tv_usec = entry->cfe_last_event.tv_usec;
3281
3282 entry->cfe_flags |= CFEF_SENT_SOCK_ATTACHED;
3283 OSIncrementAtomic(&cfil_stats.cfs_attach_event_ok);
3284 done:
3285
3286 /* We can recover from flow control */
3287 if (error == ENOBUFS) {
3288 entry->cfe_flags |= CFEF_FLOW_CONTROLLED;
3289 OSIncrementAtomic(&cfil_stats.cfs_attach_event_flow_control);
3290
3291 if (!cfil_rw_lock_shared_to_exclusive(&cfil_lck_rw)) {
3292 cfil_rw_lock_exclusive(&cfil_lck_rw);
3293 }
3294
3295 cfc->cf_flags |= CFF_FLOW_CONTROLLED;
3296
3297 cfil_rw_unlock_exclusive(&cfil_lck_rw);
3298 } else {
3299 if (error != 0) {
3300 OSIncrementAtomic(&cfil_stats.cfs_attach_event_fail);
3301 }
3302
3303 cfil_rw_unlock_shared(&cfil_lck_rw);
3304 }
3305 return error;
3306 }
3307
3308 static int
3309 cfil_dispatch_disconnect_event(struct socket *so, struct cfil_info *cfil_info, uint32_t kcunit, int outgoing)
3310 {
3311 errno_t error = 0;
3312 struct mbuf *msg = NULL;
3313 struct cfil_entry *entry;
3314 struct cfe_buf *entrybuf;
3315 struct cfil_msg_hdr msg_disconnected;
3316 struct content_filter *cfc;
3317
3318 socket_lock_assert_owned(so);
3319
3320 cfil_rw_lock_shared(&cfil_lck_rw);
3321
3322 entry = &cfil_info->cfi_entries[kcunit - 1];
3323 if (outgoing) {
3324 entrybuf = &entry->cfe_snd;
3325 } else {
3326 entrybuf = &entry->cfe_rcv;
3327 }
3328
3329 cfc = entry->cfe_filter;
3330 if (cfc == NULL) {
3331 goto done;
3332 }
3333
3334 CFIL_LOG(LOG_INFO, "so %llx kcunit %u outgoing %d",
3335 (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit, outgoing);
3336
3337 /*
3338 * Send the disconnection event once
3339 */
3340 if ((outgoing && (entry->cfe_flags & CFEF_SENT_DISCONNECT_OUT)) ||
3341 (!outgoing && (entry->cfe_flags & CFEF_SENT_DISCONNECT_IN))) {
3342 CFIL_LOG(LOG_INFO, "so %llx disconnect already sent",
3343 (uint64_t)VM_KERNEL_ADDRPERM(so));
3344 goto done;
3345 }
3346
3347 /*
3348 * We're not disconnected as long as some data is waiting
3349 * to be delivered to the filter
3350 */
3351 if (outgoing && cfil_queue_empty(&entrybuf->cfe_ctl_q) == 0) {
3352 CFIL_LOG(LOG_INFO, "so %llx control queue not empty",
3353 (uint64_t)VM_KERNEL_ADDRPERM(so));
3354 error = EBUSY;
3355 goto done;
3356 }
3357 /* Would be wasteful to try when flow controlled */
3358 if (cfc->cf_flags & CFF_FLOW_CONTROLLED) {
3359 error = ENOBUFS;
3360 goto done;
3361 }
3362
3363 if (cfil_info->cfi_debug) {
3364 cfil_info_log(LOG_ERR, cfil_info, "CFIL: SENDING DISCONNECT UP");
3365 }
3366
3367 #if LIFECYCLE_DEBUG
3368 cfil_info_log(LOG_ERR, cfil_info, outgoing ?
3369 "CFIL: LIFECYCLE: OUT - SENDING DISCONNECT UP":
3370 "CFIL: LIFECYCLE: IN - SENDING DISCONNECT UP");
3371 #endif
3372
3373 bzero(&msg_disconnected, sizeof(struct cfil_msg_hdr));
3374 msg_disconnected.cfm_len = sizeof(struct cfil_msg_hdr);
3375 msg_disconnected.cfm_version = CFM_VERSION_CURRENT;
3376 msg_disconnected.cfm_type = CFM_TYPE_EVENT;
3377 msg_disconnected.cfm_op = outgoing ? CFM_OP_DISCONNECT_OUT :
3378 CFM_OP_DISCONNECT_IN;
3379 msg_disconnected.cfm_sock_id = entry->cfe_cfil_info->cfi_sock_id;
3380 error = ctl_enqueuedata(entry->cfe_filter->cf_kcref,
3381 entry->cfe_filter->cf_kcunit,
3382 &msg_disconnected,
3383 sizeof(struct cfil_msg_hdr),
3384 CTL_DATA_EOR);
3385 if (error != 0) {
3386 CFIL_LOG(LOG_ERR, "ctl_enqueuembuf() failed: %d", error);
3387 mbuf_freem(msg);
3388 goto done;
3389 }
3390 microuptime(&entry->cfe_last_event);
3391 CFI_ADD_TIME_LOG(cfil_info, &entry->cfe_last_event, &cfil_info->cfi_first_event, msg_disconnected.cfm_op);
3392
3393 /* Remember we have sent the disconnection message */
3394 if (outgoing) {
3395 entry->cfe_flags |= CFEF_SENT_DISCONNECT_OUT;
3396 OSIncrementAtomic(&cfil_stats.cfs_disconnect_out_event_ok);
3397 } else {
3398 entry->cfe_flags |= CFEF_SENT_DISCONNECT_IN;
3399 OSIncrementAtomic(&cfil_stats.cfs_disconnect_in_event_ok);
3400 }
3401 done:
3402 if (error == ENOBUFS) {
3403 entry->cfe_flags |= CFEF_FLOW_CONTROLLED;
3404 OSIncrementAtomic(
3405 &cfil_stats.cfs_disconnect_event_flow_control);
3406
3407 if (!cfil_rw_lock_shared_to_exclusive(&cfil_lck_rw)) {
3408 cfil_rw_lock_exclusive(&cfil_lck_rw);
3409 }
3410
3411 cfc->cf_flags |= CFF_FLOW_CONTROLLED;
3412
3413 cfil_rw_unlock_exclusive(&cfil_lck_rw);
3414 } else {
3415 if (error != 0) {
3416 OSIncrementAtomic(
3417 &cfil_stats.cfs_disconnect_event_fail);
3418 }
3419
3420 cfil_rw_unlock_shared(&cfil_lck_rw);
3421 }
3422 return error;
3423 }
3424
3425 int
3426 cfil_dispatch_closed_event(struct socket *so, struct cfil_info *cfil_info, int kcunit)
3427 {
3428 struct cfil_entry *entry;
3429 struct cfil_msg_sock_closed msg_closed;
3430 errno_t error = 0;
3431 struct content_filter *cfc;
3432
3433 socket_lock_assert_owned(so);
3434
3435 cfil_rw_lock_shared(&cfil_lck_rw);
3436
3437 entry = &cfil_info->cfi_entries[kcunit - 1];
3438 cfc = entry->cfe_filter;
3439 if (cfc == NULL) {
3440 goto done;
3441 }
3442
3443 CFIL_LOG(LOG_INFO, "so %llx kcunit %d",
3444 (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit);
3445
3446 /* Would be wasteful to try when flow controlled */
3447 if (cfc->cf_flags & CFF_FLOW_CONTROLLED) {
3448 error = ENOBUFS;
3449 goto done;
3450 }
3451 /*
3452 * Send a single closed message per filter
3453 */
3454 if ((entry->cfe_flags & CFEF_SENT_SOCK_CLOSED) != 0) {
3455 goto done;
3456 }
3457 if ((entry->cfe_flags & CFEF_SENT_SOCK_ATTACHED) == 0) {
3458 goto done;
3459 }
3460
3461 microuptime(&entry->cfe_last_event);
3462 CFI_ADD_TIME_LOG(cfil_info, &entry->cfe_last_event, &cfil_info->cfi_first_event, CFM_OP_SOCKET_CLOSED);
3463
3464 bzero(&msg_closed, sizeof(struct cfil_msg_sock_closed));
3465 msg_closed.cfc_msghdr.cfm_len = sizeof(struct cfil_msg_sock_closed);
3466 msg_closed.cfc_msghdr.cfm_version = CFM_VERSION_CURRENT;
3467 msg_closed.cfc_msghdr.cfm_type = CFM_TYPE_EVENT;
3468 msg_closed.cfc_msghdr.cfm_op = CFM_OP_SOCKET_CLOSED;
3469 msg_closed.cfc_msghdr.cfm_sock_id = entry->cfe_cfil_info->cfi_sock_id;
3470 msg_closed.cfc_first_event.tv_sec = cfil_info->cfi_first_event.tv_sec;
3471 msg_closed.cfc_first_event.tv_usec = cfil_info->cfi_first_event.tv_usec;
3472 memcpy(msg_closed.cfc_op_time, cfil_info->cfi_op_time, sizeof(uint32_t) * CFI_MAX_TIME_LOG_ENTRY);
3473 memcpy(msg_closed.cfc_op_list, cfil_info->cfi_op_list, sizeof(unsigned char) * CFI_MAX_TIME_LOG_ENTRY);
3474 msg_closed.cfc_op_list_ctr = cfil_info->cfi_op_list_ctr;
3475 msg_closed.cfc_byte_inbound_count = cfil_info->cfi_byte_inbound_count;
3476 msg_closed.cfc_byte_outbound_count = cfil_info->cfi_byte_outbound_count;
3477
3478 cfil_dispatch_closed_event_sign(entry->cfe_filter->cf_crypto_state, so, cfil_info, &msg_closed);
3479
3480 if (cfil_info->cfi_debug) {
3481 cfil_info_log(LOG_ERR, cfil_info, "CFIL: SENDING CLOSED UP");
3482 }
3483
3484 #if LIFECYCLE_DEBUG
3485 CFIL_LOG(LOG_ERR, "CFIL: LIFECYCLE: SENDING CLOSED UP: <sock id %llu> op ctr %d, start time %llu.%llu", msg_closed.cfc_msghdr.cfm_sock_id, cfil_info->cfi_op_list_ctr, cfil_info->cfi_first_event.tv_sec, cfil_info->cfi_first_event.tv_usec);
3486 #endif
3487 /* for debugging
3488 * if (msg_closed.cfc_op_list_ctr > CFI_MAX_TIME_LOG_ENTRY) {
3489 * msg_closed.cfc_op_list_ctr = CFI_MAX_TIME_LOG_ENTRY; // just in case
3490 * }
3491 * for (unsigned int i = 0; i < msg_closed.cfc_op_list_ctr ; i++) {
3492 * CFIL_LOG(LOG_ERR, "MD: socket %llu event %2u, time + %u msec", msg_closed.cfc_msghdr.cfm_sock_id, (unsigned short)msg_closed.cfc_op_list[i], msg_closed.cfc_op_time[i]);
3493 * }
3494 */
3495
3496 error = ctl_enqueuedata(entry->cfe_filter->cf_kcref,
3497 entry->cfe_filter->cf_kcunit,
3498 &msg_closed,
3499 sizeof(struct cfil_msg_sock_closed),
3500 CTL_DATA_EOR);
3501 if (error != 0) {
3502 CFIL_LOG(LOG_ERR, "ctl_enqueuedata() failed: %d",
3503 error);
3504 goto done;
3505 }
3506
3507 entry->cfe_flags |= CFEF_SENT_SOCK_CLOSED;
3508 OSIncrementAtomic(&cfil_stats.cfs_closed_event_ok);
3509 done:
3510 /* We can recover from flow control */
3511 if (error == ENOBUFS) {
3512 entry->cfe_flags |= CFEF_FLOW_CONTROLLED;
3513 OSIncrementAtomic(&cfil_stats.cfs_closed_event_flow_control);
3514
3515 if (!cfil_rw_lock_shared_to_exclusive(&cfil_lck_rw)) {
3516 cfil_rw_lock_exclusive(&cfil_lck_rw);
3517 }
3518
3519 cfc->cf_flags |= CFF_FLOW_CONTROLLED;
3520
3521 cfil_rw_unlock_exclusive(&cfil_lck_rw);
3522 } else {
3523 if (error != 0) {
3524 OSIncrementAtomic(&cfil_stats.cfs_closed_event_fail);
3525 }
3526
3527 cfil_rw_unlock_shared(&cfil_lck_rw);
3528 }
3529
3530 return error;
3531 }
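
/*
 * For reference, a user-space filter agent drains the attach, data,
 * disconnect and closed events dispatched above by reading its kernel
 * control socket and switching on the message op.  A minimal sketch
 * (user-space code; assumes the agent has already opened and connected a
 * PF_SYSTEM/SOCK_DGRAM/SYSPROTO_CONTROL socket to the
 * CONTENT_FILTER_CONTROL_NAME control and holds the required
 * entitlement; kctl_fd and handle_event() are hypothetical names and
 * error handling is omitted):
 *
 *	uint8_t buf[8192];
 *	ssize_t n;
 *
 *	while ((n = recv(kctl_fd, buf, sizeof(buf), 0)) > 0) {
 *		struct cfil_msg_hdr *hdr = (struct cfil_msg_hdr *)(void *)buf;
 *
 *		if ((size_t)n < sizeof(*hdr) || hdr->cfm_len > (uint32_t)n) {
 *			continue;	// truncated message, ignore
 *		}
 *		switch (hdr->cfm_op) {
 *		case CFM_OP_SOCKET_ATTACHED:
 *		case CFM_OP_DATA_OUT:
 *		case CFM_OP_DATA_IN:
 *		case CFM_OP_DISCONNECT_OUT:
 *		case CFM_OP_DISCONNECT_IN:
 *		case CFM_OP_SOCKET_CLOSED:
 *			handle_event(hdr);	// keyed by hdr->cfm_sock_id
 *			break;
 *		default:
 *			break;
 *		}
 *	}
 */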
3532
3533 static void
3534 fill_ip6_sockaddr_4_6(union sockaddr_in_4_6 *sin46,
3535 struct in6_addr *ip6, u_int16_t port)
3536 {
3537 if (sin46 == NULL) {
3538 return;
3539 }
3540
3541 struct sockaddr_in6 *sin6 = &sin46->sin6;
3542
3543 sin6->sin6_family = AF_INET6;
3544 sin6->sin6_len = sizeof(*sin6);
3545 sin6->sin6_port = port;
3546 sin6->sin6_addr = *ip6;
3547 if (IN6_IS_SCOPE_EMBED(&sin6->sin6_addr)) {
3548 sin6->sin6_scope_id = ntohs(sin6->sin6_addr.s6_addr16[1]);
3549 sin6->sin6_addr.s6_addr16[1] = 0;
3550 }
3551 }
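
/*
 * fill_ip6_sockaddr_4_6() above unpacks the KAME-style embedded scope:
 * for scoped addresses the kernel stores the interface index in the
 * second 16-bit word of the address.  For example, a PCB holding the
 * link-local peer fe80:4::1 in embedded form is reported to user space
 * as sin6_addr = fe80::1 with sin6_scope_id = 4; the embedded word is
 * cleared so the address matches its textual representation.
 */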
3552
3553 static void
3554 fill_ip_sockaddr_4_6(union sockaddr_in_4_6 *sin46,
3555 struct in_addr ip, u_int16_t port)
3556 {
3557 if (sin46 == NULL) {
3558 return;
3559 }
3560
3561 struct sockaddr_in *sin = &sin46->sin;
3562
3563 sin->sin_family = AF_INET;
3564 sin->sin_len = sizeof(*sin);
3565 sin->sin_port = port;
3566 sin->sin_addr.s_addr = ip.s_addr;
3567 }
3568
3569 static void
3570 cfil_get_flow_address_v6(struct cfil_hash_entry *entry, struct inpcb *inp,
3571 struct in6_addr **laddr, struct in6_addr **faddr,
3572 u_int16_t *lport, u_int16_t *fport)
3573 {
3574 if (entry != NULL) {
3575 *laddr = &entry->cfentry_laddr.addr6;
3576 *faddr = &entry->cfentry_faddr.addr6;
3577 *lport = entry->cfentry_lport;
3578 *fport = entry->cfentry_fport;
3579 } else {
3580 *laddr = &inp->in6p_laddr;
3581 *faddr = &inp->in6p_faddr;
3582 *lport = inp->inp_lport;
3583 *fport = inp->inp_fport;
3584 }
3585 }
3586
3587 static void
3588 cfil_get_flow_address(struct cfil_hash_entry *entry, struct inpcb *inp,
3589 struct in_addr *laddr, struct in_addr *faddr,
3590 u_int16_t *lport, u_int16_t *fport)
3591 {
3592 if (entry != NULL) {
3593 *laddr = entry->cfentry_laddr.addr46.ia46_addr4;
3594 *faddr = entry->cfentry_faddr.addr46.ia46_addr4;
3595 *lport = entry->cfentry_lport;
3596 *fport = entry->cfentry_fport;
3597 } else {
3598 *laddr = inp->inp_laddr;
3599 *faddr = inp->inp_faddr;
3600 *lport = inp->inp_lport;
3601 *fport = inp->inp_fport;
3602 }
3603 }
3604
3605 static int
3606 cfil_dispatch_data_event(struct socket *so, struct cfil_info *cfil_info, uint32_t kcunit, int outgoing,
3607 struct mbuf *data, unsigned int copyoffset, unsigned int copylen)
3608 {
3609 errno_t error = 0;
3610 struct mbuf *copy = NULL;
3611 struct mbuf *msg = NULL;
3612 unsigned int one = 1;
3613 struct cfil_msg_data_event *data_req;
3614 size_t hdrsize;
3615 struct inpcb *inp = (struct inpcb *)so->so_pcb;
3616 struct cfil_entry *entry;
3617 struct cfe_buf *entrybuf;
3618 struct content_filter *cfc;
3619 struct timeval tv;
3620 int inp_flags = 0;
3621
3622 cfil_rw_lock_shared(&cfil_lck_rw);
3623
3624 entry = &cfil_info->cfi_entries[kcunit - 1];
3625 if (outgoing) {
3626 entrybuf = &entry->cfe_snd;
3627 } else {
3628 entrybuf = &entry->cfe_rcv;
3629 }
3630
3631 cfc = entry->cfe_filter;
3632 if (cfc == NULL) {
3633 goto done;
3634 }
3635
3636 data = cfil_data_start(data);
3637 if (data == NULL || (data->m_flags & M_PKTHDR) == 0) {
3638 CFIL_LOG(LOG_ERR, "NOT PKTHDR");
3639 goto done;
3640 }
3641
3642 CFIL_LOG(LOG_INFO, "so %llx kcunit %u outgoing %d",
3643 (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit, outgoing);
3644
3645 socket_lock_assert_owned(so);
3646
3647 /* Would be wasteful to try */
3648 if (cfc->cf_flags & CFF_FLOW_CONTROLLED) {
3649 error = ENOBUFS;
3650 goto done;
3651 }
3652
3653 /* Make a copy of the data to pass to kernel control socket */
3654 copy = m_copym_mode(data, copyoffset, copylen, M_DONTWAIT,
3655 M_COPYM_NOOP_HDR);
3656 if (copy == NULL) {
3657 CFIL_LOG(LOG_ERR, "m_copym_mode() failed");
3658 error = ENOMEM;
3659 goto done;
3660 }
3661
3662 /* We need an mbuf packet for the message header */
3663 hdrsize = sizeof(struct cfil_msg_data_event);
3664 error = mbuf_allocpacket(MBUF_DONTWAIT, hdrsize, &one, &msg);
3665 if (error != 0) {
3666 CFIL_LOG(LOG_ERR, "mbuf_allocpacket() failed");
3667 m_freem(copy);
3668 /*
3669 * ENOBUFS is reserved to indicate flow control, so report the allocation failure as ENOMEM
3670 */
3671 error = ENOMEM;
3672 goto done;
3673 }
3674 mbuf_setlen(msg, hdrsize);
3675 mbuf_pkthdr_setlen(msg, hdrsize + copylen);
3676 msg->m_next = copy;
3677 data_req = (struct cfil_msg_data_event *)mbuf_data(msg);
3678 bzero(data_req, hdrsize);
3679 data_req->cfd_msghdr.cfm_len = (uint32_t)hdrsize + copylen;
3680 data_req->cfd_msghdr.cfm_version = 1;
3681 data_req->cfd_msghdr.cfm_type = CFM_TYPE_EVENT;
3682 data_req->cfd_msghdr.cfm_op =
3683 outgoing ? CFM_OP_DATA_OUT : CFM_OP_DATA_IN;
3684 data_req->cfd_msghdr.cfm_sock_id =
3685 entry->cfe_cfil_info->cfi_sock_id;
3686 data_req->cfd_start_offset = entrybuf->cfe_peeked;
3687 data_req->cfd_end_offset = entrybuf->cfe_peeked + copylen;
3688
3689 data_req->cfd_flags = 0;
3690 if (OPTIONAL_IP_HEADER(so)) {
3691 /*
3692 * For non-UDP/TCP traffic, indicate to filters if optional
3693 * IP header is present:
3694 * outgoing - indicate according to INP_HDRINCL flag
3695 * incoming - For IPv4 only, stripping of IP header is
3696 * optional. But for CFIL, we delay stripping
3697 * at rip_input. So CFIL always expects IP
3698 * frames. IP header will be stripped according
3699 * to INP_STRIPHDR flag later at reinjection.
3700 */
3701 if ((!outgoing && !IS_INP_V6(inp)) ||
3702 (outgoing && cfil_dgram_peek_socket_state(data, &inp_flags) && (inp_flags & INP_HDRINCL))) {
3703 data_req->cfd_flags |= CFD_DATA_FLAG_IP_HEADER;
3704 }
3705 }
3706
3707 /*
3708 * Copy address/port into event msg.
3709 * For non-connected sockets, the addresses must be copied from the
3710 * passed parameters
3711 */
3712 cfil_fill_event_msg_addresses(cfil_info->cfi_hash_entry, inp,
3713 &data_req->cfc_src, &data_req->cfc_dst,
3714 !IS_INP_V6(inp), outgoing);
3715
3716 if (cfil_info->cfi_debug) {
3717 cfil_info_log(LOG_ERR, cfil_info, "CFIL: SENDING DATA UP");
3718 }
3719
3720 if (cfil_info->cfi_isSignatureLatest == false) {
3721 cfil_dispatch_data_event_sign(entry->cfe_filter->cf_crypto_state, so, cfil_info, data_req);
3722 }
3723
3724 microuptime(&tv);
3725 CFI_ADD_TIME_LOG(cfil_info, &tv, &cfil_info->cfi_first_event, data_req->cfd_msghdr.cfm_op);
3726
3727 /* Pass the message to the content filter */
3728 error = ctl_enqueuembuf(entry->cfe_filter->cf_kcref,
3729 entry->cfe_filter->cf_kcunit,
3730 msg, CTL_DATA_EOR);
3731 if (error != 0) {
3732 CFIL_LOG(LOG_ERR, "ctl_enqueuembuf() failed: %d", error);
3733 mbuf_freem(msg);
3734 goto done;
3735 }
3736 entry->cfe_flags &= ~CFEF_FLOW_CONTROLLED;
3737 OSIncrementAtomic(&cfil_stats.cfs_data_event_ok);
3738
3739 #if VERDICT_DEBUG
3740 CFIL_LOG(LOG_ERR, "CFIL: VERDICT ACTION: so %llx sockID %llu outgoing %d: mbuf %llx copyoffset %u copylen %u",
3741 (uint64_t)VM_KERNEL_ADDRPERM(so), cfil_info->cfi_sock_id, outgoing, (uint64_t)VM_KERNEL_ADDRPERM(data), copyoffset, copylen);
3742 #endif
3743
3744 if (cfil_info->cfi_debug) {
3745 CFIL_LOG(LOG_ERR, "CFIL: VERDICT ACTION: so %llx sockID %llu outgoing %d: mbuf %llx copyoffset %u copylen %u (%s)",
3746 (uint64_t)VM_KERNEL_ADDRPERM(so), cfil_info->cfi_sock_id, outgoing, (uint64_t)VM_KERNEL_ADDRPERM(data), copyoffset, copylen,
3747 data_req->cfd_flags & CFD_DATA_FLAG_IP_HEADER ? "IP HDR" : "NO IP HDR");
3748 }
3749
3750 done:
3751 if (error == ENOBUFS) {
3752 entry->cfe_flags |= CFEF_FLOW_CONTROLLED;
3753 OSIncrementAtomic(
3754 &cfil_stats.cfs_data_event_flow_control);
3755
3756 if (!cfil_rw_lock_shared_to_exclusive(&cfil_lck_rw)) {
3757 cfil_rw_lock_exclusive(&cfil_lck_rw);
3758 }
3759
3760 cfc->cf_flags |= CFF_FLOW_CONTROLLED;
3761
3762 cfil_rw_unlock_exclusive(&cfil_lck_rw);
3763 } else {
3764 if (error != 0) {
3765 OSIncrementAtomic(&cfil_stats.cfs_data_event_fail);
3766 }
3767
3768 cfil_rw_unlock_shared(&cfil_lck_rw);
3769 }
3770 return error;
3771 }
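
/*
 * Shape of the message built by cfil_dispatch_data_event() above as it
 * is handed to ctl_enqueuembuf() (one CTL_DATA_EOR record per event):
 *
 *	msg (M_PKTHDR, pkthdr.len = hdrsize + copylen)
 *	  +-- struct cfil_msg_data_event        (hdrsize bytes)
 *	  +-- m_next --> copy of the socket data (copylen bytes)
 *
 * The offsets are absolute stream offsets: if cfe_peeked is 1000 when
 * 500 new bytes are dispatched, the agent sees cfd_start_offset = 1000
 * and cfd_end_offset = 1500, and any pass/peek decision it returns is
 * expressed against that same absolute offset space.
 */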
3772
3773 /*
3774 * Process the queue of data waiting to be delivered to content filter
3775 */
3776 static int
3777 cfil_data_service_ctl_q(struct socket *so, struct cfil_info *cfil_info, uint32_t kcunit, int outgoing)
3778 {
3779 errno_t error = 0;
3780 struct mbuf *data, *tmp = NULL;
3781 unsigned int datalen = 0, copylen = 0, copyoffset = 0;
3782 struct cfil_entry *entry;
3783 struct cfe_buf *entrybuf;
3784 uint64_t currentoffset = 0;
3785
3786 if (cfil_info == NULL) {
3787 return 0;
3788 }
3789
3790 CFIL_LOG(LOG_INFO, "so %llx kcunit %u outgoing %d",
3791 (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit, outgoing);
3792
3793 socket_lock_assert_owned(so);
3794
3795 entry = &cfil_info->cfi_entries[kcunit - 1];
3796 if (outgoing) {
3797 entrybuf = &entry->cfe_snd;
3798 } else {
3799 entrybuf = &entry->cfe_rcv;
3800 }
3801
3802 /* Send attached message if not yet done */
3803 if ((entry->cfe_flags & CFEF_SENT_SOCK_ATTACHED) == 0) {
3804 error = cfil_dispatch_attach_event(so, cfil_info, CFI_ENTRY_KCUNIT(cfil_info, entry),
3805 outgoing ? CFS_CONNECTION_DIR_OUT : CFS_CONNECTION_DIR_IN);
3806 if (error != 0) {
3807 /* We can recover from flow control */
3808 if (error == ENOBUFS || error == ENOMEM) {
3809 error = 0;
3810 }
3811 goto done;
3812 }
3813 } else if ((entry->cfe_flags & CFEF_DATA_START) == 0) {
3814 OSIncrementAtomic(&cfil_stats.cfs_ctl_q_not_started);
3815 goto done;
3816 }
3817
3818 #if DATA_DEBUG
3819 CFIL_LOG(LOG_DEBUG, "CFIL: SERVICE CTL-Q: pass_offset %llu peeked %llu peek_offset %llu",
3820 entrybuf->cfe_pass_offset,
3821 entrybuf->cfe_peeked,
3822 entrybuf->cfe_peek_offset);
3823 #endif
3824
3825 /* Move all data that can pass */
3826 while ((data = cfil_queue_first(&entrybuf->cfe_ctl_q)) != NULL &&
3827 entrybuf->cfe_ctl_q.q_start < entrybuf->cfe_pass_offset) {
3828 datalen = cfil_data_length(data, NULL, NULL);
3829 tmp = data;
3830
3831 if (entrybuf->cfe_ctl_q.q_start + datalen <=
3832 entrybuf->cfe_pass_offset) {
3833 /*
3834 * The first mbuf can fully pass
3835 */
3836 copylen = datalen;
3837 } else {
3838 /*
3839 * The first mbuf can partially pass
3840 */
3841 copylen = (unsigned int)(entrybuf->cfe_pass_offset - entrybuf->cfe_ctl_q.q_start);
3842 }
3843 VERIFY(copylen <= datalen);
3844
3845 #if DATA_DEBUG
3846 CFIL_LOG(LOG_DEBUG,
3847 "CFIL: SERVICE CTL-Q PASSING: %llx first %llu peeked %llu pass %llu peek %llu"
3848 "datalen %u copylen %u",
3849 (uint64_t)VM_KERNEL_ADDRPERM(tmp),
3850 entrybuf->cfe_ctl_q.q_start,
3851 entrybuf->cfe_peeked,
3852 entrybuf->cfe_pass_offset,
3853 entrybuf->cfe_peek_offset,
3854 datalen, copylen);
3855 #endif
3856
3857 /*
3858 * Data that passes has been peeked at explicitly or
3859 * implicitly
3860 */
3861 if (entrybuf->cfe_ctl_q.q_start + copylen >
3862 entrybuf->cfe_peeked) {
3863 entrybuf->cfe_peeked =
3864 entrybuf->cfe_ctl_q.q_start + copylen;
3865 }
3866 /*
3867 * Stop on partial pass
3868 */
3869 if (copylen < datalen) {
3870 break;
3871 }
3872
3873 /* All good, move full data from ctl queue to pending queue */
3874 cfil_queue_remove(&entrybuf->cfe_ctl_q, data, datalen);
3875
3876 cfil_queue_enqueue(&entrybuf->cfe_pending_q, data, datalen);
3877 if (outgoing) {
3878 OSAddAtomic64(datalen,
3879 &cfil_stats.cfs_pending_q_out_enqueued);
3880 } else {
3881 OSAddAtomic64(datalen,
3882 &cfil_stats.cfs_pending_q_in_enqueued);
3883 }
3884 }
3885 CFIL_INFO_VERIFY(cfil_info);
3886 if (tmp != NULL) {
3887 CFIL_LOG(LOG_DEBUG,
3888 "%llx first %llu peeked %llu pass %llu peek %llu"
3889 "datalen %u copylen %u",
3890 (uint64_t)VM_KERNEL_ADDRPERM(tmp),
3891 entrybuf->cfe_ctl_q.q_start,
3892 entrybuf->cfe_peeked,
3893 entrybuf->cfe_pass_offset,
3894 entrybuf->cfe_peek_offset,
3895 datalen, copylen);
3896 }
3897 tmp = NULL;
3898
3899 /* Now deal with remaining data the filter wants to peek at */
3900 for (data = cfil_queue_first(&entrybuf->cfe_ctl_q),
3901 currentoffset = entrybuf->cfe_ctl_q.q_start;
3902 data != NULL && currentoffset < entrybuf->cfe_peek_offset;
3903 data = cfil_queue_next(&entrybuf->cfe_ctl_q, data),
3904 currentoffset += datalen) {
3905 datalen = cfil_data_length(data, NULL, NULL);
3906 tmp = data;
3907
3908 /* We've already peeked at this mbuf */
3909 if (currentoffset + datalen <= entrybuf->cfe_peeked) {
3910 continue;
3911 }
3912 /*
3913 * The data in the first mbuf may have been
3914 * partially peeked at
3915 */
3916 copyoffset = (unsigned int)(entrybuf->cfe_peeked - currentoffset);
3917 VERIFY(copyoffset < datalen);
3918 copylen = datalen - copyoffset;
3919 VERIFY(copylen <= datalen);
3920 /*
3921 * Do not copy more than needed
3922 */
3923 if (currentoffset + copyoffset + copylen >
3924 entrybuf->cfe_peek_offset) {
3925 copylen = (unsigned int)(entrybuf->cfe_peek_offset -
3926 (currentoffset + copyoffset));
3927 }
3928
3929 #if DATA_DEBUG
3930 CFIL_LOG(LOG_DEBUG,
3931 "CFIL: SERVICE CTL-Q PEEKING: %llx current %llu peeked %llu pass %llu peek %llu "
3932 "datalen %u copylen %u copyoffset %u",
3933 (uint64_t)VM_KERNEL_ADDRPERM(tmp),
3934 currentoffset,
3935 entrybuf->cfe_peeked,
3936 entrybuf->cfe_pass_offset,
3937 entrybuf->cfe_peek_offset,
3938 datalen, copylen, copyoffset);
3939 #endif
3940
3941 /*
3942 * Stop if there is nothing more to peek at
3943 */
3944 if (copylen == 0) {
3945 break;
3946 }
3947 /*
3948 * Let the filter get a peek at this span of data
3949 */
3950 error = cfil_dispatch_data_event(so, cfil_info, kcunit,
3951 outgoing, data, copyoffset, copylen);
3952 if (error != 0) {
3953 /* On error, leave data in ctl_q */
3954 break;
3955 }
3956 entrybuf->cfe_peeked += copylen;
3957 if (outgoing) {
3958 OSAddAtomic64(copylen,
3959 &cfil_stats.cfs_ctl_q_out_peeked);
3960 } else {
3961 OSAddAtomic64(copylen,
3962 &cfil_stats.cfs_ctl_q_in_peeked);
3963 }
3964
3965 /* Stop when data could not be fully peeked at */
3966 if (copylen + copyoffset < datalen) {
3967 break;
3968 }
3969 }
3970 CFIL_INFO_VERIFY(cfil_info);
3971 if (tmp != NULL) {
3972 CFIL_LOG(LOG_DEBUG,
3973 "%llx first %llu peeked %llu pass %llu peek %llu"
3974 "datalen %u copylen %u copyoffset %u",
3975 (uint64_t)VM_KERNEL_ADDRPERM(tmp),
3976 currentoffset,
3977 entrybuf->cfe_peeked,
3978 entrybuf->cfe_pass_offset,
3979 entrybuf->cfe_peek_offset,
3980 datalen, copylen, copyoffset);
3981 }
3982
3983 /*
3984 * Process data that has passed the filter
3985 */
3986 error = cfil_service_pending_queue(so, cfil_info, kcunit, outgoing);
3987 if (error != 0) {
3988 CFIL_LOG(LOG_ERR, "cfil_service_pending_queue() error %d",
3989 error);
3990 goto done;
3991 }
3992
3993 /*
3994 * Dispatch disconnect events that could not be sent
3995 */
3996 if (cfil_info == NULL) {
3997 goto done;
3998 } else if (outgoing) {
3999 if ((cfil_info->cfi_flags & CFIF_SHUT_WR) &&
4000 !(entry->cfe_flags & CFEF_SENT_DISCONNECT_OUT)) {
4001 cfil_dispatch_disconnect_event(so, cfil_info, kcunit, 1);
4002 }
4003 } else {
4004 if ((cfil_info->cfi_flags & CFIF_SHUT_RD) &&
4005 !(entry->cfe_flags & CFEF_SENT_DISCONNECT_IN)) {
4006 cfil_dispatch_disconnect_event(so, cfil_info, kcunit, 0);
4007 }
4008 }
4009
4010 done:
4011 CFIL_LOG(LOG_DEBUG,
4012 "first %llu peeked %llu pass %llu peek %llu",
4013 entrybuf->cfe_ctl_q.q_start,
4014 entrybuf->cfe_peeked,
4015 entrybuf->cfe_pass_offset,
4016 entrybuf->cfe_peek_offset);
4017
4018 CFIL_INFO_VERIFY(cfil_info);
4019 return error;
4020 }
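
/*
 * Worked example for cfil_data_service_ctl_q() above.  Assume
 * q_start = 0, cfe_pass_offset = 600, cfe_peek_offset = 800 and a
 * control queue holding two mbufs of 500 and 300 bytes:
 *
 *  - first mbuf: 0 + 500 <= 600, so it fully passes and moves to the
 *    pending queue; q_start and cfe_peeked advance to 500.
 *  - second mbuf: only 100 of its 300 bytes sit below the pass offset,
 *    so those 100 bytes count as implicitly peeked (cfe_peeked = 600)
 *    and the pass loop stops on the partial pass.
 *  - peek loop: the filter may still inspect up to offset 800, so the
 *    span [600, 800) of the second mbuf is dispatched with
 *    copyoffset = 100 and copylen = 200, leaving cfe_peeked = 800.
 */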
4021
4022 /*
4023 * cfil_data_filter()
4024 *
4025 * Process data for a content filter installed on a socket
4026 */
4027 int
4028 cfil_data_filter(struct socket *so, struct cfil_info *cfil_info, uint32_t kcunit, int outgoing,
4029 struct mbuf *data, uint32_t datalen)
4030 {
4031 errno_t error = 0;
4032 struct cfil_entry *entry;
4033 struct cfe_buf *entrybuf;
4034
4035 CFIL_LOG(LOG_INFO, "so %llx kcunit %u outgoing %d",
4036 (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit, outgoing);
4037
4038 socket_lock_assert_owned(so);
4039
4040 entry = &cfil_info->cfi_entries[kcunit - 1];
4041 if (outgoing) {
4042 entrybuf = &entry->cfe_snd;
4043 } else {
4044 entrybuf = &entry->cfe_rcv;
4045 }
4046
4047 /* Are we attached to the filter? */
4048 if (entry->cfe_filter == NULL) {
4049 error = 0;
4050 goto done;
4051 }
4052
4053 /* Dispatch to filters */
4054 cfil_queue_enqueue(&entrybuf->cfe_ctl_q, data, datalen);
4055 if (outgoing) {
4056 OSAddAtomic64(datalen,
4057 &cfil_stats.cfs_ctl_q_out_enqueued);
4058 } else {
4059 OSAddAtomic64(datalen,
4060 &cfil_stats.cfs_ctl_q_in_enqueued);
4061 }
4062
4063 error = cfil_data_service_ctl_q(so, cfil_info, kcunit, outgoing);
4064 if (error != 0) {
4065 CFIL_LOG(LOG_ERR, "cfil_data_service_ctl_q() error %d",
4066 error);
4067 }
4068 /*
4069 * We have to return EJUSTRETURN in all cases to avoid double free
4070 * by socket layer
4071 */
4072 error = EJUSTRETURN;
4073 done:
4074 CFIL_INFO_VERIFY(cfil_info);
4075
4076 CFIL_LOG(LOG_INFO, "return %d", error);
4077 return error;
4078 }
4079
4080 /*
4081 * cfil_service_inject_queue() re-inject data that passed the
4082 * content filters
4083 */
4084 static int
4085 cfil_service_inject_queue(struct socket *so, struct cfil_info *cfil_info, int outgoing)
4086 {
4087 mbuf_t data;
4088 unsigned int datalen;
4089 int mbcnt = 0;
4090 int mbnum = 0;
4091 errno_t error = 0;
4092 struct cfi_buf *cfi_buf;
4093 struct cfil_queue *inject_q;
4094 int need_rwakeup = 0;
4095 int count = 0;
4096 struct inpcb *inp = NULL;
4097 struct ip *ip = NULL;
4098 unsigned int hlen;
4099
4100 if (cfil_info == NULL) {
4101 return 0;
4102 }
4103
4104 socket_lock_assert_owned(so);
4105
4106 if (outgoing) {
4107 cfi_buf = &cfil_info->cfi_snd;
4108 cfil_info->cfi_flags &= ~CFIF_RETRY_INJECT_OUT;
4109 } else {
4110 cfi_buf = &cfil_info->cfi_rcv;
4111 cfil_info->cfi_flags &= ~CFIF_RETRY_INJECT_IN;
4112 }
4113 inject_q = &cfi_buf->cfi_inject_q;
4114
4115 if (cfil_queue_empty(inject_q)) {
4116 return 0;
4117 }
4118
4119 #if DATA_DEBUG | VERDICT_DEBUG
4120 CFIL_LOG(LOG_ERR, "CFIL: SERVICE INJECT-Q: <so %llx> outgoing %d queue len %llu",
4121 (uint64_t)VM_KERNEL_ADDRPERM(so), outgoing, cfil_queue_len(inject_q));
4122 #endif
4123
4124 while ((data = cfil_queue_first(inject_q)) != NULL) {
4125 datalen = cfil_data_length(data, &mbcnt, &mbnum);
4126
4127 #if DATA_DEBUG
4128 CFIL_LOG(LOG_ERR, "CFIL: SERVICE INJECT-Q: <so %llx> data %llx datalen %u (mbcnt %u)",
4129 (uint64_t)VM_KERNEL_ADDRPERM(so), (uint64_t)VM_KERNEL_ADDRPERM(data), datalen, mbcnt);
4130 #endif
4131 if (cfil_info->cfi_debug) {
4132 CFIL_LOG(LOG_ERR, "CFIL: SERVICE INJECT-Q: <so %llx> data %llx datalen %u (mbcnt %u)",
4133 (uint64_t)VM_KERNEL_ADDRPERM(so), (uint64_t)VM_KERNEL_ADDRPERM(data), datalen, mbcnt);
4134 }
4135
4136 /* Remove data from queue and adjust stats */
4137 cfil_queue_remove(inject_q, data, datalen);
4138 cfi_buf->cfi_pending_first += datalen;
4139 cfi_buf->cfi_pending_mbcnt -= mbcnt;
4140 cfi_buf->cfi_pending_mbnum -= mbnum;
4141 cfil_info_buf_verify(cfi_buf);
4142
4143 if (outgoing) {
4144 error = sosend_reinject(so, NULL, data, NULL, 0);
4145 if (error != 0) {
4146 #if DATA_DEBUG
4147 cfil_info_log(LOG_ERR, cfil_info, "CFIL: Error: sosend_reinject() failed");
4148 CFIL_LOG(LOG_ERR, "### sosend() failed %d", error);
4149 #endif
4150 break;
4151 }
4152 // At least one injection succeeded, need to wake up pending threads.
4153 need_rwakeup = 1;
4154 } else {
4155 data->m_flags |= M_SKIPCFIL;
4156
4157 /*
4158 * NOTE: We currently only support TCP, UDP, ICMP,
4159 * ICMPv6 and RAWIP. For MPTCP and message TCP we'll
4160 * need to call the appropriate sbappendxxx()
4161 * or fix sock_inject_data_in()
4162 */
4163 if (IS_IP_DGRAM(so)) {
4164 if (OPTIONAL_IP_HEADER(so)) {
4165 inp = sotoinpcb(so);
4166 if (inp && (inp->inp_flags & INP_STRIPHDR)) {
4167 mbuf_t data_start = cfil_data_start(data);
4168 if (data_start != NULL && (data_start->m_flags & M_PKTHDR)) {
4169 ip = mtod(data_start, struct ip *);
4170 hlen = IP_VHL_HL(ip->ip_vhl) << 2;
4171 data_start->m_len -= hlen;
4172 data_start->m_pkthdr.len -= hlen;
4173 data_start->m_data += hlen;
4174 }
4175 }
4176 }
4177
4178 if (sbappendchain(&so->so_rcv, data, 0)) {
4179 need_rwakeup = 1;
4180 }
4181 } else {
4182 if (sbappendstream(&so->so_rcv, data)) {
4183 need_rwakeup = 1;
4184 }
4185 }
4186 }
4187
4188 if (outgoing) {
4189 OSAddAtomic64(datalen,
4190 &cfil_stats.cfs_inject_q_out_passed);
4191 } else {
4192 OSAddAtomic64(datalen,
4193 &cfil_stats.cfs_inject_q_in_passed);
4194 }
4195
4196 count++;
4197 }
4198
4199 #if DATA_DEBUG | VERDICT_DEBUG
4200 CFIL_LOG(LOG_ERR, "CFIL: SERVICE INJECT-Q: <so %llx> injected %d",
4201 (uint64_t)VM_KERNEL_ADDRPERM(so), count);
4202 #endif
4203 if (cfil_info->cfi_debug) {
4204 CFIL_LOG(LOG_ERR, "CFIL: SERVICE INJECT-Q: <so %llx> injected %d",
4205 (uint64_t)VM_KERNEL_ADDRPERM(so), count);
4206 }
4207
4208 /* A single wakeup for several packets is more efficient */
4209 if (need_rwakeup) {
4210 if (outgoing == TRUE) {
4211 sowwakeup(so);
4212 } else {
4213 sorwakeup(so);
4214 }
4215 }
4216
4217 if (error != 0 && cfil_info) {
4218 if (error == ENOBUFS) {
4219 OSIncrementAtomic(&cfil_stats.cfs_inject_q_nobufs);
4220 }
4221 if (error == ENOMEM) {
4222 OSIncrementAtomic(&cfil_stats.cfs_inject_q_nomem);
4223 }
4224
4225 if (outgoing) {
4226 cfil_info->cfi_flags |= CFIF_RETRY_INJECT_OUT;
4227 OSIncrementAtomic(&cfil_stats.cfs_inject_q_out_fail);
4228 } else {
4229 cfil_info->cfi_flags |= CFIF_RETRY_INJECT_IN;
4230 OSIncrementAtomic(&cfil_stats.cfs_inject_q_in_fail);
4231 }
4232 }
4233
4234 /*
4235 * Notify
4236 */
4237 if (cfil_info && (cfil_info->cfi_flags & CFIF_SHUT_WR)) {
4238 cfil_sock_notify_shutdown(so, SHUT_WR);
4239 if (cfil_sock_data_pending(&so->so_snd) == 0) {
4240 soshutdownlock_final(so, SHUT_WR);
4241 }
4242 }
4243 if (cfil_info && (cfil_info->cfi_flags & CFIF_CLOSE_WAIT)) {
4244 if (cfil_filters_attached(so) == 0) {
4245 CFIL_LOG(LOG_INFO, "so %llx waking",
4246 (uint64_t)VM_KERNEL_ADDRPERM(so));
4247 wakeup((caddr_t)cfil_info);
4248 }
4249 }
4250
4251 CFIL_INFO_VERIFY(cfil_info);
4252
4253 return error;
4254 }
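
/*
 * On the inbound reinject path above, raw IPv4 sockets that requested
 * header stripping (INP_STRIPHDR) only lose the IP header now, because
 * cfil delays the strip at rip_input() so filters always see full IP
 * frames.  Worked example: for a header without options, ip_vhl = 0x45,
 * so hlen = IP_VHL_HL(0x45) << 2 = 5 << 2 = 20 bytes are trimmed from
 * m_len, m_pkthdr.len and m_data before the chain is appended to the
 * receive buffer.
 */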
4255
4256 static int
4257 cfil_service_pending_queue(struct socket *so, struct cfil_info *cfil_info, uint32_t kcunit, int outgoing)
4258 {
4259 uint64_t passlen, curlen;
4260 mbuf_t data;
4261 unsigned int datalen;
4262 errno_t error = 0;
4263 struct cfil_entry *entry;
4264 struct cfe_buf *entrybuf;
4265 struct cfil_queue *pending_q;
4266 struct cfil_entry *iter_entry = NULL;
4267
4268 CFIL_LOG(LOG_INFO, "so %llx kcunit %u outgoing %d",
4269 (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit, outgoing);
4270
4271 socket_lock_assert_owned(so);
4272
4273 entry = &cfil_info->cfi_entries[kcunit - 1];
4274 if (outgoing) {
4275 entrybuf = &entry->cfe_snd;
4276 } else {
4277 entrybuf = &entry->cfe_rcv;
4278 }
4279
4280 pending_q = &entrybuf->cfe_pending_q;
4281
4282 passlen = entrybuf->cfe_pass_offset - pending_q->q_start;
4283
4284 if (cfil_queue_empty(pending_q)) {
4285 for (iter_entry = SLIST_NEXT(entry, cfe_order_link);
4286 iter_entry != NULL;
4287 iter_entry = SLIST_NEXT(iter_entry, cfe_order_link)) {
4288 error = cfil_data_service_ctl_q(so, cfil_info, CFI_ENTRY_KCUNIT(cfil_info, iter_entry), outgoing);
4289 /* 0 means passed so we can continue */
4290 if (error != 0) {
4291 break;
4292 }
4293 }
4294 goto done;
4295 }
4296
4297 /*
4298 * Locate the chunks of data that we can pass to the next filter
4299 * A data chunk must be on mbuf boundaries
4300 */
4301 curlen = 0;
4302 while ((data = cfil_queue_first(pending_q)) != NULL) {
4303 datalen = cfil_data_length(data, NULL, NULL);
4304
4305 #if DATA_DEBUG
4306 CFIL_LOG(LOG_DEBUG,
4307 "CFIL: SERVICE PENDING-Q: data %llx datalen %u passlen %llu curlen %llu",
4308 (uint64_t)VM_KERNEL_ADDRPERM(data), datalen,
4309 passlen, curlen);
4310 #endif
4311
4312 if (curlen + datalen > passlen) {
4313 break;
4314 }
4315
4316 cfil_queue_remove(pending_q, data, datalen);
4317
4318 curlen += datalen;
4319
4320 for (iter_entry = SLIST_NEXT(entry, cfe_order_link);
4321 iter_entry != NULL;
4322 iter_entry = SLIST_NEXT(iter_entry, cfe_order_link)) {
4323 error = cfil_data_filter(so, cfil_info, CFI_ENTRY_KCUNIT(cfil_info, iter_entry), outgoing,
4324 data, datalen);
4325 /* 0 means passed so we can continue */
4326 if (error != 0) {
4327 break;
4328 }
4329 }
4330 /* When data has passed all filters, re-inject */
4331 if (error == 0) {
4332 if (outgoing) {
4333 cfil_queue_enqueue(
4334 &cfil_info->cfi_snd.cfi_inject_q,
4335 data, datalen);
4336 OSAddAtomic64(datalen,
4337 &cfil_stats.cfs_inject_q_out_enqueued);
4338 } else {
4339 cfil_queue_enqueue(
4340 &cfil_info->cfi_rcv.cfi_inject_q,
4341 data, datalen);
4342 OSAddAtomic64(datalen,
4343 &cfil_stats.cfs_inject_q_in_enqueued);
4344 }
4345 }
4346 }
4347
4348 done:
4349 CFIL_INFO_VERIFY(cfil_info);
4350
4351 return error;
4352 }
4353
4354 int
4355 cfil_update_data_offsets(struct socket *so, struct cfil_info *cfil_info, uint32_t kcunit, int outgoing,
4356 uint64_t pass_offset, uint64_t peek_offset)
4357 {
4358 errno_t error = 0;
4359 struct cfil_entry *entry = NULL;
4360 struct cfe_buf *entrybuf;
4361 int updated = 0;
4362
4363 CFIL_LOG(LOG_INFO, "pass %llu peek %llu", pass_offset, peek_offset);
4364
4365 socket_lock_assert_owned(so);
4366
4367 if (cfil_info == NULL) {
4368 CFIL_LOG(LOG_ERR, "so %llx cfil detached",
4369 (uint64_t)VM_KERNEL_ADDRPERM(so));
4370 error = 0;
4371 goto done;
4372 } else if (cfil_info->cfi_flags & CFIF_DROP) {
4373 CFIL_LOG(LOG_ERR, "so %llx drop set",
4374 (uint64_t)VM_KERNEL_ADDRPERM(so));
4375 error = EPIPE;
4376 goto done;
4377 }
4378
4379 entry = &cfil_info->cfi_entries[kcunit - 1];
4380 if (outgoing) {
4381 entrybuf = &entry->cfe_snd;
4382 } else {
4383 entrybuf = &entry->cfe_rcv;
4384 }
4385
4386 /* Record updated offsets for this content filter */
4387 if (pass_offset > entrybuf->cfe_pass_offset) {
4388 entrybuf->cfe_pass_offset = pass_offset;
4389
4390 if (entrybuf->cfe_peek_offset < entrybuf->cfe_pass_offset) {
4391 entrybuf->cfe_peek_offset = entrybuf->cfe_pass_offset;
4392 }
4393 updated = 1;
4394 } else {
4395 CFIL_LOG(LOG_INFO, "pass_offset %llu <= cfe_pass_offset %llu",
4396 pass_offset, entrybuf->cfe_pass_offset);
4397 }
4398 /* Filter does not want or need to see data that's allowed to pass */
4399 if (peek_offset > entrybuf->cfe_pass_offset &&
4400 peek_offset > entrybuf->cfe_peek_offset) {
4401 entrybuf->cfe_peek_offset = peek_offset;
4402 updated = 1;
4403 }
4404 /* Nothing to do */
4405 if (updated == 0) {
4406 goto done;
4407 }
4408
4409 /* Move data held in control queue to pending queue if needed */
4410 error = cfil_data_service_ctl_q(so, cfil_info, kcunit, outgoing);
4411 if (error != 0) {
4412 CFIL_LOG(LOG_ERR, "cfil_data_service_ctl_q() error %d",
4413 error);
4414 goto done;
4415 }
4416 error = EJUSTRETURN;
4417
4418 done:
4419 /*
4420 * The filter is effectively detached when it has passed everything from both sides
4421 * or when the socket is closed and no more data is waiting
4422 * to be delivered to the filter
4423 */
4424 if (entry != NULL &&
4425 ((entry->cfe_snd.cfe_pass_offset == CFM_MAX_OFFSET &&
4426 entry->cfe_rcv.cfe_pass_offset == CFM_MAX_OFFSET) ||
4427 ((cfil_info->cfi_flags & CFIF_CLOSE_WAIT) &&
4428 cfil_queue_empty(&entry->cfe_snd.cfe_ctl_q) &&
4429 cfil_queue_empty(&entry->cfe_rcv.cfe_ctl_q)))) {
4430 entry->cfe_flags |= CFEF_CFIL_DETACHED;
4431 #if LIFECYCLE_DEBUG
4432 cfil_info_log(LOG_ERR, cfil_info, outgoing ?
4433 "CFIL: LIFECYCLE: OUT - PASSED ALL - DETACH":
4434 "CFIL: LIFECYCLE: IN - PASSED ALL - DETACH");
4435 #endif
4436 CFIL_LOG(LOG_INFO, "so %llx detached %u",
4437 (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit);
4438 if ((cfil_info->cfi_flags & CFIF_CLOSE_WAIT) &&
4439 cfil_filters_attached(so) == 0) {
4440 #if LIFECYCLE_DEBUG
4441 cfil_info_log(LOG_ERR, cfil_info, "CFIL: LIFECYCLE: WAKING");
4442 #endif
4443 CFIL_LOG(LOG_INFO, "so %llx waking",
4444 (uint64_t)VM_KERNEL_ADDRPERM(so));
4445 wakeup((caddr_t)cfil_info);
4446 }
4447 }
4448 CFIL_INFO_VERIFY(cfil_info);
4449 CFIL_LOG(LOG_INFO, "return %d", error);
4450 return error;
4451 }
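
/*
 * Worked example of the offset rules in cfil_update_data_offsets()
 * above, for an entry with cfe_pass_offset = 1000 and
 * cfe_peek_offset = 1500:
 *
 *  - pass_offset = 2000, peek_offset = 0: the pass offset moves to 2000
 *    and drags the peek offset up to 2000 with it, since a filter never
 *    needs to peek at data it has already passed.
 *  - pass_offset = 500, peek_offset = 3000: the pass offset is left
 *    alone (it never moves backwards) and the peek offset is raised to
 *    3000, asking to see more data.
 *  - CFM_MAX_OFFSET for both directions means "pass everything"; once
 *    both sides reach it the entry is marked CFEF_CFIL_DETACHED.
 */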
4452
4453 /*
4454 * Update pass offset for socket when no data is pending
4455 */
4456 static int
4457 cfil_set_socket_pass_offset(struct socket *so, struct cfil_info *cfil_info, int outgoing)
4458 {
4459 struct cfi_buf *cfi_buf;
4460 struct cfil_entry *entry;
4461 struct cfe_buf *entrybuf;
4462 uint32_t kcunit;
4463 uint64_t pass_offset = 0;
4464 boolean_t first = true;
4465
4466 if (cfil_info == NULL) {
4467 return 0;
4468 }
4469
4470 CFIL_LOG(LOG_INFO, "so %llx outgoing %d",
4471 (uint64_t)VM_KERNEL_ADDRPERM(so), outgoing);
4472
4473 socket_lock_assert_owned(so);
4474
4475 if (outgoing) {
4476 cfi_buf = &cfil_info->cfi_snd;
4477 } else {
4478 cfi_buf = &cfil_info->cfi_rcv;
4479 }
4480
4481 CFIL_LOG(LOG_DEBUG, "CFIL: <so %llx, sockID %llu> outgoing %d cfi_pending_first %llu cfi_pending_last %llu",
4482 (uint64_t)VM_KERNEL_ADDRPERM(so), cfil_info->cfi_sock_id, outgoing,
4483 cfi_buf->cfi_pending_first, cfi_buf->cfi_pending_last);
4484
4485 if (cfi_buf->cfi_pending_last - cfi_buf->cfi_pending_first == 0) {
4486 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
4487 entry = &cfil_info->cfi_entries[kcunit - 1];
4488
4489 /* Are we attached to a filter? */
4490 if (entry->cfe_filter == NULL) {
4491 continue;
4492 }
4493
4494 if (outgoing) {
4495 entrybuf = &entry->cfe_snd;
4496 } else {
4497 entrybuf = &entry->cfe_rcv;
4498 }
4499
4500 // Keep track of the smallest pass_offset among filters.
4501 if (first == true ||
4502 entrybuf->cfe_pass_offset < pass_offset) {
4503 pass_offset = entrybuf->cfe_pass_offset;
4504 first = false;
4505 }
4506 }
4507 cfi_buf->cfi_pass_offset = pass_offset;
4508 }
4509
4510 CFIL_LOG(LOG_DEBUG, "CFIL: <so %llx, sockID %llu>, cfi_pass_offset %llu",
4511 (uint64_t)VM_KERNEL_ADDRPERM(so), cfil_info->cfi_sock_id, cfi_buf->cfi_pass_offset);
4512
4513 return 0;
4514 }
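
/*
 * Example for cfil_set_socket_pass_offset() above: with two attached
 * filters whose send-side cfe_pass_offset values are 500 and 1200, the
 * socket-wide cfi_pass_offset becomes 500.  The most restrictive filter
 * therefore decides how far the fast path in cfil_data_common() may let
 * data through without queueing it for verdicts.
 */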
4515
4516 int
4517 cfil_action_data_pass(struct socket *so, struct cfil_info *cfil_info, uint32_t kcunit, int outgoing,
4518 uint64_t pass_offset, uint64_t peek_offset)
4519 {
4520 errno_t error = 0;
4521
4522 CFIL_LOG(LOG_INFO, "");
4523
4524 socket_lock_assert_owned(so);
4525
4526 error = cfil_acquire_sockbuf(so, cfil_info, outgoing);
4527 if (error != 0) {
4528 CFIL_LOG(LOG_INFO, "so %llx %s dropped",
4529 (uint64_t)VM_KERNEL_ADDRPERM(so),
4530 outgoing ? "out" : "in");
4531 goto release;
4532 }
4533
4534 error = cfil_update_data_offsets(so, cfil_info, kcunit, outgoing,
4535 pass_offset, peek_offset);
4536
4537 cfil_service_inject_queue(so, cfil_info, outgoing);
4538
4539 cfil_set_socket_pass_offset(so, cfil_info, outgoing);
4540 release:
4541 CFIL_INFO_VERIFY(cfil_info);
4542 cfil_release_sockbuf(so, outgoing);
4543
4544 return error;
4545 }
4546
4547
4548 static void
4549 cfil_flush_queues(struct socket *so, struct cfil_info *cfil_info)
4550 {
4551 struct cfil_entry *entry;
4552 int kcunit;
4553 uint64_t drained;
4554
4555 if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || cfil_info == NULL) {
4556 goto done;
4557 }
4558
4559 socket_lock_assert_owned(so);
4560
4561 /*
4562 * Flush the output queues and ignore errors as long as
4563 * we are attached
4564 */
4565 (void) cfil_acquire_sockbuf(so, cfil_info, 1);
4566 if (cfil_info != NULL) {
4567 drained = 0;
4568 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
4569 entry = &cfil_info->cfi_entries[kcunit - 1];
4570
4571 drained += cfil_queue_drain(&entry->cfe_snd.cfe_ctl_q);
4572 drained += cfil_queue_drain(&entry->cfe_snd.cfe_pending_q);
4573 }
4574 drained += cfil_queue_drain(&cfil_info->cfi_snd.cfi_inject_q);
4575
4576 if (drained) {
4577 if (cfil_info->cfi_flags & CFIF_DROP) {
4578 OSIncrementAtomic(
4579 &cfil_stats.cfs_flush_out_drop);
4580 } else {
4581 OSIncrementAtomic(
4582 &cfil_stats.cfs_flush_out_close);
4583 }
4584 }
4585 }
4586 cfil_release_sockbuf(so, 1);
4587
4588 /*
4589 * Flush the input queues
4590 */
4591 (void) cfil_acquire_sockbuf(so, cfil_info, 0);
4592 if (cfil_info != NULL) {
4593 drained = 0;
4594 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
4595 entry = &cfil_info->cfi_entries[kcunit - 1];
4596
4597 drained += cfil_queue_drain(
4598 &entry->cfe_rcv.cfe_ctl_q);
4599 drained += cfil_queue_drain(
4600 &entry->cfe_rcv.cfe_pending_q);
4601 }
4602 drained += cfil_queue_drain(&cfil_info->cfi_rcv.cfi_inject_q);
4603
4604 if (drained) {
4605 if (cfil_info->cfi_flags & CFIF_DROP) {
4606 OSIncrementAtomic(
4607 &cfil_stats.cfs_flush_in_drop);
4608 } else {
4609 OSIncrementAtomic(
4610 &cfil_stats.cfs_flush_in_close);
4611 }
4612 }
4613 }
4614 cfil_release_sockbuf(so, 0);
4615 done:
4616 CFIL_INFO_VERIFY(cfil_info);
4617 }
4618
4619 int
4620 cfil_action_drop(struct socket *so, struct cfil_info *cfil_info, uint32_t kcunit)
4621 {
4622 errno_t error = 0;
4623 struct cfil_entry *entry;
4624 struct proc *p;
4625
4626 if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || cfil_info == NULL) {
4627 goto done;
4628 }
4629
4630 socket_lock_assert_owned(so);
4631
4632 entry = &cfil_info->cfi_entries[kcunit - 1];
4633
4634 /* Are we attached to the filter? */
4635 if (entry->cfe_filter == NULL) {
4636 goto done;
4637 }
4638
4639 cfil_info->cfi_flags |= CFIF_DROP;
4640
4641 p = current_proc();
4642
4643 /*
4644 * Force the socket to be marked defunct
4645 * (forcing fixed along with rdar://19391339)
4646 */
4647 if (so->so_cfil_db == NULL) {
4648 error = sosetdefunct(p, so,
4649 SHUTDOWN_SOCKET_LEVEL_CONTENT_FILTER | SHUTDOWN_SOCKET_LEVEL_DISCONNECT_ALL,
4650 FALSE);
4651
4652 /* Flush the socket buffer and disconnect */
4653 if (error == 0) {
4654 error = sodefunct(p, so,
4655 SHUTDOWN_SOCKET_LEVEL_CONTENT_FILTER | SHUTDOWN_SOCKET_LEVEL_DISCONNECT_ALL);
4656 }
4657 }
4658
4659 /* The filter is done, mark as detached */
4660 entry->cfe_flags |= CFEF_CFIL_DETACHED;
4661 #if LIFECYCLE_DEBUG
4662 cfil_info_log(LOG_ERR, cfil_info, "CFIL: LIFECYCLE: DROP - DETACH");
4663 #endif
4664 CFIL_LOG(LOG_INFO, "so %llx detached %u",
4665 (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit);
4666
4667 /* Pending data needs to go */
4668 cfil_flush_queues(so, cfil_info);
4669
4670 if (cfil_info && (cfil_info->cfi_flags & CFIF_CLOSE_WAIT)) {
4671 if (cfil_filters_attached(so) == 0) {
4672 CFIL_LOG(LOG_INFO, "so %llx waking",
4673 (uint64_t)VM_KERNEL_ADDRPERM(so));
4674 wakeup((caddr_t)cfil_info);
4675 }
4676 }
4677 done:
4678 return error;
4679 }
4680
4681 int
4682 cfil_action_bless_client(uint32_t kcunit, struct cfil_msg_hdr *msghdr)
4683 {
4684 errno_t error = 0;
4685 struct cfil_info *cfil_info = NULL;
4686
4687 bool cfil_attached = false;
4688 struct cfil_msg_bless_client *blessmsg = (struct cfil_msg_bless_client *)msghdr;
4689
4690 // Search and lock socket
4691 struct socket *so = cfil_socket_from_client_uuid(blessmsg->cfb_client_uuid, &cfil_attached);
4692 if (so == NULL) {
4693 error = ENOENT;
4694 } else {
4695 // The client gets a pass automatically
4696 cfil_info = (so->so_cfil_db != NULL) ?
4697 cfil_db_get_cfil_info(so->so_cfil_db, msghdr->cfm_sock_id) : so->so_cfil;
4698
4699 if (cfil_attached) {
4700 #if VERDICT_DEBUG
4701 if (cfil_info != NULL) {
4702 CFIL_LOG(LOG_ERR, "CFIL: VERDICT RECEIVED: BLESS %s <so %llx sockID %llu>",
4703 cfil_info->cfi_hash_entry ? "UDP" : "TCP",
4704 (uint64_t)VM_KERNEL_ADDRPERM(so),
4705 cfil_info->cfi_sock_id);
4706 }
4707 #endif
4708 cfil_sock_received_verdict(so);
4709 (void)cfil_action_data_pass(so, cfil_info, kcunit, 1, CFM_MAX_OFFSET, CFM_MAX_OFFSET);
4710 (void)cfil_action_data_pass(so, cfil_info, kcunit, 0, CFM_MAX_OFFSET, CFM_MAX_OFFSET);
4711 } else {
4712 so->so_flags1 |= SOF1_CONTENT_FILTER_SKIP;
4713 }
4714 socket_unlock(so, 1);
4715 }
4716
4717 return error;
4718 }
4719
4720 int
4721 cfil_action_set_crypto_key(uint32_t kcunit, struct cfil_msg_hdr *msghdr)
4722 {
4723 struct content_filter *cfc = NULL;
4724 cfil_crypto_state_t crypto_state = NULL;
4725 struct cfil_msg_set_crypto_key *keymsg = (struct cfil_msg_set_crypto_key *)msghdr;
4726
4727 CFIL_LOG(LOG_NOTICE, "");
4728
4729 if (content_filters == NULL) {
4730 CFIL_LOG(LOG_ERR, "no content filter");
4731 return EINVAL;
4732 }
4733 if (kcunit > MAX_CONTENT_FILTER) {
4734 CFIL_LOG(LOG_ERR, "kcunit %u > MAX_CONTENT_FILTER (%d)",
4735 kcunit, MAX_CONTENT_FILTER);
4736 return EINVAL;
4737 }
4738 crypto_state = cfil_crypto_init_client((uint8_t *)keymsg->crypto_key);
4739 if (crypto_state == NULL) {
4740 CFIL_LOG(LOG_ERR, "failed to initialize crypto state for unit %u)",
4741 kcunit);
4742 return EINVAL;
4743 }
4744
4745 cfil_rw_lock_exclusive(&cfil_lck_rw);
4746
4747 cfc = content_filters[kcunit - 1];
4748 if (cfc == NULL || cfc->cf_kcunit != kcunit) {
4749 CFIL_LOG(LOG_ERR, "bad unit info %u",
4750 kcunit);
4751 cfil_rw_unlock_exclusive(&cfil_lck_rw);
4752 cfil_crypto_cleanup_state(crypto_state);
4753 return EINVAL;
4754 }
4755 if (cfc->cf_crypto_state != NULL) {
4756 cfil_crypto_cleanup_state(cfc->cf_crypto_state);
4757 cfc->cf_crypto_state = NULL;
4758 }
4759 cfc->cf_crypto_state = crypto_state;
4760
4761 cfil_rw_unlock_exclusive(&cfil_lck_rw);
4762 return 0;
4763 }
4764
4765 static int
4766 cfil_update_entry_offsets(struct socket *so, struct cfil_info *cfil_info, int outgoing, unsigned int datalen)
4767 {
4768 struct cfil_entry *entry;
4769 struct cfe_buf *entrybuf;
4770 uint32_t kcunit;
4771
4772 CFIL_LOG(LOG_INFO, "so %llx outgoing %d datalen %u",
4773 (uint64_t)VM_KERNEL_ADDRPERM(so), outgoing, datalen);
4774
4775 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
4776 entry = &cfil_info->cfi_entries[kcunit - 1];
4777
4778 /* Are we attached to the filter? */
4779 if (entry->cfe_filter == NULL) {
4780 continue;
4781 }
4782
4783 if (outgoing) {
4784 entrybuf = &entry->cfe_snd;
4785 } else {
4786 entrybuf = &entry->cfe_rcv;
4787 }
4788
4789 entrybuf->cfe_ctl_q.q_start += datalen;
4790 if (entrybuf->cfe_pass_offset < entrybuf->cfe_ctl_q.q_start) {
4791 entrybuf->cfe_pass_offset = entrybuf->cfe_ctl_q.q_start;
4792 }
4793 entrybuf->cfe_peeked = entrybuf->cfe_ctl_q.q_start;
4794 if (entrybuf->cfe_peek_offset < entrybuf->cfe_pass_offset) {
4795 entrybuf->cfe_peek_offset = entrybuf->cfe_pass_offset;
4796 }
4797
4798 entrybuf->cfe_ctl_q.q_end += datalen;
4799
4800 entrybuf->cfe_pending_q.q_start += datalen;
4801 entrybuf->cfe_pending_q.q_end += datalen;
4802 }
4803 CFIL_INFO_VERIFY(cfil_info);
4804 return 0;
4805 }
4806
4807 int
4808 cfil_data_common(struct socket *so, struct cfil_info *cfil_info, int outgoing, struct sockaddr *to,
4809 struct mbuf *data, struct mbuf *control, uint32_t flags)
4810 {
4811 #pragma unused(to, control, flags)
4812 errno_t error = 0;
4813 unsigned int datalen;
4814 int mbcnt = 0;
4815 int mbnum = 0;
4816 int kcunit;
4817 struct cfi_buf *cfi_buf;
4818 struct mbuf *chain = NULL;
4819
4820 if (cfil_info == NULL) {
4821 CFIL_LOG(LOG_ERR, "so %llx cfil detached",
4822 (uint64_t)VM_KERNEL_ADDRPERM(so));
4823 error = 0;
4824 goto done;
4825 } else if (cfil_info->cfi_flags & CFIF_DROP) {
4826 CFIL_LOG(LOG_ERR, "so %llx drop set",
4827 (uint64_t)VM_KERNEL_ADDRPERM(so));
4828 error = EPIPE;
4829 goto done;
4830 }
4831
4832 datalen = cfil_data_length(data, &mbcnt, &mbnum);
4833
4834 if (datalen == 0) {
4835 error = 0;
4836 goto done;
4837 }
4838
4839 if (outgoing) {
4840 cfi_buf = &cfil_info->cfi_snd;
4841 cfil_info->cfi_byte_outbound_count += datalen;
4842 } else {
4843 cfi_buf = &cfil_info->cfi_rcv;
4844 cfil_info->cfi_byte_inbound_count += datalen;
4845 }
4846
4847 cfi_buf->cfi_pending_last += datalen;
4848 cfi_buf->cfi_pending_mbcnt += mbcnt;
4849 cfi_buf->cfi_pending_mbnum += mbnum;
4850
4851 if (IS_IP_DGRAM(so)) {
4852 if (cfi_buf->cfi_pending_mbnum > cfil_udp_gc_mbuf_num_max ||
4853 cfi_buf->cfi_pending_mbcnt > cfil_udp_gc_mbuf_cnt_max) {
4854 cfi_buf->cfi_tail_drop_cnt++;
4855 cfi_buf->cfi_pending_mbcnt -= mbcnt;
4856 cfi_buf->cfi_pending_mbnum -= mbnum;
4857 return EPIPE;
4858 }
4859 }
4860
4861 cfil_info_buf_verify(cfi_buf);
4862
4863 #if DATA_DEBUG
4864 CFIL_LOG(LOG_DEBUG, "CFIL: QUEUEING DATA: <so %llx> %s: data %llx len %u flags 0x%x nextpkt %llx - cfi_pending_last %llu cfi_pending_mbcnt %u cfi_pass_offset %llu",
4865 (uint64_t)VM_KERNEL_ADDRPERM(so),
4866 outgoing ? "OUT" : "IN",
4867 (uint64_t)VM_KERNEL_ADDRPERM(data), datalen, data->m_flags,
4868 (uint64_t)VM_KERNEL_ADDRPERM(data->m_nextpkt),
4869 cfi_buf->cfi_pending_last,
4870 cfi_buf->cfi_pending_mbcnt,
4871 cfi_buf->cfi_pass_offset);
4872 #endif
4873
4874 /* Fast path when below pass offset */
4875 if (cfi_buf->cfi_pending_last <= cfi_buf->cfi_pass_offset) {
4876 cfil_update_entry_offsets(so, cfil_info, outgoing, datalen);
4877 #if DATA_DEBUG
4878 CFIL_LOG(LOG_DEBUG, "CFIL: QUEUEING DATA: FAST PATH");
4879 #endif
4880 } else {
4881 struct cfil_entry *iter_entry;
4882 SLIST_FOREACH(iter_entry, &cfil_info->cfi_ordered_entries, cfe_order_link) {
4883 // Is cfil attached to this filter?
4884 kcunit = CFI_ENTRY_KCUNIT(cfil_info, iter_entry);
4885 if (IS_ENTRY_ATTACHED(cfil_info, kcunit)) {
4886 if (IS_IP_DGRAM(so) && chain == NULL) {
4887 /* Datagrams only:
4888 * Chain addr (incoming only TDB), control (optional) and data into one chain.
4889 * This full chain will be reinjected into the socket after receiving the verdict.
4890 */
4891 (void) cfil_dgram_save_socket_state(cfil_info, data);
4892 chain = sbconcat_mbufs(NULL, outgoing ? NULL : to, data, control);
4893 if (chain == NULL) {
4894 return ENOBUFS;
4895 }
4896 data = chain;
4897 }
4898 error = cfil_data_filter(so, cfil_info, kcunit, outgoing, data,
4899 datalen);
4900 }
4901 /* 0 means passed so continue with next filter */
4902 if (error != 0) {
4903 break;
4904 }
4905 }
4906 }
4907
4908 /* Move cursor if no filter claimed the data */
4909 if (error == 0) {
4910 cfi_buf->cfi_pending_first += datalen;
4911 cfi_buf->cfi_pending_mbcnt -= mbcnt;
4912 cfi_buf->cfi_pending_mbnum -= mbnum;
4913 cfil_info_buf_verify(cfi_buf);
4914 }
4915 done:
4916 CFIL_INFO_VERIFY(cfil_info);
4917
4918 return error;
4919 }
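
/*
 * Fast/slow path illustration for cfil_data_common() above, assuming
 * cfi_pass_offset = 4096 on the send side:
 *
 *  - 1024 bytes arrive and cfi_pending_last becomes 1024: since
 *    1024 <= 4096 the fast path runs, the per-entry queue offsets are
 *    advanced by cfil_update_entry_offsets() and nothing is copied to
 *    the filters.
 *  - a later write pushes cfi_pending_last to 5120: the data is now
 *    enqueued on each attached entry's control queue (for datagrams,
 *    after being chained with its address and control mbufs) and must
 *    wait for pass/peek verdicts before it can be reinjected.
 */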
4920
4921 /*
4922 * Callback from socket layer sosendxxx()
4923 */
4924 int
4925 cfil_sock_data_out(struct socket *so, struct sockaddr *to,
4926 struct mbuf *data, struct mbuf *control, uint32_t flags)
4927 {
4928 int error = 0;
4929 int new_filter_control_unit = 0;
4930
4931 if (IS_IP_DGRAM(so)) {
4932 return cfil_sock_udp_handle_data(TRUE, so, NULL, to, data, control, flags);
4933 }
4934
4935 if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL) {
4936 /* Drop pre-existing TCP sockets if filter is enabled now */
4937 if (cfil_active_count > 0 && !SKIP_FILTER_FOR_TCP_SOCKET(so)) {
4938 new_filter_control_unit = necp_socket_get_content_filter_control_unit(so);
4939 if (new_filter_control_unit > 0) {
4940 return EPIPE;
4941 }
4942 }
4943 return 0;
4944 }
4945
4946 /* Drop pre-existing TCP sockets when filter state changed */
4947 new_filter_control_unit = necp_socket_get_content_filter_control_unit(so);
4948 if (new_filter_control_unit > 0 && new_filter_control_unit != so->so_cfil->cfi_filter_control_unit && !SKIP_FILTER_FOR_TCP_SOCKET(so)) {
4949 return EPIPE;
4950 }
4951
4952 /*
4953 * Pass initial data for TFO.
4954 */
4955 if (IS_INITIAL_TFO_DATA(so)) {
4956 return 0;
4957 }
4958
4959 socket_lock_assert_owned(so);
4960
4961 if (so->so_cfil->cfi_flags & CFIF_DROP) {
4962 CFIL_LOG(LOG_ERR, "so %llx drop set",
4963 (uint64_t)VM_KERNEL_ADDRPERM(so));
4964 return EPIPE;
4965 }
4966 if (control != NULL) {
4967 CFIL_LOG(LOG_ERR, "so %llx control",
4968 (uint64_t)VM_KERNEL_ADDRPERM(so));
4969 OSIncrementAtomic(&cfil_stats.cfs_data_out_control);
4970 }
4971 if ((flags & MSG_OOB)) {
4972 CFIL_LOG(LOG_ERR, "so %llx MSG_OOB",
4973 (uint64_t)VM_KERNEL_ADDRPERM(so));
4974 OSIncrementAtomic(&cfil_stats.cfs_data_out_oob);
4975 }
4976 if ((so->so_snd.sb_flags & SB_LOCK) == 0) {
4977 panic("so %p SB_LOCK not set", so);
4978 }
4979
4980 if (so->so_snd.sb_cfil_thread != NULL) {
4981 panic("%s sb_cfil_thread %p not NULL", __func__,
4982 so->so_snd.sb_cfil_thread);
4983 }
4984
4985 error = cfil_data_common(so, so->so_cfil, 1, to, data, control, flags);
4986
4987 return error;
4988 }
4989
4990 /*
4991 * Callback from socket layer sbappendxxx()
4992 */
4993 int
4994 cfil_sock_data_in(struct socket *so, struct sockaddr *from,
4995 struct mbuf *data, struct mbuf *control, uint32_t flags)
4996 {
4997 int error = 0;
4998 int new_filter_control_unit = 0;
4999
5000 if (IS_IP_DGRAM(so)) {
5001 return cfil_sock_udp_handle_data(FALSE, so, NULL, from, data, control, flags);
5002 }
5003
5004 if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL) {
5005 /* Drop pre-existing TCP sockets if filter is enabled now */
5006 if (cfil_active_count > 0 && !SKIP_FILTER_FOR_TCP_SOCKET(so)) {
5007 new_filter_control_unit = necp_socket_get_content_filter_control_unit(so);
5008 if (new_filter_control_unit > 0) {
5009 return EPIPE;
5010 }
5011 }
5012 return 0;
5013 }
5014
5015 /* Drop pre-existing TCP sockets when filter state changed */
5016 new_filter_control_unit = necp_socket_get_content_filter_control_unit(so);
5017 if (new_filter_control_unit > 0 && new_filter_control_unit != so->so_cfil->cfi_filter_control_unit && !SKIP_FILTER_FOR_TCP_SOCKET(so)) {
5018 return EPIPE;
5019 }
5020
5021 /*
5022 * Pass initial data for TFO.
5023 */
5024 if (IS_INITIAL_TFO_DATA(so)) {
5025 return 0;
5026 }
5027
5028 socket_lock_assert_owned(so);
5029
5030 if (so->so_cfil->cfi_flags & CFIF_DROP) {
5031 CFIL_LOG(LOG_ERR, "so %llx drop set",
5032 (uint64_t)VM_KERNEL_ADDRPERM(so));
5033 return EPIPE;
5034 }
5035 if (control != NULL) {
5036 CFIL_LOG(LOG_ERR, "so %llx control",
5037 (uint64_t)VM_KERNEL_ADDRPERM(so));
5038 OSIncrementAtomic(&cfil_stats.cfs_data_in_control);
5039 }
5040 if (data->m_type == MT_OOBDATA) {
5041 CFIL_LOG(LOG_ERR, "so %llx MSG_OOB",
5042 (uint64_t)VM_KERNEL_ADDRPERM(so));
5043 OSIncrementAtomic(&cfil_stats.cfs_data_in_oob);
5044 }
5045 error = cfil_data_common(so, so->so_cfil, 0, from, data, control, flags);
5046
5047 return error;
5048 }
5049
5050 /*
5051 * Callback from socket layer soshutdownxxx()
5052 *
5053 * We may delay the write-side shutdown while outgoing data is still in process.
5054 *
5055 * There is no point in delaying the shutdown read because the process
5056 * indicated that it does not want to read any more data.
5057 */
5058 int
5059 cfil_sock_shutdown(struct socket *so, int *how)
5060 {
5061 int error = 0;
5062
5063 if (IS_IP_DGRAM(so)) {
5064 return cfil_sock_udp_shutdown(so, how);
5065 }
5066
5067 if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL) {
5068 goto done;
5069 }
5070
5071 socket_lock_assert_owned(so);
5072
5073 CFIL_LOG(LOG_INFO, "so %llx how %d",
5074 (uint64_t)VM_KERNEL_ADDRPERM(so), *how);
5075
5076 /*
5077 * Check the state of the socket before the content filter
5078 */
5079 if (*how != SHUT_WR && (so->so_state & SS_CANTRCVMORE) != 0) {
5080 /* read already shut down */
5081 error = ENOTCONN;
5082 goto done;
5083 }
5084 if (*how != SHUT_RD && (so->so_state & SS_CANTSENDMORE) != 0) {
5085 /* write already shut down */
5086 error = ENOTCONN;
5087 goto done;
5088 }
5089
5090 if ((so->so_cfil->cfi_flags & CFIF_DROP) != 0) {
5091 CFIL_LOG(LOG_ERR, "so %llx drop set",
5092 (uint64_t)VM_KERNEL_ADDRPERM(so));
5093 goto done;
5094 }
5095
5096 /*
5097 * shutdown read: SHUT_RD or SHUT_RDWR
5098 */
5099 if (*how != SHUT_WR) {
5100 if (so->so_cfil->cfi_flags & CFIF_SHUT_RD) {
5101 error = ENOTCONN;
5102 goto done;
5103 }
5104 so->so_cfil->cfi_flags |= CFIF_SHUT_RD;
5105 cfil_sock_notify_shutdown(so, SHUT_RD);
5106 }
5107 /*
5108 * shutdown write: SHUT_WR or SHUT_RDWR
5109 */
5110 if (*how != SHUT_RD) {
5111 if (so->so_cfil->cfi_flags & CFIF_SHUT_WR) {
5112 error = ENOTCONN;
5113 goto done;
5114 }
5115 so->so_cfil->cfi_flags |= CFIF_SHUT_WR;
5116 cfil_sock_notify_shutdown(so, SHUT_WR);
5117 /*
5118 * When outgoing data is pending, we delay the shutdown at the
5119 * protocol level until the content filters give the final
5120 * verdict on the pending data.
5121 */
5122 if (cfil_sock_data_pending(&so->so_snd) != 0) {
5123 /*
5124 * When shutting down the read and write sides at once
5125 * we can proceed to the final shutdown of the read
5126 * side. Otherwise, we just return.
5127 */
5128 if (*how == SHUT_WR) {
5129 error = EJUSTRETURN;
5130 } else if (*how == SHUT_RDWR) {
5131 *how = SHUT_RD;
5132 }
5133 }
5134 }
5135 done:
5136 return error;
5137 }
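
/*
 * Example of the deferral above: a process calls shutdown(s, SHUT_RDWR)
 * while the send side still has data awaiting a verdict.  The function
 * records CFIF_SHUT_RD and CFIF_SHUT_WR, notifies the filters, and
 * rewrites *how to SHUT_RD so only the read side is shut down at the
 * protocol level now; the write side completes later via
 * cfil_service_inject_queue() -> soshutdownlock_final() once
 * cfil_sock_data_pending() drops to zero.  For a plain SHUT_WR in the
 * same situation the function returns EJUSTRETURN so the caller does
 * nothing for now.
 */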
5138
5139 /*
5140 * This is called when the socket is closed and there is no more
5141 * opportunity for filtering
5142 */
5143 void
5144 cfil_sock_is_closed(struct socket *so)
5145 {
5146 errno_t error = 0;
5147 int kcunit;
5148
5149 if (IS_IP_DGRAM(so)) {
5150 cfil_sock_udp_is_closed(so);
5151 return;
5152 }
5153
5154 if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL) {
5155 return;
5156 }
5157
5158 CFIL_LOG(LOG_INFO, "so %llx", (uint64_t)VM_KERNEL_ADDRPERM(so));
5159
5160 socket_lock_assert_owned(so);
5161
5162 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
5163 /* Let the filters know of the closing */
5164 error = cfil_dispatch_closed_event(so, so->so_cfil, kcunit);
5165 }
5166
5167 /* Last chance to push passed data out */
5168 error = cfil_acquire_sockbuf(so, so->so_cfil, 1);
5169 if (error == 0) {
5170 cfil_service_inject_queue(so, so->so_cfil, 1);
5171 }
5172 cfil_release_sockbuf(so, 1);
5173
5174 so->so_cfil->cfi_flags |= CFIF_SOCK_CLOSED;
5175
5176 /* Pending data needs to go */
5177 cfil_flush_queues(so, so->so_cfil);
5178
5179 CFIL_INFO_VERIFY(so->so_cfil);
5180 }
5181
5182 /*
5183 * This is called when the socket is disconnected so let the filters
5184 * know about the disconnection and that no more data will come
5185 *
5186 * The how parameter has the same values as soshutdown()
5187 */
5188 void
5189 cfil_sock_notify_shutdown(struct socket *so, int how)
5190 {
5191 errno_t error = 0;
5192 int kcunit;
5193
5194 if (IS_IP_DGRAM(so)) {
5195 cfil_sock_udp_notify_shutdown(so, how, 0, 0);
5196 return;
5197 }
5198
5199 if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL) {
5200 return;
5201 }
5202
5203 CFIL_LOG(LOG_INFO, "so %llx how %d",
5204 (uint64_t)VM_KERNEL_ADDRPERM(so), how);
5205
5206 socket_lock_assert_owned(so);
5207
5208 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
5209 /* Disconnect incoming side */
5210 if (how != SHUT_WR) {
5211 error = cfil_dispatch_disconnect_event(so, so->so_cfil, kcunit, 0);
5212 }
5213 /* Disconnect outgoing side */
5214 if (how != SHUT_RD) {
5215 error = cfil_dispatch_disconnect_event(so, so->so_cfil, kcunit, 1);
5216 }
5217 }
5218 }
5219
5220 static int
5221 cfil_filters_attached(struct socket *so)
5222 {
5223 struct cfil_entry *entry;
5224 uint32_t kcunit;
5225 int attached = 0;
5226
5227 if (IS_IP_DGRAM(so)) {
5228 return cfil_filters_udp_attached(so, FALSE);
5229 }
5230
5231 if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL) {
5232 return 0;
5233 }
5234
5235 socket_lock_assert_owned(so);
5236
5237 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
5238 entry = &so->so_cfil->cfi_entries[kcunit - 1];
5239
5240 /* Are we attached to the filter? */
5241 if (entry->cfe_filter == NULL) {
5242 continue;
5243 }
5244 if ((entry->cfe_flags & CFEF_SENT_SOCK_ATTACHED) == 0) {
5245 continue;
5246 }
5247 if ((entry->cfe_flags & CFEF_CFIL_DETACHED) != 0) {
5248 continue;
5249 }
5250 attached = 1;
5251 break;
5252 }
5253
5254 return attached;
5255 }
5256
5257 /*
5258 * This is called when the socket is closed and we are waiting for
5259 * the filters to give the final pass or drop
5260 */
5261 void
5262 cfil_sock_close_wait(struct socket *so)
5263 {
5264 lck_mtx_t *mutex_held;
5265 struct timespec ts;
5266 int error;
5267
5268 if (IS_IP_DGRAM(so)) {
5269 cfil_sock_udp_close_wait(so);
5270 return;
5271 }
5272
5273 if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL) {
5274 return;
5275 }
5276
5277 CFIL_LOG(LOG_INFO, "so %llx", (uint64_t)VM_KERNEL_ADDRPERM(so));
5278
5279 if (so->so_proto->pr_getlock != NULL) {
5280 mutex_held = (*so->so_proto->pr_getlock)(so, PR_F_WILLUNLOCK);
5281 } else {
5282 mutex_held = so->so_proto->pr_domain->dom_mtx;
5283 }
5284 LCK_MTX_ASSERT(mutex_held, LCK_MTX_ASSERT_OWNED);
5285
5286 while (cfil_filters_attached(so)) {
5287 /*
5288 * Notify the filters we are going away so they can detach
5289 */
5290 cfil_sock_notify_shutdown(so, SHUT_RDWR);
5291
5292 /*
5293  * Make sure we still need to wait after the filters are notified
5294  * of the disconnection
5295 */
5296 if (cfil_filters_attached(so) == 0) {
5297 break;
5298 }
5299
5300 CFIL_LOG(LOG_INFO, "so %llx waiting",
5301 (uint64_t)VM_KERNEL_ADDRPERM(so));
5302
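/*
 * cfil_close_wait_timeout appears to be expressed in milliseconds: the
 * division below yields whole seconds and the remainder is scaled to
 * nanoseconds via NSEC_PER_USEC * 1000 (i.e. nanoseconds per millisecond).
 */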
5303 ts.tv_sec = cfil_close_wait_timeout / 1000;
5304 ts.tv_nsec = (cfil_close_wait_timeout % 1000) *
5305 NSEC_PER_USEC * 1000;
5306
5307 OSIncrementAtomic(&cfil_stats.cfs_close_wait);
5308 so->so_cfil->cfi_flags |= CFIF_CLOSE_WAIT;
5309 error = msleep((caddr_t)so->so_cfil, mutex_held,
5310 PSOCK | PCATCH, "cfil_sock_close_wait", &ts);
5311 so->so_cfil->cfi_flags &= ~CFIF_CLOSE_WAIT;
5312
5313 CFIL_LOG(LOG_NOTICE, "so %llx timed out %d",
5314 (uint64_t)VM_KERNEL_ADDRPERM(so), (error != 0));
5315
5316 /*
5317 * Force close in case of timeout
5318 */
5319 if (error != 0) {
5320 OSIncrementAtomic(&cfil_stats.cfs_close_wait_timeout);
5321 break;
5322 }
5323 }
5324 }
5325
5326 /*
5327  * Returns the size of the data held by the content filter for the given socket buffer
5328 */
5329 int32_t
5330 cfil_sock_data_pending(struct sockbuf *sb)
5331 {
5332 struct socket *so = sb->sb_so;
5333 uint64_t pending = 0;
5334
5335 if (IS_IP_DGRAM(so)) {
5336 return cfil_sock_udp_data_pending(sb, FALSE);
5337 }
5338
5339 if ((so->so_flags & SOF_CONTENT_FILTER) != 0 && so->so_cfil != NULL) {
5340 struct cfi_buf *cfi_buf;
5341
5342 socket_lock_assert_owned(so);
5343
5344 if ((sb->sb_flags & SB_RECV) == 0) {
5345 cfi_buf = &so->so_cfil->cfi_snd;
5346 } else {
5347 cfi_buf = &so->so_cfil->cfi_rcv;
5348 }
5349
5350 pending = cfi_buf->cfi_pending_last -
5351 cfi_buf->cfi_pending_first;
5352
5353 /*
5354  * If we are limited by the "chars of mbufs used", roughly
5355  * adjust so we won't overcommit
5356 */
5357 if (pending > (uint64_t)cfi_buf->cfi_pending_mbcnt) {
5358 pending = cfi_buf->cfi_pending_mbcnt;
5359 }
5360 }
5361
5362 VERIFY(pending < INT32_MAX);
5363
5364 return (int32_t)(pending);
5365 }
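/*
 * In short: the pending byte count is the span between the first and last
 * pending offsets, reduced to cfi_pending_mbcnt when the mbuf accounting is
 * smaller, so the lower of the two estimates is reported.
 */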
5366
5367 /*
5368 * Return the socket buffer space used by data being held by content filters
5369 * so processes won't clog the socket buffer
5370 */
5371 int32_t
5372 cfil_sock_data_space(struct sockbuf *sb)
5373 {
5374 struct socket *so = sb->sb_so;
5375 uint64_t pending = 0;
5376
5377 if (IS_IP_DGRAM(so)) {
5378 return cfil_sock_udp_data_pending(sb, TRUE);
5379 }
5380
5381 if ((so->so_flags & SOF_CONTENT_FILTER) != 0 && so->so_cfil != NULL &&
5382 so->so_snd.sb_cfil_thread != current_thread()) {
5383 struct cfi_buf *cfi_buf;
5384
5385 socket_lock_assert_owned(so);
5386
5387 if ((sb->sb_flags & SB_RECV) == 0) {
5388 cfi_buf = &so->so_cfil->cfi_snd;
5389 } else {
5390 cfi_buf = &so->so_cfil->cfi_rcv;
5391 }
5392
5393 pending = cfi_buf->cfi_pending_last -
5394 cfi_buf->cfi_pending_first;
5395
5396 /*
5397  * If we are limited by the "chars of mbufs used", roughly
5398  * adjust so we won't overcommit
5399 */
5400 if ((uint64_t)cfi_buf->cfi_pending_mbcnt > pending) {
5401 pending = cfi_buf->cfi_pending_mbcnt;
5402 }
5403 }
5404
5405 VERIFY(pending < INT32_MAX);
5406
5407 return (int32_t)(pending);
5408 }
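/*
 * Unlike cfil_sock_data_pending() above, this reports the larger of the byte
 * span and the mbuf accounting so the space charged against the socket
 * buffer is never under-estimated. The sb_cfil_thread != current_thread()
 * check presumably keeps the thread currently injecting filtered data from
 * having that same data counted against the space available to it.
 */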
5409
5410 /*
5411 * A callback from the socket and protocol layer when data becomes
5412 * available in the socket buffer to give a chance for the content filter
5413 * to re-inject data that was held back
5414 */
5415 void
5416 cfil_sock_buf_update(struct sockbuf *sb)
5417 {
5418 int outgoing;
5419 int error;
5420 struct socket *so = sb->sb_so;
5421
5422 if (IS_IP_DGRAM(so)) {
5423 cfil_sock_udp_buf_update(sb);
5424 return;
5425 }
5426
5427 if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL) {
5428 return;
5429 }
5430
5431 if (!cfil_sbtrim) {
5432 return;
5433 }
5434
5435 socket_lock_assert_owned(so);
5436
5437 if ((sb->sb_flags & SB_RECV) == 0) {
5438 if ((so->so_cfil->cfi_flags & CFIF_RETRY_INJECT_OUT) == 0) {
5439 return;
5440 }
5441 outgoing = 1;
5442 OSIncrementAtomic(&cfil_stats.cfs_inject_q_out_retry);
5443 } else {
5444 if ((so->so_cfil->cfi_flags & CFIF_RETRY_INJECT_IN) == 0) {
5445 return;
5446 }
5447 outgoing = 0;
5448 OSIncrementAtomic(&cfil_stats.cfs_inject_q_in_retry);
5449 }
5450
5451 CFIL_LOG(LOG_NOTICE, "so %llx outgoing %d",
5452 (uint64_t)VM_KERNEL_ADDRPERM(so), outgoing);
5453
5454 error = cfil_acquire_sockbuf(so, so->so_cfil, outgoing);
5455 if (error == 0) {
5456 cfil_service_inject_queue(so, so->so_cfil, outgoing);
5457 }
5458 cfil_release_sockbuf(so, outgoing);
5459 }
5460
5461 int
5462 sysctl_cfil_filter_list(struct sysctl_oid *oidp, void *arg1, int arg2,
5463 struct sysctl_req *req)
5464 {
5465 #pragma unused(oidp, arg1, arg2)
5466 int error = 0;
5467 size_t len = 0;
5468 u_int32_t i;
5469
5470 /* Read only */
5471 if (req->newptr != USER_ADDR_NULL) {
5472 return EPERM;
5473 }
5474
5475 cfil_rw_lock_shared(&cfil_lck_rw);
5476
5477 for (i = 0; content_filters != NULL && i < MAX_CONTENT_FILTER; i++) {
5478 struct cfil_filter_stat filter_stat;
5479 struct content_filter *cfc = content_filters[i];
5480
5481 if (cfc == NULL) {
5482 continue;
5483 }
5484
5485 /* If just asking for the size */
5486 if (req->oldptr == USER_ADDR_NULL) {
5487 len += sizeof(struct cfil_filter_stat);
5488 continue;
5489 }
5490
5491 bzero(&filter_stat, sizeof(struct cfil_filter_stat));
5492 filter_stat.cfs_len = sizeof(struct cfil_filter_stat);
5493 filter_stat.cfs_filter_id = cfc->cf_kcunit;
5494 filter_stat.cfs_flags = cfc->cf_flags;
5495 filter_stat.cfs_sock_count = cfc->cf_sock_count;
5496 filter_stat.cfs_necp_control_unit = cfc->cf_necp_control_unit;
5497
5498 error = SYSCTL_OUT(req, &filter_stat,
5499 sizeof(struct cfil_filter_stat));
5500 if (error != 0) {
5501 break;
5502 }
5503 }
5504 /* If just asking for the size */
5505 if (req->oldptr == USER_ADDR_NULL) {
5506 req->oldidx = len;
5507 }
5508
5509 cfil_rw_unlock_shared(&cfil_lck_rw);
5510
5511 #if SHOW_DEBUG
5512 if (req->oldptr != USER_ADDR_NULL) {
5513 for (i = 1; content_filters != NULL && i <= MAX_CONTENT_FILTER; i++) {
5514 cfil_filter_show(i);
5515 }
5516 }
5517 #endif
5518
5519 return error;
5520 }
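/*
 * Illustrative userspace sketch of the two-step sysctl pattern this handler
 * supports. The MIB name "net.cfil.filter_list" is assumed from the handler
 * name and is not taken from this file:
 *
 *	#include <sys/sysctl.h>
 *	#include <stdlib.h>
 *
 *	size_t len = 0;
 *	// First call with a NULL buffer: the handler only reports the size.
 *	if (sysctlbyname("net.cfil.filter_list", NULL, &len, NULL, 0) == 0 &&
 *	    len != 0) {
 *		void *buf = malloc(len);
 *		if (buf != NULL &&
 *		    sysctlbyname("net.cfil.filter_list", buf, &len, NULL, 0) == 0) {
 *			// buf now holds an array of struct cfil_filter_stat
 *		}
 *		free(buf);
 *	}
 */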
5521
5522 static int
5523 sysctl_cfil_sock_list(struct sysctl_oid *oidp, void *arg1, int arg2,
5524 struct sysctl_req *req)
5525 {
5526 #pragma unused(oidp, arg1, arg2)
5527 int error = 0;
5528 u_int32_t i;
5529 struct cfil_info *cfi;
5530
5531 /* Read only */
5532 if (req->newptr != USER_ADDR_NULL) {
5533 return EPERM;
5534 }
5535
5536 cfil_rw_lock_shared(&cfil_lck_rw);
5537
5538 /*
5539  * If just asking for the size
5540 */
5541 if (req->oldptr == USER_ADDR_NULL) {
5542 req->oldidx = cfil_sock_attached_count *
5543 sizeof(struct cfil_sock_stat);
5544  /* Bump the length in case new sockets get attached */
5545 req->oldidx += req->oldidx >> 3;
5546 goto done;
5547 }
5548
5549 TAILQ_FOREACH(cfi, &cfil_sock_head, cfi_link) {
5550 struct cfil_entry *entry;
5551 struct cfil_sock_stat stat;
5552 struct socket *so = cfi->cfi_so;
5553
5554 bzero(&stat, sizeof(struct cfil_sock_stat));
5555 stat.cfs_len = sizeof(struct cfil_sock_stat);
5556 stat.cfs_sock_id = cfi->cfi_sock_id;
5557 stat.cfs_flags = cfi->cfi_flags;
5558
5559 if (so != NULL) {
5560 stat.cfs_pid = so->last_pid;
5561 memcpy(stat.cfs_uuid, so->last_uuid,
5562 sizeof(uuid_t));
5563 if (so->so_flags & SOF_DELEGATED) {
5564 stat.cfs_e_pid = so->e_pid;
5565 memcpy(stat.cfs_e_uuid, so->e_uuid,
5566 sizeof(uuid_t));
5567 } else {
5568 stat.cfs_e_pid = so->last_pid;
5569 memcpy(stat.cfs_e_uuid, so->last_uuid,
5570 sizeof(uuid_t));
5571 }
5572
5573 stat.cfs_sock_family = so->so_proto->pr_domain->dom_family;
5574 stat.cfs_sock_type = so->so_proto->pr_type;
5575 stat.cfs_sock_protocol = so->so_proto->pr_protocol;
5576 }
5577
5578 stat.cfs_snd.cbs_pending_first =
5579 cfi->cfi_snd.cfi_pending_first;
5580 stat.cfs_snd.cbs_pending_last =
5581 cfi->cfi_snd.cfi_pending_last;
5582 stat.cfs_snd.cbs_inject_q_len =
5583 cfil_queue_len(&cfi->cfi_snd.cfi_inject_q);
5584 stat.cfs_snd.cbs_pass_offset =
5585 cfi->cfi_snd.cfi_pass_offset;
5586
5587 stat.cfs_rcv.cbs_pending_first =
5588 cfi->cfi_rcv.cfi_pending_first;
5589 stat.cfs_rcv.cbs_pending_last =
5590 cfi->cfi_rcv.cfi_pending_last;
5591 stat.cfs_rcv.cbs_inject_q_len =
5592 cfil_queue_len(&cfi->cfi_rcv.cfi_inject_q);
5593 stat.cfs_rcv.cbs_pass_offset =
5594 cfi->cfi_rcv.cfi_pass_offset;
5595
5596 for (i = 0; i < MAX_CONTENT_FILTER; i++) {
5597 struct cfil_entry_stat *estat;
5598 struct cfe_buf *ebuf;
5599 struct cfe_buf_stat *sbuf;
5600
5601 entry = &cfi->cfi_entries[i];
5602
5603 estat = &stat.ces_entries[i];
5604
5605 estat->ces_len = sizeof(struct cfil_entry_stat);
5606 estat->ces_filter_id = entry->cfe_filter ?
5607 entry->cfe_filter->cf_kcunit : 0;
5608 estat->ces_flags = entry->cfe_flags;
5609 estat->ces_necp_control_unit =
5610 entry->cfe_necp_control_unit;
5611
5612 estat->ces_last_event.tv_sec =
5613 (int64_t)entry->cfe_last_event.tv_sec;
5614 estat->ces_last_event.tv_usec =
5615 (int64_t)entry->cfe_last_event.tv_usec;
5616
5617 estat->ces_last_action.tv_sec =
5618 (int64_t)entry->cfe_last_action.tv_sec;
5619 estat->ces_last_action.tv_usec =
5620 (int64_t)entry->cfe_last_action.tv_usec;
5621
5622 ebuf = &entry->cfe_snd;
5623 sbuf = &estat->ces_snd;
5624 sbuf->cbs_pending_first =
5625 cfil_queue_offset_first(&ebuf->cfe_pending_q);
5626 sbuf->cbs_pending_last =
5627 cfil_queue_offset_last(&ebuf->cfe_pending_q);
5628 sbuf->cbs_ctl_first =
5629 cfil_queue_offset_first(&ebuf->cfe_ctl_q);
5630 sbuf->cbs_ctl_last =
5631 cfil_queue_offset_last(&ebuf->cfe_ctl_q);
5632 sbuf->cbs_pass_offset = ebuf->cfe_pass_offset;
5633 sbuf->cbs_peek_offset = ebuf->cfe_peek_offset;
5634 sbuf->cbs_peeked = ebuf->cfe_peeked;
5635
5636 ebuf = &entry->cfe_rcv;
5637 sbuf = &estat->ces_rcv;
5638 sbuf->cbs_pending_first =
5639 cfil_queue_offset_first(&ebuf->cfe_pending_q);
5640 sbuf->cbs_pending_last =
5641 cfil_queue_offset_last(&ebuf->cfe_pending_q);
5642 sbuf->cbs_ctl_first =
5643 cfil_queue_offset_first(&ebuf->cfe_ctl_q);
5644 sbuf->cbs_ctl_last =
5645 cfil_queue_offset_last(&ebuf->cfe_ctl_q);
5646 sbuf->cbs_pass_offset = ebuf->cfe_pass_offset;
5647 sbuf->cbs_peek_offset = ebuf->cfe_peek_offset;
5648 sbuf->cbs_peeked = ebuf->cfe_peeked;
5649 }
5650 error = SYSCTL_OUT(req, &stat,
5651 sizeof(struct cfil_sock_stat));
5652 if (error != 0) {
5653 break;
5654 }
5655 }
5656 done:
5657 cfil_rw_unlock_shared(&cfil_lck_rw);
5658
5659 #if SHOW_DEBUG
5660 if (req->oldptr != USER_ADDR_NULL) {
5661 cfil_info_show();
5662 }
5663 #endif
5664
5665 return error;
5666 }
5667
5668 /*
5669 * UDP Socket Support
5670 */
5671 static void
5672 cfil_hash_entry_log(int level, struct socket *so, struct cfil_hash_entry *entry, uint64_t sockId, const char* msg)
5673 {
5674 char local[MAX_IPv6_STR_LEN + 6];
5675 char remote[MAX_IPv6_STR_LEN + 6];
5676 const void *addr;
5677
5678  // No socket or no entry, no-op
5679 if (so == NULL || entry == NULL) {
5680 return;
5681 }
5682
5683 local[0] = remote[0] = 0x0;
5684
5685 switch (entry->cfentry_family) {
5686 case AF_INET6:
5687 addr = &entry->cfentry_laddr.addr6;
5688 inet_ntop(AF_INET6, addr, local, sizeof(local));
5689 addr = &entry->cfentry_faddr.addr6;
5690  inet_ntop(AF_INET6, addr, remote, sizeof(remote));
5691 break;
5692 case AF_INET:
5693 addr = &entry->cfentry_laddr.addr46.ia46_addr4.s_addr;
5694 inet_ntop(AF_INET, addr, local, sizeof(local));
5695 addr = &entry->cfentry_faddr.addr46.ia46_addr4.s_addr;
5696  inet_ntop(AF_INET, addr, remote, sizeof(remote));
5697 break;
5698 default:
5699 return;
5700 }
5701
5702 CFIL_LOG(level, "<%s>: <%s(%d) so %llx, entry %p, sockID %llu> lport %d fport %d laddr %s faddr %s hash %X",
5703 msg,
5704 IS_UDP(so) ? "UDP" : "proto", GET_SO_PROTO(so),
5705 (uint64_t)VM_KERNEL_ADDRPERM(so), entry, sockId,
5706 ntohs(entry->cfentry_lport), ntohs(entry->cfentry_fport), local, remote,
5707 entry->cfentry_flowhash);
5708 }
5709
5710 static void
5711 cfil_inp_log(int level, struct socket *so, const char* msg)
5712 {
5713 struct inpcb *inp = NULL;
5714 char local[MAX_IPv6_STR_LEN + 6];
5715 char remote[MAX_IPv6_STR_LEN + 6];
5716 const void *addr;
5717
5718 if (so == NULL) {
5719 return;
5720 }
5721
5722 inp = sotoinpcb(so);
5723 if (inp == NULL) {
5724 return;
5725 }
5726
5727 local[0] = remote[0] = 0x0;
5728
5729 if (inp->inp_vflag & INP_IPV6) {
5730 addr = &inp->in6p_laddr.s6_addr32;
5731 inet_ntop(AF_INET6, addr, local, sizeof(local));
5732 addr = &inp->in6p_faddr.s6_addr32;
5733  inet_ntop(AF_INET6, addr, remote, sizeof(remote));
5734 } else {
5735 addr = &inp->inp_laddr.s_addr;
5736 inet_ntop(AF_INET, addr, local, sizeof(local));
5737 addr = &inp->inp_faddr.s_addr;
5738  inet_ntop(AF_INET, addr, remote, sizeof(remote));
5739 }
5740
5741 if (so->so_cfil != NULL) {
5742 CFIL_LOG(level, "<%s>: <%s so %llx - flags 0x%x 0x%x, sockID %llu> lport %d fport %d laddr %s faddr %s",
5743 msg, IS_UDP(so) ? "UDP" : "TCP",
5744 (uint64_t)VM_KERNEL_ADDRPERM(so), inp->inp_flags, inp->inp_socket->so_flags, so->so_cfil->cfi_sock_id,
5745 ntohs(inp->inp_lport), ntohs(inp->inp_fport), local, remote);
5746 } else {
5747 CFIL_LOG(level, "<%s>: <%s so %llx - flags 0x%x 0x%x> lport %d fport %d laddr %s faddr %s",
5748 msg, IS_UDP(so) ? "UDP" : "TCP",
5749 (uint64_t)VM_KERNEL_ADDRPERM(so), inp->inp_flags, inp->inp_socket->so_flags,
5750 ntohs(inp->inp_lport), ntohs(inp->inp_fport), local, remote);
5751 }
5752 }
5753
5754 static void
5755 cfil_info_log(int level, struct cfil_info *cfil_info, const char* msg)
5756 {
5757 if (cfil_info == NULL) {
5758 return;
5759 }
5760
5761 if (cfil_info->cfi_hash_entry != NULL) {
5762 cfil_hash_entry_log(level, cfil_info->cfi_so, cfil_info->cfi_hash_entry, cfil_info->cfi_sock_id, msg);
5763 } else {
5764 cfil_inp_log(level, cfil_info->cfi_so, msg);
5765 }
5766 }
5767
5768 errno_t
5769 cfil_db_init(struct socket *so)
5770 {
5771 errno_t error = 0;
5772 struct cfil_db *db = NULL;
5773
5774 CFIL_LOG(LOG_INFO, "");
5775
5776 db = zalloc(cfil_db_zone);
5777 if (db == NULL) {
5778 error = ENOMEM;
5779 goto done;
5780 }
5781 bzero(db, sizeof(struct cfil_db));
5782 db->cfdb_so = so;
5783 db->cfdb_hashbase = hashinit(CFILHASHSIZE, M_CFIL, &db->cfdb_hashmask);
5784 if (db->cfdb_hashbase == NULL) {
5785 zfree(cfil_db_zone, db);
5786 db = NULL;
5787 error = ENOMEM;
5788 goto done;
5789 }
5790
5791 so->so_cfil_db = db;
5792
5793 done:
5794 return error;
5795 }
5796
5797 void
5798 cfil_db_free(struct socket *so)
5799 {
5800 struct cfil_hash_entry *entry = NULL;
5801 struct cfil_hash_entry *temp_entry = NULL;
5802 struct cfilhashhead *cfilhash = NULL;
5803 struct cfil_db *db = NULL;
5804
5805 CFIL_LOG(LOG_INFO, "");
5806
5807 if (so == NULL || so->so_cfil_db == NULL) {
5808 return;
5809 }
5810 db = so->so_cfil_db;
5811
5812 #if LIFECYCLE_DEBUG
5813 CFIL_LOG(LOG_ERR, "CFIL: LIFECYCLE: <so %llx, db %p> freeing db (count == %d)",
5814 (uint64_t)VM_KERNEL_ADDRPERM(so), db, db->cfdb_count);
5815 #endif
5816
5817 for (int i = 0; i < CFILHASHSIZE; i++) {
5818 cfilhash = &db->cfdb_hashbase[i];
5819 LIST_FOREACH_SAFE(entry, cfilhash, cfentry_link, temp_entry) {
5820 if (entry->cfentry_cfil != NULL) {
5821 #if LIFECYCLE_DEBUG
5822 cfil_info_log(LOG_ERR, entry->cfentry_cfil, "CFIL: LIFECYCLE: DB FREE CLEAN UP");
5823 #endif
5824 CFIL_INFO_FREE(entry->cfentry_cfil);
5825 OSIncrementAtomic(&cfil_stats.cfs_sock_detached);
5826 entry->cfentry_cfil = NULL;
5827 }
5828
5829 cfil_db_delete_entry(db, entry);
5830 if (so->so_flags & SOF_CONTENT_FILTER) {
5831 if (db->cfdb_count == 0) {
5832 so->so_flags &= ~SOF_CONTENT_FILTER;
5833 }
5834 VERIFY(so->so_usecount > 0);
5835 so->so_usecount--;
5836 }
5837 }
5838 }
5839
5840 // Make sure all entries are cleaned up!
5841 VERIFY(db->cfdb_count == 0);
5842 #if LIFECYCLE_DEBUG
5843 CFIL_LOG(LOG_ERR, "CFIL: LIFECYCLE: so usecount %d", so->so_usecount);
5844 #endif
5845
5846 hashdestroy(db->cfdb_hashbase, M_CFIL, db->cfdb_hashmask);
5847 zfree(cfil_db_zone, db);
5848 so->so_cfil_db = NULL;
5849 }
5850
5851 static bool
5852 fill_cfil_hash_entry_from_address(struct cfil_hash_entry *entry, bool isLocal, struct sockaddr *addr, bool islocalUpdate)
5853 {
5854 struct sockaddr_in *sin = NULL;
5855 struct sockaddr_in6 *sin6 = NULL;
5856
5857 if (entry == NULL || addr == NULL) {
5858 return FALSE;
5859 }
5860
5861 switch (addr->sa_family) {
5862 case AF_INET:
5863 sin = satosin(addr);
5864 if (sin->sin_len != sizeof(*sin)) {
5865 return FALSE;
5866 }
5867 if (isLocal == TRUE) {
5868 if (sin->sin_port) {
5869 entry->cfentry_lport = sin->sin_port;
5870 if (islocalUpdate) {
5871 entry->cfentry_lport_updated = TRUE;
5872 }
5873 }
5874 if (sin->sin_addr.s_addr) {
5875 entry->cfentry_laddr.addr46.ia46_addr4.s_addr = sin->sin_addr.s_addr;
5876 if (islocalUpdate) {
5877 entry->cfentry_laddr_updated = TRUE;
5878 }
5879 }
5880 } else {
5881 if (sin->sin_port) {
5882 entry->cfentry_fport = sin->sin_port;
5883 }
5884 if (sin->sin_addr.s_addr) {
5885 entry->cfentry_faddr.addr46.ia46_addr4.s_addr = sin->sin_addr.s_addr;
5886 }
5887 }
5888 entry->cfentry_family = AF_INET;
5889 return TRUE;
5890 case AF_INET6:
5891 sin6 = satosin6(addr);
5892 if (sin6->sin6_len != sizeof(*sin6)) {
5893 return FALSE;
5894 }
5895 if (isLocal == TRUE) {
5896 if (sin6->sin6_port) {
5897 entry->cfentry_lport = sin6->sin6_port;
5898 if (islocalUpdate) {
5899 entry->cfentry_lport_updated = TRUE;
5900 }
5901 }
5902 if (!IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
5903 entry->cfentry_laddr.addr6 = sin6->sin6_addr;
5904 if (islocalUpdate) {
5905 entry->cfentry_laddr_updated = TRUE;
5906 }
5907 }
5908 } else {
5909 if (sin6->sin6_port) {
5910 entry->cfentry_fport = sin6->sin6_port;
5911 }
5912 if (!IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
5913 entry->cfentry_faddr.addr6 = sin6->sin6_addr;
5914 }
5915 }
5916 entry->cfentry_family = AF_INET6;
5917 return TRUE;
5918 default:
5919 return FALSE;
5920 }
5921 }
5922
5923 static bool
5924 fill_cfil_hash_entry_from_inp(struct cfil_hash_entry *entry, bool isLocal, struct inpcb *inp, bool islocalUpdate)
5925 {
5926 if (entry == NULL || inp == NULL) {
5927 return FALSE;
5928 }
5929
5930 if (inp->inp_vflag & INP_IPV6) {
5931 if (isLocal == TRUE) {
5932 if (inp->inp_lport) {
5933 entry->cfentry_lport = inp->inp_lport;
5934 if (islocalUpdate) {
5935 entry->cfentry_lport_updated = TRUE;
5936 }
5937 }
5938 if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) {
5939 entry->cfentry_laddr.addr6 = inp->in6p_laddr;
5940 if (islocalUpdate) {
5941 entry->cfentry_laddr_updated = TRUE;
5942 }
5943 }
5944 } else {
5945 if (inp->inp_fport) {
5946 entry->cfentry_fport = inp->inp_fport;
5947 }
5948 if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) {
5949 entry->cfentry_faddr.addr6 = inp->in6p_faddr;
5950 }
5951 }
5952 entry->cfentry_family = AF_INET6;
5953 return TRUE;
5954 } else if (inp->inp_vflag & INP_IPV4) {
5955 if (isLocal == TRUE) {
5956 if (inp->inp_lport) {
5957 entry->cfentry_lport = inp->inp_lport;
5958 if (islocalUpdate) {
5959 entry->cfentry_lport_updated = TRUE;
5960 }
5961 }
5962 if (inp->inp_laddr.s_addr) {
5963 entry->cfentry_laddr.addr46.ia46_addr4.s_addr = inp->inp_laddr.s_addr;
5964 if (islocalUpdate) {
5965 entry->cfentry_laddr_updated = TRUE;
5966 }
5967 }
5968 } else {
5969 if (inp->inp_fport) {
5970 entry->cfentry_fport = inp->inp_fport;
5971 }
5972 if (inp->inp_faddr.s_addr) {
5973 entry->cfentry_faddr.addr46.ia46_addr4.s_addr = inp->inp_faddr.s_addr;
5974 }
5975 }
5976 entry->cfentry_family = AF_INET;
5977 return TRUE;
5978 }
5979 return FALSE;
5980 }
5981
5982 bool
5983 check_port(struct sockaddr *addr, u_short port)
5984 {
5985 struct sockaddr_in *sin = NULL;
5986 struct sockaddr_in6 *sin6 = NULL;
5987
5988 if (addr == NULL || port == 0) {
5989 return FALSE;
5990 }
5991
5992 switch (addr->sa_family) {
5993 case AF_INET:
5994 sin = satosin(addr);
5995 if (sin->sin_len != sizeof(*sin)) {
5996 return FALSE;
5997 }
5998 if (port == ntohs(sin->sin_port)) {
5999 return TRUE;
6000 }
6001 break;
6002 case AF_INET6:
6003 sin6 = satosin6(addr);
6004 if (sin6->sin6_len != sizeof(*sin6)) {
6005 return FALSE;
6006 }
6007 if (port == ntohs(sin6->sin6_port)) {
6008 return TRUE;
6009 }
6010 break;
6011 default:
6012 break;
6013 }
6014 return FALSE;
6015 }
6016
6017 struct cfil_hash_entry *
6018 cfil_db_lookup_entry_with_sockid(struct cfil_db *db, u_int64_t sock_id)
6019 {
6020 struct cfilhashhead *cfilhash = NULL;
6021 u_int32_t flowhash = (u_int32_t)(sock_id & 0x0ffffffff);
6022 struct cfil_hash_entry *nextentry;
6023
6024 if (db == NULL || db->cfdb_hashbase == NULL || sock_id == 0) {
6025 return NULL;
6026 }
6027
6028 flowhash &= db->cfdb_hashmask;
6029 cfilhash = &db->cfdb_hashbase[flowhash];
6030
6031 LIST_FOREACH(nextentry, cfilhash, cfentry_link) {
6032 if (nextentry->cfentry_cfil != NULL &&
6033 nextentry->cfentry_cfil->cfi_sock_id == sock_id) {
6034 CFIL_LOG(LOG_DEBUG, "CFIL: UDP <so %llx> matched <id %llu, hash %u>",
6035 (uint64_t)VM_KERNEL_ADDRPERM(db->cfdb_so), nextentry->cfentry_cfil->cfi_sock_id, flowhash);
6036 cfil_hash_entry_log(LOG_DEBUG, db->cfdb_so, nextentry, 0, "CFIL: UDP found entry");
6037 return nextentry;
6038 }
6039 }
6040
6041 CFIL_LOG(LOG_DEBUG, "CFIL: UDP <so %llx> NOT matched <id %llu, hash %u>",
6042 (uint64_t)VM_KERNEL_ADDRPERM(db->cfdb_so), sock_id, flowhash);
6043 return NULL;
6044 }
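/*
 * The lookup above relies on the low 32 bits of the cfil sock_id doubling as
 * the flow hash: the bucket is derived directly from the id and only full
 * sock_id comparisons are needed within that bucket.
 */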
6045
6046 struct cfil_hash_entry *
6047 cfil_db_lookup_entry_internal(struct cfil_db *db, struct sockaddr *local, struct sockaddr *remote, boolean_t remoteOnly, boolean_t withLocalPort)
6048 {
6049 struct cfil_hash_entry matchentry = { };
6050 struct cfil_hash_entry *nextentry = NULL;
6051 struct inpcb *inp = sotoinpcb(db->cfdb_so);
6052 u_int32_t hashkey_faddr = 0, hashkey_laddr = 0;
6053 u_int16_t hashkey_fport = 0, hashkey_lport = 0;
6054 int inp_hash_element = 0;
6055 struct cfilhashhead *cfilhash = NULL;
6056
6057 CFIL_LOG(LOG_INFO, "");
6058
6059 if (inp == NULL) {
6060 goto done;
6061 }
6062
6063 if (local != NULL) {
6064 fill_cfil_hash_entry_from_address(&matchentry, TRUE, local, FALSE);
6065 } else {
6066 fill_cfil_hash_entry_from_inp(&matchentry, TRUE, inp, FALSE);
6067 }
6068 if (remote != NULL) {
6069 fill_cfil_hash_entry_from_address(&matchentry, FALSE, remote, FALSE);
6070 } else {
6071 fill_cfil_hash_entry_from_inp(&matchentry, FALSE, inp, FALSE);
6072 }
6073
6074 if (inp->inp_vflag & INP_IPV6) {
6075 hashkey_faddr = matchentry.cfentry_faddr.addr6.s6_addr32[3];
6076 hashkey_laddr = (remoteOnly == false) ? matchentry.cfentry_laddr.addr6.s6_addr32[3] : 0;
6077 } else {
6078 hashkey_faddr = matchentry.cfentry_faddr.addr46.ia46_addr4.s_addr;
6079 hashkey_laddr = (remoteOnly == false) ? matchentry.cfentry_laddr.addr46.ia46_addr4.s_addr : 0;
6080 }
6081
6082 hashkey_fport = matchentry.cfentry_fport;
6083 hashkey_lport = (remoteOnly == false || withLocalPort == true) ? matchentry.cfentry_lport : 0;
6084
6085 inp_hash_element = CFIL_HASH(hashkey_laddr, hashkey_faddr, hashkey_lport, hashkey_fport);
6086 inp_hash_element &= db->cfdb_hashmask;
6087 cfilhash = &db->cfdb_hashbase[inp_hash_element];
6088
6089 LIST_FOREACH(nextentry, cfilhash, cfentry_link) {
6090 if ((inp->inp_vflag & INP_IPV6) &&
6091 (remoteOnly || nextentry->cfentry_lport_updated || nextentry->cfentry_lport == matchentry.cfentry_lport) &&
6092 nextentry->cfentry_fport == matchentry.cfentry_fport &&
6093 (remoteOnly || nextentry->cfentry_laddr_updated || IN6_ARE_ADDR_EQUAL(&nextentry->cfentry_laddr.addr6, &matchentry.cfentry_laddr.addr6)) &&
6094 IN6_ARE_ADDR_EQUAL(&nextentry->cfentry_faddr.addr6, &matchentry.cfentry_faddr.addr6)) {
6095 #if DATA_DEBUG
6096 cfil_hash_entry_log(LOG_DEBUG, db->cfdb_so, &matchentry, 0, "CFIL LOOKUP ENTRY: UDP V6 found entry");
6097 #endif
6098 return nextentry;
6099 } else if ((remoteOnly || nextentry->cfentry_lport_updated || nextentry->cfentry_lport == matchentry.cfentry_lport) &&
6100 nextentry->cfentry_fport == matchentry.cfentry_fport &&
6101 (remoteOnly || nextentry->cfentry_laddr_updated || nextentry->cfentry_laddr.addr46.ia46_addr4.s_addr == matchentry.cfentry_laddr.addr46.ia46_addr4.s_addr) &&
6102 nextentry->cfentry_faddr.addr46.ia46_addr4.s_addr == matchentry.cfentry_faddr.addr46.ia46_addr4.s_addr) {
6103 #if DATA_DEBUG
6104 cfil_hash_entry_log(LOG_DEBUG, db->cfdb_so, &matchentry, 0, "CFIL LOOKUP ENTRY: UDP V4 found entry");
6105 #endif
6106 return nextentry;
6107 }
6108 }
6109
6110 done:
6111 #if DATA_DEBUG
6112 cfil_hash_entry_log(LOG_DEBUG, db->cfdb_so, &matchentry, 0, "CFIL LOOKUP ENTRY: UDP no entry found");
6113 #endif
6114 return NULL;
6115 }
6116
6117 struct cfil_hash_entry *
6118 cfil_db_lookup_entry(struct cfil_db *db, struct sockaddr *local, struct sockaddr *remote, boolean_t remoteOnly)
6119 {
6120 struct cfil_hash_entry *entry = cfil_db_lookup_entry_internal(db, local, remote, remoteOnly, false);
6121 if (entry == NULL && remoteOnly == true) {
6122 entry = cfil_db_lookup_entry_internal(db, local, remote, remoteOnly, true);
6123 }
6124 return entry;
6125 }
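/*
 * When remoteOnly is set the lookup is tried twice: first with the local
 * port left out of the hash key (withLocalPort == false) and, if that
 * misses, again with the local port included, so a flow can be found whether
 * or not its local port was known when it was hashed in.
 */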
6126
6127 cfil_sock_id_t
6128 cfil_sock_id_from_datagram_socket(struct socket *so, struct sockaddr *local, struct sockaddr *remote)
6129 {
6130 struct cfil_hash_entry *hash_entry = NULL;
6131
6132 socket_lock_assert_owned(so);
6133
6134 if (so->so_cfil_db == NULL) {
6135 return CFIL_SOCK_ID_NONE;
6136 }
6137
6138 hash_entry = cfil_db_lookup_entry(so->so_cfil_db, local, remote, false);
6139 if (hash_entry == NULL) {
6140  // No match with both local and remote; try matching with remote only
6141 hash_entry = cfil_db_lookup_entry(so->so_cfil_db, local, remote, true);
6142 }
6143 if (hash_entry == NULL || hash_entry->cfentry_cfil == NULL) {
6144 return CFIL_SOCK_ID_NONE;
6145 }
6146
6147 return hash_entry->cfentry_cfil->cfi_sock_id;
6148 }
6149
6150 void
6151 cfil_db_delete_entry(struct cfil_db *db, struct cfil_hash_entry *hash_entry)
6152 {
6153 if (hash_entry == NULL) {
6154 return;
6155 }
6156 if (db == NULL || db->cfdb_count == 0) {
6157 return;
6158 }
6159 db->cfdb_count--;
6160 if (db->cfdb_only_entry == hash_entry) {
6161 db->cfdb_only_entry = NULL;
6162 }
6163 LIST_REMOVE(hash_entry, cfentry_link);
6164 zfree(cfil_hash_entry_zone, hash_entry);
6165 }
6166
6167 struct cfil_hash_entry *
6168 cfil_db_add_entry(struct cfil_db *db, struct sockaddr *local, struct sockaddr *remote)
6169 {
6170 struct cfil_hash_entry *entry = NULL;
6171 struct inpcb *inp = sotoinpcb(db->cfdb_so);
6172 u_int32_t hashkey_faddr = 0, hashkey_laddr = 0;
6173 int inp_hash_element = 0;
6174 struct cfilhashhead *cfilhash = NULL;
6175
6176 CFIL_LOG(LOG_INFO, "");
6177
6178 if (inp == NULL) {
6179 goto done;
6180 }
6181
6182 entry = zalloc(cfil_hash_entry_zone);
6183 if (entry == NULL) {
6184 goto done;
6185 }
6186 bzero(entry, sizeof(struct cfil_hash_entry));
6187
6188 if (local != NULL) {
6189 fill_cfil_hash_entry_from_address(entry, TRUE, local, FALSE);
6190 } else {
6191 fill_cfil_hash_entry_from_inp(entry, TRUE, inp, FALSE);
6192 }
6193 if (remote != NULL) {
6194 fill_cfil_hash_entry_from_address(entry, FALSE, remote, FALSE);
6195 } else {
6196 fill_cfil_hash_entry_from_inp(entry, FALSE, inp, FALSE);
6197 }
6198 entry->cfentry_lastused = net_uptime();
6199
6200 if (inp->inp_vflag & INP_IPV6) {
6201 hashkey_faddr = entry->cfentry_faddr.addr6.s6_addr32[3];
6202 hashkey_laddr = entry->cfentry_laddr.addr6.s6_addr32[3];
6203 } else {
6204 hashkey_faddr = entry->cfentry_faddr.addr46.ia46_addr4.s_addr;
6205 hashkey_laddr = entry->cfentry_laddr.addr46.ia46_addr4.s_addr;
6206 }
6207 entry->cfentry_flowhash = CFIL_HASH(hashkey_laddr, hashkey_faddr,
6208 entry->cfentry_lport, entry->cfentry_fport);
6209 inp_hash_element = entry->cfentry_flowhash & db->cfdb_hashmask;
6210
6211 cfilhash = &db->cfdb_hashbase[inp_hash_element];
6212
6213 LIST_INSERT_HEAD(cfilhash, entry, cfentry_link);
6214 db->cfdb_count++;
6215 db->cfdb_only_entry = entry;
6216 cfil_hash_entry_log(LOG_DEBUG, db->cfdb_so, entry, 0, "CFIL: cfil_db_add_entry: ADDED");
6217
6218 done:
6219 CFIL_LOG(LOG_DEBUG, "CFIL: UDP <so %llx> total count %d", (uint64_t)VM_KERNEL_ADDRPERM(db->cfdb_so), db->cfdb_count);
6220 return entry;
6221 }
6222
6223 void
6224 cfil_db_update_entry_local(struct cfil_db *db, struct cfil_hash_entry *entry, struct sockaddr *local, struct mbuf *control)
6225 {
6226 struct inpcb *inp = sotoinpcb(db->cfdb_so);
6227 union sockaddr_in_4_6 address_buf = { };
6228
6229 CFIL_LOG(LOG_INFO, "");
6230
6231 if (inp == NULL || entry == NULL) {
6232 return;
6233 }
6234
6235 if (LOCAL_ADDRESS_NEEDS_UPDATE(entry)) {
6236 // Flow does not have a local address yet. Retrieve local address
6237 // from control mbufs if present.
6238 if (local == NULL && control != NULL) {
6239 uint8_t *addr_ptr = NULL;
6240 int size = cfil_sock_udp_get_address_from_control(entry->cfentry_family, control, &addr_ptr);
6241
6242 if (size && addr_ptr) {
6243 switch (entry->cfentry_family) {
6244 case AF_INET:
6245 if (size == sizeof(struct in_addr)) {
6246 address_buf.sin.sin_port = 0;
6247 address_buf.sin.sin_family = AF_INET;
6248 address_buf.sin.sin_len = sizeof(struct sockaddr_in);
6249 (void) memcpy(&address_buf.sin.sin_addr, addr_ptr, sizeof(struct in_addr));
6250 local = sintosa(&address_buf.sin);
6251 }
6252 break;
6253 case AF_INET6:
6254 if (size == sizeof(struct in6_addr)) {
6255 address_buf.sin6.sin6_port = 0;
6256 address_buf.sin6.sin6_family = AF_INET6;
6257 address_buf.sin6.sin6_len = sizeof(struct sockaddr_in6);
6258 (void) memcpy(&address_buf.sin6.sin6_addr, addr_ptr, sizeof(struct in6_addr));
6259 local = sin6tosa(&address_buf.sin6);
6260 }
6261 break;
6262 default:
6263 break;
6264 }
6265 }
6266 }
6267 if (local != NULL) {
6268 fill_cfil_hash_entry_from_address(entry, TRUE, local, TRUE);
6269 } else {
6270 fill_cfil_hash_entry_from_inp(entry, TRUE, inp, TRUE);
6271 }
6272 }
6273
6274 if (LOCAL_PORT_NEEDS_UPDATE(entry, db->cfdb_so)) {
6275 fill_cfil_hash_entry_from_inp(entry, TRUE, inp, TRUE);
6276 }
6277
6278 return;
6279 }
6280
6281 struct cfil_info *
6282 cfil_db_get_cfil_info(struct cfil_db *db, cfil_sock_id_t id)
6283 {
6284 struct cfil_hash_entry *hash_entry = NULL;
6285
6286 CFIL_LOG(LOG_INFO, "");
6287
6288 if (db == NULL || id == 0) {
6289 CFIL_LOG(LOG_DEBUG, "CFIL: UDP <so %llx> NULL DB <id %llu>",
6290 db ? (uint64_t)VM_KERNEL_ADDRPERM(db->cfdb_so) : 0, id);
6291 return NULL;
6292 }
6293
6294  // This is an optimization for a connected UDP socket, which has only one flow.
6295 // No need to do the hash lookup.
6296 if (db->cfdb_count == 1) {
6297 if (db->cfdb_only_entry && db->cfdb_only_entry->cfentry_cfil &&
6298 db->cfdb_only_entry->cfentry_cfil->cfi_sock_id == id) {
6299 return db->cfdb_only_entry->cfentry_cfil;
6300 }
6301 }
6302
6303 hash_entry = cfil_db_lookup_entry_with_sockid(db, id);
6304 return hash_entry != NULL ? hash_entry->cfentry_cfil : NULL;
6305 }
6306
6307 struct cfil_hash_entry *
6308 cfil_sock_udp_get_flow(struct socket *so, uint32_t filter_control_unit, bool outgoing, struct sockaddr *local, struct sockaddr *remote, struct mbuf *control, int debug)
6309 {
6310 struct cfil_hash_entry *hash_entry = NULL;
6311 int new_filter_control_unit = 0;
6312
6313 errno_t error = 0;
6314 socket_lock_assert_owned(so);
6315
6316 // If new socket, allocate cfil db
6317 if (so->so_cfil_db == NULL) {
6318 if (cfil_db_init(so) != 0) {
6319 return NULL;
6320 }
6321 }
6322
6323 // See if flow already exists.
6324 hash_entry = cfil_db_lookup_entry(so->so_cfil_db, local, remote, false);
6325 if (hash_entry == NULL) {
6326  // No match with both local and remote; try matching with remote only
6327 hash_entry = cfil_db_lookup_entry(so->so_cfil_db, local, remote, true);
6328 }
6329 if (hash_entry != NULL) {
6330 /* Drop pre-existing UDP flow if filter state changed */
6331 new_filter_control_unit = necp_socket_get_content_filter_control_unit(so);
6332 if (new_filter_control_unit > 0 &&
6333 new_filter_control_unit != hash_entry->cfentry_cfil->cfi_filter_control_unit) {
6334 return NULL;
6335 }
6336
6337 // Try to update flow info from socket and/or control mbufs if necessary
6338 if (LOCAL_ADDRESS_NEEDS_UPDATE(hash_entry) || LOCAL_PORT_NEEDS_UPDATE(hash_entry, so)) {
6339 cfil_db_update_entry_local(so->so_cfil_db, hash_entry, local, control);
6340 }
6341 return hash_entry;
6342 }
6343
6344 hash_entry = cfil_db_add_entry(so->so_cfil_db, local, remote);
6345 if (hash_entry == NULL) {
6346 OSIncrementAtomic(&cfil_stats.cfs_sock_attach_no_mem);
6347 CFIL_LOG(LOG_ERR, "CFIL: UDP failed to add entry");
6348 return NULL;
6349 }
6350
6351 if (cfil_info_alloc(so, hash_entry) == NULL ||
6352 hash_entry->cfentry_cfil == NULL) {
6353 cfil_db_delete_entry(so->so_cfil_db, hash_entry);
6354 CFIL_LOG(LOG_ERR, "CFIL: UDP failed to alloc cfil_info");
6355 OSIncrementAtomic(&cfil_stats.cfs_sock_attach_no_mem);
6356 return NULL;
6357 }
6358 hash_entry->cfentry_cfil->cfi_filter_control_unit = filter_control_unit;
6359 hash_entry->cfentry_cfil->cfi_dir = outgoing ? CFS_CONNECTION_DIR_OUT : CFS_CONNECTION_DIR_IN;
6360 hash_entry->cfentry_cfil->cfi_debug = debug;
6361
6362 #if LIFECYCLE_DEBUG
6363 cfil_info_log(LOG_ERR, hash_entry->cfentry_cfil, "CFIL: LIFECYCLE: ADDED");
6364 #endif
6365
6366 // Check if we can update the new flow's local address from control mbufs
6367 if (control != NULL) {
6368 cfil_db_update_entry_local(so->so_cfil_db, hash_entry, local, control);
6369 }
6370
6371 if (cfil_info_attach_unit(so, filter_control_unit, hash_entry->cfentry_cfil) == 0) {
6372 CFIL_INFO_FREE(hash_entry->cfentry_cfil);
6373 cfil_db_delete_entry(so->so_cfil_db, hash_entry);
6374 CFIL_LOG(LOG_ERR, "CFIL: UDP cfil_info_attach_unit(%u) failed",
6375 filter_control_unit);
6376 OSIncrementAtomic(&cfil_stats.cfs_sock_attach_failed);
6377 return NULL;
6378 }
6379 CFIL_LOG(LOG_DEBUG, "CFIL: UDP <so %llx> filter_control_unit %u sockID %llu attached",
6380 (uint64_t)VM_KERNEL_ADDRPERM(so),
6381 filter_control_unit, hash_entry->cfentry_cfil->cfi_sock_id);
6382
6383 so->so_flags |= SOF_CONTENT_FILTER;
6384 OSIncrementAtomic(&cfil_stats.cfs_sock_attached);
6385
6386 /* Hold a reference on the socket for each flow */
6387 so->so_usecount++;
6388
6389 if (debug) {
6390 cfil_info_log(LOG_ERR, hash_entry->cfentry_cfil, "CFIL: LIFECYCLE: ADDED");
6391 }
6392
6393 error = cfil_dispatch_attach_event(so, hash_entry->cfentry_cfil, 0,
6394 outgoing ? CFS_CONNECTION_DIR_OUT : CFS_CONNECTION_DIR_IN);
6395 /* We can recover from flow control or out of memory errors */
6396 if (error != 0 && error != ENOBUFS && error != ENOMEM) {
6397 return NULL;
6398 }
6399
6400 CFIL_INFO_VERIFY(hash_entry->cfentry_cfil);
6401 return hash_entry;
6402 }
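/*
 * Summary of the attach sequence for a new UDP flow:
 * 1. add a hash entry for the (local, remote) tuple,
 * 2. allocate the per-flow cfil_info and record the control unit, direction
 *    and debug flag,
 * 3. optionally refresh the flow's local address from control mbufs,
 * 4. attach the filter unit, mark the socket SOF_CONTENT_FILTER and take a
 *    socket reference for the flow,
 * 5. dispatch the attach event, tolerating ENOBUFS/ENOMEM since flow control
 *    and out-of-memory conditions are recoverable.
 */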
6403
6404 int
6405 cfil_sock_udp_get_address_from_control(sa_family_t family, struct mbuf *control, uint8_t **address_ptr)
6406 {
6407 struct cmsghdr *cm;
6408 struct in6_pktinfo *pi6;
6409
6410 if (control == NULL || address_ptr == NULL) {
6411 return 0;
6412 }
6413
6414 while (control) {
6415 if (control->m_type != MT_CONTROL) {
6416 control = control->m_next;
6417 continue;
6418 }
6419
6420 for (cm = M_FIRST_CMSGHDR(control);
6421 is_cmsg_valid(control, cm);
6422 cm = M_NXT_CMSGHDR(control, cm)) {
6423 switch (cm->cmsg_type) {
6424 case IP_RECVDSTADDR:
6425 if (family == AF_INET &&
6426 cm->cmsg_level == IPPROTO_IP &&
6427 cm->cmsg_len == CMSG_LEN(sizeof(struct in_addr))) {
6428 *address_ptr = CMSG_DATA(cm);
6429 return sizeof(struct in_addr);
6430 }
6431 break;
6432 case IPV6_PKTINFO:
6433 case IPV6_2292PKTINFO:
6434 if (family == AF_INET6 &&
6435 cm->cmsg_level == IPPROTO_IPV6 &&
6436 cm->cmsg_len == CMSG_LEN(sizeof(struct in6_pktinfo))) {
6437 pi6 = (struct in6_pktinfo *)(void *)CMSG_DATA(cm);
6438 *address_ptr = (uint8_t *)&pi6->ipi6_addr;
6439 return sizeof(struct in6_addr);
6440 }
6441 break;
6442 default:
6443 break;
6444 }
6445 }
6446
6447 control = control->m_next;
6448 }
6449 return 0;
6450 }
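/*
 * Only two well-formed cmsg shapes are honored above: IP_RECVDSTADDR with a
 * struct in_addr for AF_INET, and IPV6_PKTINFO / IPV6_2292PKTINFO with a
 * struct in6_pktinfo for AF_INET6. The returned pointer aliases data inside
 * the control mbuf chain, so callers must copy the address out before the
 * mbufs are released.
 */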
6451
6452 errno_t
6453 cfil_sock_udp_handle_data(bool outgoing, struct socket *so,
6454 struct sockaddr *local, struct sockaddr *remote,
6455 struct mbuf *data, struct mbuf *control, uint32_t flags)
6456 {
6457 #pragma unused(outgoing, so, local, remote, data, control, flags)
6458 errno_t error = 0;
6459 uint32_t filter_control_unit;
6460 struct cfil_hash_entry *hash_entry = NULL;
6461 struct cfil_info *cfil_info = NULL;
6462 int debug = 0;
6463
6464 socket_lock_assert_owned(so);
6465
6466 if (cfil_active_count == 0) {
6467 CFIL_LOG(LOG_DEBUG, "CFIL: UDP no active filter");
6468 OSIncrementAtomic(&cfil_stats.cfs_sock_attach_in_vain);
6469 return error;
6470 }
6471
6472  // Socket has been blessed (content filtering explicitly skipped for it)
6473 if ((so->so_flags1 & SOF1_CONTENT_FILTER_SKIP) != 0) {
6474 return error;
6475 }
6476
6477 filter_control_unit = necp_socket_get_content_filter_control_unit(so);
6478 if (filter_control_unit == 0) {
6479 CFIL_LOG(LOG_DEBUG, "CFIL: UDP failed to get control unit");
6480 return error;
6481 }
6482
6483 if (filter_control_unit == NECP_FILTER_UNIT_NO_FILTER) {
6484 return error;
6485 }
6486
6487 if ((filter_control_unit & NECP_MASK_USERSPACE_ONLY) != 0) {
6488 CFIL_LOG(LOG_DEBUG, "CFIL: UDP user space only");
6489 OSIncrementAtomic(&cfil_stats.cfs_sock_userspace_only);
6490 return error;
6491 }
6492
6493 hash_entry = cfil_sock_udp_get_flow(so, filter_control_unit, outgoing, local, remote, control, debug);
6494 if (hash_entry == NULL || hash_entry->cfentry_cfil == NULL) {
6495  CFIL_LOG(LOG_ERR, "CFIL: Failed to create UDP flow");
6496 return EPIPE;
6497 }
6498  // Update the last-used timestamp, used for the flow idle timeout
6499 hash_entry->cfentry_lastused = net_uptime();
6500 cfil_info = hash_entry->cfentry_cfil;
6501
6502 if (cfil_info->cfi_flags & CFIF_DROP) {
6503 #if DATA_DEBUG
6504 cfil_hash_entry_log(LOG_DEBUG, so, hash_entry, 0, "CFIL: UDP DROP");
6505 #endif
6506 return EPIPE;
6507 }
6508 if (control != NULL) {
6509 OSIncrementAtomic(&cfil_stats.cfs_data_in_control);
6510 }
6511 if (data->m_type == MT_OOBDATA) {
6512 CFIL_LOG(LOG_ERR, "so %llx MSG_OOB",
6513 (uint64_t)VM_KERNEL_ADDRPERM(so));
6514 OSIncrementAtomic(&cfil_stats.cfs_data_in_oob);
6515 }
6516
6517 error = cfil_data_common(so, cfil_info, outgoing, remote, data, control, flags);
6518
6519 return error;
6520 }
6521
6522 /*
6523  * Go through all UDP flows for the specified socket and return TRUE if
6524  * any flow is still attached. If need_wait is TRUE, wait on the first
6525  * attached flow.
6526 */
6527 static int
6528 cfil_filters_udp_attached(struct socket *so, bool need_wait)
6529 {
6530 struct timespec ts;
6531 lck_mtx_t *mutex_held;
6532 struct cfilhashhead *cfilhash = NULL;
6533 struct cfil_db *db = NULL;
6534 struct cfil_hash_entry *hash_entry = NULL;
6535 struct cfil_hash_entry *temp_hash_entry = NULL;
6536 struct cfil_info *cfil_info = NULL;
6537 struct cfil_entry *entry = NULL;
6538 errno_t error = 0;
6539 int kcunit;
6540 int attached = 0;
6541 uint64_t sock_flow_id = 0;
6542
6543 socket_lock_assert_owned(so);
6544
6545 if ((so->so_flags & SOF_CONTENT_FILTER) != 0 && so->so_cfil_db != NULL) {
6546 if (so->so_proto->pr_getlock != NULL) {
6547 mutex_held = (*so->so_proto->pr_getlock)(so, PR_F_WILLUNLOCK);
6548 } else {
6549 mutex_held = so->so_proto->pr_domain->dom_mtx;
6550 }
6551 LCK_MTX_ASSERT(mutex_held, LCK_MTX_ASSERT_OWNED);
6552
6553 db = so->so_cfil_db;
6554
6555 for (int i = 0; i < CFILHASHSIZE; i++) {
6556 cfilhash = &db->cfdb_hashbase[i];
6557
6558 LIST_FOREACH_SAFE(hash_entry, cfilhash, cfentry_link, temp_hash_entry) {
6559 if (hash_entry->cfentry_cfil != NULL) {
6560 cfil_info = hash_entry->cfentry_cfil;
6561 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
6562 entry = &cfil_info->cfi_entries[kcunit - 1];
6563
6564 /* Are we attached to the filter? */
6565 if (entry->cfe_filter == NULL) {
6566 continue;
6567 }
6568
6569 if ((entry->cfe_flags & CFEF_SENT_SOCK_ATTACHED) == 0) {
6570 continue;
6571 }
6572 if ((entry->cfe_flags & CFEF_CFIL_DETACHED) != 0) {
6573 continue;
6574 }
6575
6576 attached = 1;
6577
6578 if (need_wait == TRUE) {
6579 #if LIFECYCLE_DEBUG
6580 cfil_info_log(LOG_ERR, cfil_info, "CFIL: LIFECYCLE: WAIT FOR FLOW TO FINISH");
6581 #endif
6582
6583 ts.tv_sec = cfil_close_wait_timeout / 1000;
6584 ts.tv_nsec = (cfil_close_wait_timeout % 1000) *
6585 NSEC_PER_USEC * 1000;
6586
6587 OSIncrementAtomic(&cfil_stats.cfs_close_wait);
6588 cfil_info->cfi_flags |= CFIF_CLOSE_WAIT;
6589 sock_flow_id = cfil_info->cfi_sock_id;
6590
6591 error = msleep((caddr_t)cfil_info, mutex_held,
6592 PSOCK | PCATCH, "cfil_filters_udp_attached", &ts);
6593
6594  // Woke up from sleep; check that cfil_info is still valid
6595 if (so->so_cfil_db == NULL ||
6596 (cfil_info != cfil_db_get_cfil_info(so->so_cfil_db, sock_flow_id))) {
6597 // cfil_info is not valid, do not continue
6598 goto done;
6599 }
6600
6601 cfil_info->cfi_flags &= ~CFIF_CLOSE_WAIT;
6602
6603 #if LIFECYCLE_DEBUG
6604 cfil_info_log(LOG_ERR, cfil_info, "CFIL: LIFECYCLE: WAIT FOR FLOW DONE");
6605 #endif
6606
6607 /*
6608 * Force close in case of timeout
6609 */
6610 if (error != 0) {
6611 OSIncrementAtomic(&cfil_stats.cfs_close_wait_timeout);
6612 #if LIFECYCLE_DEBUG
6613 cfil_info_log(LOG_ERR, cfil_info, "CFIL: LIFECYCLE: WAIT FOR FLOW TIMED OUT, FORCE DETACH");
6614 #endif
6615 entry->cfe_flags |= CFEF_CFIL_DETACHED;
6616 }
6617 }
6618 goto done;
6619 }
6620 }
6621 }
6622 }
6623 }
6624
6625 done:
6626 return attached;
6627 }
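/*
 * Note on the msleep() above: the socket lock can be dropped while sleeping,
 * so the flow is re-validated after wakeup by looking its sock_id up again
 * in the db. If the cfil_info no longer matches, the entry may have been
 * freed by another thread and is not touched further.
 */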
6628
6629 int32_t
6630 cfil_sock_udp_data_pending(struct sockbuf *sb, bool check_thread)
6631 {
6632 struct socket *so = sb->sb_so;
6633 struct cfi_buf *cfi_buf;
6634 uint64_t pending = 0;
6635 uint64_t total_pending = 0;
6636 struct cfilhashhead *cfilhash = NULL;
6637 struct cfil_db *db = NULL;
6638 struct cfil_hash_entry *hash_entry = NULL;
6639 struct cfil_hash_entry *temp_hash_entry = NULL;
6640
6641 socket_lock_assert_owned(so);
6642
6643 if ((so->so_flags & SOF_CONTENT_FILTER) != 0 && so->so_cfil_db != NULL &&
6644 (check_thread == FALSE || so->so_snd.sb_cfil_thread != current_thread())) {
6645 db = so->so_cfil_db;
6646
6647 for (int i = 0; i < CFILHASHSIZE; i++) {
6648 cfilhash = &db->cfdb_hashbase[i];
6649
6650 LIST_FOREACH_SAFE(hash_entry, cfilhash, cfentry_link, temp_hash_entry) {
6651 if (hash_entry->cfentry_cfil != NULL) {
6652 if ((sb->sb_flags & SB_RECV) == 0) {
6653 cfi_buf = &hash_entry->cfentry_cfil->cfi_snd;
6654 } else {
6655 cfi_buf = &hash_entry->cfentry_cfil->cfi_rcv;
6656 }
6657
6658 pending = cfi_buf->cfi_pending_last - cfi_buf->cfi_pending_first;
6659 /*
6660  * If we are limited by the "chars of mbufs used", roughly
6661  * adjust so we won't overcommit
6662 */
6663 if ((uint64_t)cfi_buf->cfi_pending_mbcnt > pending) {
6664 pending = cfi_buf->cfi_pending_mbcnt;
6665 }
6666
6667 total_pending += pending;
6668 }
6669 }
6670 }
6671
6672 VERIFY(total_pending < INT32_MAX);
6673 #if DATA_DEBUG
6674 CFIL_LOG(LOG_DEBUG, "CFIL: <so %llx> total pending %llu <check_thread %d>",
6675 (uint64_t)VM_KERNEL_ADDRPERM(so),
6676 total_pending, check_thread);
6677 #endif
6678 }
6679
6680 return (int32_t)(total_pending);
6681 }
6682
6683 int
6684 cfil_sock_udp_notify_shutdown(struct socket *so, int how, int drop_flag, int shut_flag)
6685 {
6686 struct cfil_info *cfil_info = NULL;
6687 struct cfilhashhead *cfilhash = NULL;
6688 struct cfil_db *db = NULL;
6689 struct cfil_hash_entry *hash_entry = NULL;
6690 struct cfil_hash_entry *temp_hash_entry = NULL;
6691 errno_t error = 0;
6692 int done_count = 0;
6693 int kcunit;
6694
6695 socket_lock_assert_owned(so);
6696
6697 if ((so->so_flags & SOF_CONTENT_FILTER) != 0 && so->so_cfil_db != NULL) {
6698 db = so->so_cfil_db;
6699
6700 for (int i = 0; i < CFILHASHSIZE; i++) {
6701 cfilhash = &db->cfdb_hashbase[i];
6702
6703 LIST_FOREACH_SAFE(hash_entry, cfilhash, cfentry_link, temp_hash_entry) {
6704 if (hash_entry->cfentry_cfil != NULL) {
6705 cfil_info = hash_entry->cfentry_cfil;
6706
6707 // This flow is marked as DROP
6708 if (cfil_info->cfi_flags & drop_flag) {
6709 done_count++;
6710 continue;
6711 }
6712
6713 // This flow has been shut already, skip
6714 if (cfil_info->cfi_flags & shut_flag) {
6715 continue;
6716 }
6717 // Mark flow as shut
6718 cfil_info->cfi_flags |= shut_flag;
6719 done_count++;
6720
6721 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
6722 /* Disconnect incoming side */
6723 if (how != SHUT_WR) {
6724 error = cfil_dispatch_disconnect_event(so, cfil_info, kcunit, 0);
6725 }
6726 /* Disconnect outgoing side */
6727 if (how != SHUT_RD) {
6728 error = cfil_dispatch_disconnect_event(so, cfil_info, kcunit, 1);
6729 }
6730 }
6731 }
6732 }
6733 }
6734 }
6735
6736 if (done_count == 0) {
6737 error = ENOTCONN;
6738 }
6739 return error;
6740 }
6741
6742 int
6743 cfil_sock_udp_shutdown(struct socket *so, int *how)
6744 {
6745 int error = 0;
6746
6747 if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || (so->so_cfil_db == NULL)) {
6748 goto done;
6749 }
6750
6751 socket_lock_assert_owned(so);
6752
6753 CFIL_LOG(LOG_INFO, "so %llx how %d",
6754 (uint64_t)VM_KERNEL_ADDRPERM(so), *how);
6755
6756 /*
6757 * Check the state of the socket before the content filter
6758 */
6759 if (*how != SHUT_WR && (so->so_state & SS_CANTRCVMORE) != 0) {
6760 /* read already shut down */
6761 error = ENOTCONN;
6762 goto done;
6763 }
6764 if (*how != SHUT_RD && (so->so_state & SS_CANTSENDMORE) != 0) {
6765 /* write already shut down */
6766 error = ENOTCONN;
6767 goto done;
6768 }
6769
6770 /*
6771 * shutdown read: SHUT_RD or SHUT_RDWR
6772 */
6773 if (*how != SHUT_WR) {
6774 error = cfil_sock_udp_notify_shutdown(so, SHUT_RD, CFIF_DROP, CFIF_SHUT_RD);
6775 if (error != 0) {
6776 goto done;
6777 }
6778 }
6779 /*
6780 * shutdown write: SHUT_WR or SHUT_RDWR
6781 */
6782 if (*how != SHUT_RD) {
6783 error = cfil_sock_udp_notify_shutdown(so, SHUT_WR, CFIF_DROP, CFIF_SHUT_WR);
6784 if (error != 0) {
6785 goto done;
6786 }
6787
6788 /*
6789 * When outgoing data is pending, we delay the shutdown at the
6790 * protocol level until the content filters give the final
6791 * verdict on the pending data.
6792 */
6793 if (cfil_sock_data_pending(&so->so_snd) != 0) {
6794 /*
6795 * When shutting down the read and write sides at once
6796 * we can proceed to the final shutdown of the read
6797 * side. Otherwise, we just return.
6798 */
6799 if (*how == SHUT_WR) {
6800 error = EJUSTRETURN;
6801 } else if (*how == SHUT_RDWR) {
6802 *how = SHUT_RD;
6803 }
6804 }
6805 }
6806 done:
6807 return error;
6808 }
6809
6810 void
6811 cfil_sock_udp_close_wait(struct socket *so)
6812 {
6813 socket_lock_assert_owned(so);
6814
6815 while (cfil_filters_udp_attached(so, FALSE)) {
6816 /*
6817 * Notify the filters we are going away so they can detach
6818 */
6819 cfil_sock_udp_notify_shutdown(so, SHUT_RDWR, 0, 0);
6820
6821 /*
6822  * Make sure we still need to wait after the filters are notified
6823  * of the disconnection
6824 */
6825 if (cfil_filters_udp_attached(so, TRUE) == 0) {
6826 break;
6827 }
6828 }
6829 }
6830
6831 void
6832 cfil_sock_udp_is_closed(struct socket *so)
6833 {
6834 struct cfil_info *cfil_info = NULL;
6835 struct cfilhashhead *cfilhash = NULL;
6836 struct cfil_db *db = NULL;
6837 struct cfil_hash_entry *hash_entry = NULL;
6838 struct cfil_hash_entry *temp_hash_entry = NULL;
6839 errno_t error = 0;
6840 int kcunit;
6841
6842 socket_lock_assert_owned(so);
6843
6844 if ((so->so_flags & SOF_CONTENT_FILTER) != 0 && so->so_cfil_db != NULL) {
6845 db = so->so_cfil_db;
6846
6847 for (int i = 0; i < CFILHASHSIZE; i++) {
6848 cfilhash = &db->cfdb_hashbase[i];
6849
6850 LIST_FOREACH_SAFE(hash_entry, cfilhash, cfentry_link, temp_hash_entry) {
6851 if (hash_entry->cfentry_cfil != NULL) {
6852 cfil_info = hash_entry->cfentry_cfil;
6853
6854 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
6855 /* Let the filters know of the closing */
6856 error = cfil_dispatch_closed_event(so, cfil_info, kcunit);
6857 }
6858
6859 /* Last chance to push passed data out */
6860 error = cfil_acquire_sockbuf(so, cfil_info, 1);
6861 if (error == 0) {
6862 cfil_service_inject_queue(so, cfil_info, 1);
6863 }
6864 cfil_release_sockbuf(so, 1);
6865
6866 cfil_info->cfi_flags |= CFIF_SOCK_CLOSED;
6867
6868 /* Pending data needs to go */
6869 cfil_flush_queues(so, cfil_info);
6870
6871 CFIL_INFO_VERIFY(cfil_info);
6872 }
6873 }
6874 }
6875 }
6876 }
6877
6878 void
6879 cfil_sock_udp_buf_update(struct sockbuf *sb)
6880 {
6881 struct cfil_info *cfil_info = NULL;
6882 struct cfilhashhead *cfilhash = NULL;
6883 struct cfil_db *db = NULL;
6884 struct cfil_hash_entry *hash_entry = NULL;
6885 struct cfil_hash_entry *temp_hash_entry = NULL;
6886 errno_t error = 0;
6887 int outgoing;
6888 struct socket *so = sb->sb_so;
6889
6890 socket_lock_assert_owned(so);
6891
6892 if ((so->so_flags & SOF_CONTENT_FILTER) != 0 && so->so_cfil_db != NULL) {
6893 if (!cfil_sbtrim) {
6894 return;
6895 }
6896
6897 db = so->so_cfil_db;
6898
6899 for (int i = 0; i < CFILHASHSIZE; i++) {
6900 cfilhash = &db->cfdb_hashbase[i];
6901
6902 LIST_FOREACH_SAFE(hash_entry, cfilhash, cfentry_link, temp_hash_entry) {
6903 if (hash_entry->cfentry_cfil != NULL) {
6904 cfil_info = hash_entry->cfentry_cfil;
6905
6906 if ((sb->sb_flags & SB_RECV) == 0) {
6907 if ((cfil_info->cfi_flags & CFIF_RETRY_INJECT_OUT) == 0) {
6908 return;
6909 }
6910 outgoing = 1;
6911 OSIncrementAtomic(&cfil_stats.cfs_inject_q_out_retry);
6912 } else {
6913 if ((cfil_info->cfi_flags & CFIF_RETRY_INJECT_IN) == 0) {
6914 return;
6915 }
6916 outgoing = 0;
6917 OSIncrementAtomic(&cfil_stats.cfs_inject_q_in_retry);
6918 }
6919
6920 CFIL_LOG(LOG_NOTICE, "so %llx outgoing %d",
6921 (uint64_t)VM_KERNEL_ADDRPERM(so), outgoing);
6922
6923 error = cfil_acquire_sockbuf(so, cfil_info, outgoing);
6924 if (error == 0) {
6925 cfil_service_inject_queue(so, cfil_info, outgoing);
6926 }
6927 cfil_release_sockbuf(so, outgoing);
6928 }
6929 }
6930 }
6931 }
6932 }
6933
6934 void
6935 cfil_filter_show(u_int32_t kcunit)
6936 {
6937 struct content_filter *cfc = NULL;
6938 struct cfil_entry *entry;
6939 int count = 0;
6940
6941 if (content_filters == NULL) {
6942 return;
6943 }
6944 if (kcunit > MAX_CONTENT_FILTER) {
6945 return;
6946 }
6947
6948 cfil_rw_lock_shared(&cfil_lck_rw);
6949
6950 if (content_filters[kcunit - 1] == NULL) {
6951 cfil_rw_unlock_shared(&cfil_lck_rw);
6952 return;
6953 }
6954 cfc = content_filters[kcunit - 1];
6955
6956 CFIL_LOG(LOG_ERR, "CFIL: FILTER SHOW: Filter <unit %d, entry count %d> flags <%lx>:",
6957 kcunit, cfc->cf_sock_count, (unsigned long)cfc->cf_flags);
6958 if (cfc->cf_flags & CFF_DETACHING) {
6959 CFIL_LOG(LOG_ERR, "CFIL: FILTER SHOW: - DETACHING");
6960 }
6961 if (cfc->cf_flags & CFF_ACTIVE) {
6962 CFIL_LOG(LOG_ERR, "CFIL: FILTER SHOW: - ACTIVE");
6963 }
6964 if (cfc->cf_flags & CFF_FLOW_CONTROLLED) {
6965 CFIL_LOG(LOG_ERR, "CFIL: FILTER SHOW: - FLOW CONTROLLED");
6966 }
6967
6968 TAILQ_FOREACH(entry, &cfc->cf_sock_entries, cfe_link) {
6969 if (entry->cfe_cfil_info && entry->cfe_cfil_info->cfi_so) {
6970 struct cfil_info *cfil_info = entry->cfe_cfil_info;
6971
6972 count++;
6973
6974 if (entry->cfe_flags & CFEF_CFIL_DETACHED) {
6975 cfil_info_log(LOG_ERR, cfil_info, "CFIL: FILTER SHOW: - DETACHED");
6976 } else {
6977 cfil_info_log(LOG_ERR, cfil_info, "CFIL: FILTER SHOW: - ATTACHED");
6978 }
6979 }
6980 }
6981
6982 CFIL_LOG(LOG_ERR, "CFIL: FILTER SHOW: Filter - total entries shown: %d", count);
6983
6984 cfil_rw_unlock_shared(&cfil_lck_rw);
6985 }
6986
6987 void
6988 cfil_info_show(void)
6989 {
6990 struct cfil_info *cfil_info;
6991 int count = 0;
6992
6993 cfil_rw_lock_shared(&cfil_lck_rw);
6994
6995 CFIL_LOG(LOG_ERR, "CFIL: INFO SHOW: count %d", cfil_sock_attached_count);
6996
6997 TAILQ_FOREACH(cfil_info, &cfil_sock_head, cfi_link) {
6998 count++;
6999
7000 cfil_info_log(LOG_ERR, cfil_info, "CFIL: INFO SHOW");
7001
7002 if (cfil_info->cfi_flags & CFIF_DROP) {
7003 CFIL_LOG(LOG_ERR, "CFIL: INFO FLAG - DROP");
7004 }
7005 if (cfil_info->cfi_flags & CFIF_CLOSE_WAIT) {
7006 CFIL_LOG(LOG_ERR, "CFIL: INFO FLAG - CLOSE_WAIT");
7007 }
7008 if (cfil_info->cfi_flags & CFIF_SOCK_CLOSED) {
7009 CFIL_LOG(LOG_ERR, "CFIL: INFO FLAG - SOCK_CLOSED");
7010 }
7011 if (cfil_info->cfi_flags & CFIF_RETRY_INJECT_IN) {
7012 CFIL_LOG(LOG_ERR, "CFIL: INFO FLAG - RETRY_INJECT_IN");
7013 }
7014 if (cfil_info->cfi_flags & CFIF_RETRY_INJECT_OUT) {
7015 CFIL_LOG(LOG_ERR, "CFIL: INFO FLAG - RETRY_INJECT_OUT");
7016 }
7017 if (cfil_info->cfi_flags & CFIF_SHUT_WR) {
7018 CFIL_LOG(LOG_ERR, "CFIL: INFO FLAG - SHUT_WR");
7019 }
7020 if (cfil_info->cfi_flags & CFIF_SHUT_RD) {
7021 CFIL_LOG(LOG_ERR, "CFIL: INFO FLAG - SHUT_RD");
7022 }
7023 }
7024
7025 CFIL_LOG(LOG_ERR, "CFIL: INFO SHOW: total cfil_info shown: %d", count);
7026
7027 cfil_rw_unlock_shared(&cfil_lck_rw);
7028 }
7029
7030 bool
7031 cfil_info_idle_timed_out(struct cfil_info *cfil_info, int timeout, u_int64_t current_time)
7032 {
7033 if (cfil_info && cfil_info->cfi_hash_entry &&
7034 (current_time - cfil_info->cfi_hash_entry->cfentry_lastused >= (u_int64_t)timeout)) {
7035 #if GC_DEBUG
7036 cfil_info_log(LOG_ERR, cfil_info, "CFIL: flow IDLE timeout expired");
7037 #endif
7038 return true;
7039 }
7040 return false;
7041 }
7042
7043 bool
7044 cfil_info_action_timed_out(struct cfil_info *cfil_info, int timeout)
7045 {
7046 struct cfil_entry *entry;
7047 struct timeval current_tv;
7048 struct timeval diff_time;
7049
7050 if (cfil_info == NULL) {
7051 return false;
7052 }
7053
7054 /*
7055 * If we have queued up more data than passed offset and we haven't received
7056 * an action from user space for a while (the user space filter might have crashed),
7057 * return action timed out.
7058 */
7059 if (cfil_info->cfi_snd.cfi_pending_last > cfil_info->cfi_snd.cfi_pass_offset ||
7060 cfil_info->cfi_rcv.cfi_pending_last > cfil_info->cfi_rcv.cfi_pass_offset) {
7061 microuptime(&current_tv);
7062
7063 for (int kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
7064 entry = &cfil_info->cfi_entries[kcunit - 1];
7065
7066 if (entry->cfe_filter == NULL) {
7067 continue;
7068 }
7069
7070 if (cfil_info->cfi_snd.cfi_pending_last > entry->cfe_snd.cfe_pass_offset ||
7071 cfil_info->cfi_rcv.cfi_pending_last > entry->cfe_rcv.cfe_pass_offset) {
7072 // haven't gotten an action from this filter, check timeout
7073 timersub(&current_tv, &entry->cfe_last_action, &diff_time);
7074 if (diff_time.tv_sec >= timeout) {
7075 #if GC_DEBUG
7076 cfil_info_log(LOG_ERR, cfil_info, "CFIL: flow ACTION timeout expired");
7077 #endif
7078 return true;
7079 }
7080 }
7081 }
7082 }
7083 return false;
7084 }
7085
7086 bool
7087 cfil_info_buffer_threshold_exceeded(struct cfil_info *cfil_info)
7088 {
7089 if (cfil_info == NULL) {
7090 return false;
7091 }
7092
7093 /*
7094 * Clean up flow if it exceeded queue thresholds
7095 */
7096 if (cfil_info->cfi_snd.cfi_tail_drop_cnt ||
7097 cfil_info->cfi_rcv.cfi_tail_drop_cnt) {
7098 #if GC_DEBUG
7099 CFIL_LOG(LOG_ERR, "CFIL: queue threshold exceeded: mbuf max <count: %d bytes: %d> tail drop count <OUT: %d IN: %d>",
7100 cfil_udp_gc_mbuf_num_max,
7101 cfil_udp_gc_mbuf_cnt_max,
7102 cfil_info->cfi_snd.cfi_tail_drop_cnt,
7103 cfil_info->cfi_rcv.cfi_tail_drop_cnt);
7104 cfil_info_log(LOG_ERR, cfil_info, "CFIL: queue threshold exceeded");
7105 #endif
7106 return true;
7107 }
7108
7109 return false;
7110 }
7111
7112 static void
7113 cfil_udp_gc_thread_sleep(bool forever)
7114 {
7115 if (forever) {
7116 (void) assert_wait((event_t) &cfil_sock_udp_attached_count,
7117 THREAD_INTERRUPTIBLE);
7118 } else {
7119 uint64_t deadline = 0;
7120 nanoseconds_to_absolutetime(UDP_FLOW_GC_RUN_INTERVAL_NSEC, &deadline);
7121 clock_absolutetime_interval_to_deadline(deadline, &deadline);
7122
7123 (void) assert_wait_deadline(&cfil_sock_udp_attached_count,
7124 THREAD_INTERRUPTIBLE, deadline);
7125 }
7126 }
7127
7128 static void
7129 cfil_udp_gc_thread_func(void *v, wait_result_t w)
7130 {
7131 #pragma unused(v, w)
7132
7133 ASSERT(cfil_udp_gc_thread == current_thread());
7134  thread_set_thread_name(current_thread(), "CFIL_UDP_GC");
7135
7136 // Kick off gc shortly
7137 cfil_udp_gc_thread_sleep(false);
7138 thread_block_parameter((thread_continue_t) cfil_info_udp_expire, NULL);
7139 /* NOTREACHED */
7140 }
7141
7142 static void
7143 cfil_info_udp_expire(void *v, wait_result_t w)
7144 {
7145 #pragma unused(v, w)
7146
7147 static uint64_t expired_array[UDP_FLOW_GC_MAX_COUNT];
7148 static uint32_t expired_count = 0;
7149
7150 struct cfil_info *cfil_info;
7151 struct cfil_hash_entry *hash_entry;
7152 struct cfil_db *db;
7153 struct socket *so;
7154 u_int64_t current_time = 0;
7155
7156 current_time = net_uptime();
7157
7158 // Get all expired UDP flow ids
7159 cfil_rw_lock_shared(&cfil_lck_rw);
7160
7161 if (cfil_sock_udp_attached_count == 0) {
7162 cfil_rw_unlock_shared(&cfil_lck_rw);
7163 goto go_sleep;
7164 }
7165
7166 TAILQ_FOREACH(cfil_info, &cfil_sock_head, cfi_link) {
7167 if (expired_count >= UDP_FLOW_GC_MAX_COUNT) {
7168 break;
7169 }
7170
7171 if (IS_IP_DGRAM(cfil_info->cfi_so)) {
7172 if (cfil_info_idle_timed_out(cfil_info, UDP_FLOW_GC_IDLE_TO, current_time) ||
7173 cfil_info_action_timed_out(cfil_info, UDP_FLOW_GC_ACTION_TO) ||
7174 cfil_info_buffer_threshold_exceeded(cfil_info)) {
7175 expired_array[expired_count] = cfil_info->cfi_sock_id;
7176 expired_count++;
7177 }
7178 }
7179 }
7180 cfil_rw_unlock_shared(&cfil_lck_rw);
7181
7182 if (expired_count == 0) {
7183 goto go_sleep;
7184 }
7185
7186 for (uint32_t i = 0; i < expired_count; i++) {
7187 // Look up and lock the socket from its sock id (datagram flows only)
7188 so = cfil_socket_from_sock_id(expired_array[i], true);
7189 if (so == NULL) {
7190 continue;
7191 }
7192
7193 cfil_info = cfil_db_get_cfil_info(so->so_cfil_db, expired_array[i]);
7194 if (cfil_info == NULL) {
7195 goto unlock;
7196 }
7197
7198 db = so->so_cfil_db;
7199 hash_entry = cfil_info->cfi_hash_entry;
7200
7201 if (db == NULL || hash_entry == NULL) {
7202 goto unlock;
7203 }
7204
7205 #if GC_DEBUG || LIFECYCLE_DEBUG
7206 cfil_info_log(LOG_ERR, cfil_info, "CFIL: LIFECYCLE: GC CLEAN UP");
7207 #endif
7208
7209 for (int kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
7210 /* Let the filters know of the closing */
7211 if (cfil_dispatch_closed_event(so, cfil_info, kcunit) != 0) {
7212 goto unlock;
7213 }
7214 }
7215
7216 cfil_db_delete_entry(db, hash_entry);
7217 CFIL_INFO_FREE(cfil_info);
7218 OSIncrementAtomic(&cfil_stats.cfs_sock_detached);
7219
7220 if (so->so_flags & SOF_CONTENT_FILTER) {
7221 if (db->cfdb_count == 0) {
7222 so->so_flags &= ~SOF_CONTENT_FILTER;
7223 }
7224 VERIFY(so->so_usecount > 0);
7225 so->so_usecount--;
7226 }
7227 unlock:
7228 socket_unlock(so, 1);
7229 }
7230
7231 #if GC_DEBUG
7232 CFIL_LOG(LOG_ERR, "CFIL: UDP flow idle timeout check: expired %d idle flows", expired_count);
7233 #endif
7234 expired_count = 0;
7235
7236 go_sleep:
7237
7238 // Sleep forever (until woken up) if there are no more UDP flows to clean
7239 cfil_rw_lock_shared(&cfil_lck_rw);
7240 cfil_udp_gc_thread_sleep(cfil_sock_udp_attached_count == 0);
7241 cfil_rw_unlock_shared(&cfil_lck_rw);
7242 thread_block_parameter((thread_continue_t)cfil_info_udp_expire, NULL);
7243 /* NOTREACHED */
7244 }
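/*
 * Editor's note (not part of the original source): the GC above is deliberately
 * two-phase.  Phase 1 walks cfil_sock_head under the shared cfil lock and only
 * records up to UDP_FLOW_GC_MAX_COUNT expired sock ids.  Phase 2 drops the cfil
 * lock and re-resolves each sock id to a locked socket via
 * cfil_socket_from_sock_id() before dispatching closed events and deleting the
 * hash entry, which avoids holding the global cfil lock across per-socket
 * cleanup.
 */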
7245
7246 struct m_tag *
7247 cfil_dgram_save_socket_state(struct cfil_info *cfil_info, struct mbuf *m)
7248 {
7249 struct m_tag *tag = NULL;
7250 struct cfil_tag *ctag = NULL;
7251 struct cfil_hash_entry *hash_entry = NULL;
7252 struct inpcb *inp = NULL;
7253
7254 if (cfil_info == NULL || cfil_info->cfi_so == NULL ||
7255 cfil_info->cfi_hash_entry == NULL || m == NULL || !(m->m_flags & M_PKTHDR)) {
7256 return NULL;
7257 }
7258
7259 inp = sotoinpcb(cfil_info->cfi_so);
7260
7261 /* Allocate a tag */
7262 tag = m_tag_create(KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_CFIL_UDP,
7263 sizeof(struct cfil_tag), M_DONTWAIT, m);
7264
7265 if (tag) {
7266 ctag = (struct cfil_tag*)(tag + 1);
7267 ctag->cfil_so_state_change_cnt = cfil_info->cfi_so->so_state_change_cnt;
7268 ctag->cfil_so_options = cfil_info->cfi_so->so_options;
7269 ctag->cfil_inp_flags = inp ? inp->inp_flags : 0;
7270
7271 hash_entry = cfil_info->cfi_hash_entry;
7272 if (hash_entry->cfentry_family == AF_INET6) {
7273 fill_ip6_sockaddr_4_6(&ctag->cfil_faddr,
7274 &hash_entry->cfentry_faddr.addr6,
7275 hash_entry->cfentry_fport);
7276 } else if (hash_entry->cfentry_family == AF_INET) {
7277 fill_ip_sockaddr_4_6(&ctag->cfil_faddr,
7278 hash_entry->cfentry_faddr.addr46.ia46_addr4,
7279 hash_entry->cfentry_fport);
7280 }
7281 m_tag_prepend(m, tag);
7282 return tag;
7283 }
7284 return NULL;
7285 }
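/*
 * Editor's note (not part of the original source): m_tag_create() allocates the
 * m_tag header and the requested payload in one block, so the
 * "(struct cfil_tag *)(tag + 1)" cast above points at the payload immediately
 * following the header.  A minimal sketch of the round trip, with hypothetical
 * variable names:
 *
 *   struct m_tag *tag = cfil_dgram_save_socket_state(cfil_info, m);
 *   ...
 *   struct cfil_tag *ctag = (struct cfil_tag *)(tag + 1);
 *   // ctag->cfil_so_options, ctag->cfil_faddr, ... now travel with the mbuf
 */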
7286
7287 struct m_tag *
7288 cfil_dgram_get_socket_state(struct mbuf *m, uint32_t *state_change_cnt, uint32_t *options,
7289 struct sockaddr **faddr, int *inp_flags)
7290 {
7291 struct m_tag *tag = NULL;
7292 struct cfil_tag *ctag = NULL;
7293
7294 tag = m_tag_locate(m, KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_CFIL_UDP, NULL);
7295 if (tag) {
7296 ctag = (struct cfil_tag *)(tag + 1);
7297 if (state_change_cnt) {
7298 *state_change_cnt = ctag->cfil_so_state_change_cnt;
7299 }
7300 if (options) {
7301 *options = ctag->cfil_so_options;
7302 }
7303 if (faddr) {
7304 *faddr = (struct sockaddr *) &ctag->cfil_faddr;
7305 }
7306 if (inp_flags) {
7307 *inp_flags = ctag->cfil_inp_flags;
7308 }
7309
7310 /*
7311 * Unlink the tag and hand it over to the caller.
7312 * Note that the caller is responsible for freeing it.
7313 */
7314 m_tag_unlink(m, tag);
7315 return tag;
7316 }
7317 return NULL;
7318 }
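/*
 * Editor's note (not part of the original source): because the tag is unlinked
 * above, the caller owns it once this function returns.  A sketch of the
 * expected usage, assuming the caller releases the tag with m_tag_free():
 *
 *   struct sockaddr *faddr = NULL;
 *   int inp_flags = 0;
 *   struct m_tag *tag = cfil_dgram_get_socket_state(m, NULL, NULL, &faddr, &inp_flags);
 *   if (tag != NULL) {
 *       // faddr points into the tag payload, so use it before freeing
 *       m_tag_free(tag);
 *   }
 */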
7319
7320 boolean_t
7321 cfil_dgram_peek_socket_state(struct mbuf *m, int *inp_flags)
7322 {
7323 struct m_tag *tag = NULL;
7324 struct cfil_tag *ctag = NULL;
7325
7326 tag = m_tag_locate(m, KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_CFIL_UDP, NULL);
7327 if (tag) {
7328 ctag = (struct cfil_tag *)(tag + 1);
7329 if (inp_flags) {
7330 *inp_flags = ctag->cfil_inp_flags;
7331 }
7332 return true;
7333 }
7334 return false;
7335 }
7336
7337 static int
7338 cfil_dispatch_stats_event_locked(int kcunit, struct cfil_stats_report_buffer *buffer, uint32_t stats_count)
7339 {
7340 struct content_filter *cfc = NULL;
7341 errno_t error = 0;
7342 size_t msgsize = 0;
7343
7344 if (buffer == NULL || stats_count == 0) {
7345 return error;
7346 }
7347
7348 if (content_filters == NULL || kcunit > MAX_CONTENT_FILTER) {
7349 return error;
7350 }
7351
7352 cfc = content_filters[kcunit - 1];
7353 if (cfc == NULL) {
7354 return error;
7355 }
7356
7357 /* The filter is flow-controlled; enqueueing now would be wasteful */
7358 if (cfc->cf_flags & CFF_FLOW_CONTROLLED) {
7359 error = ENOBUFS;
7360 goto done;
7361 }
7362
7363 msgsize = sizeof(struct cfil_msg_stats_report) + (sizeof(struct cfil_msg_sock_stats) * stats_count);
7364 buffer->msghdr.cfm_len = (uint32_t)msgsize;
7365 buffer->msghdr.cfm_version = 1;
7366 buffer->msghdr.cfm_type = CFM_TYPE_EVENT;
7367 buffer->msghdr.cfm_op = CFM_OP_STATS;
7368 buffer->msghdr.cfm_sock_id = 0;
7369 buffer->count = stats_count;
7370
7371 #if STATS_DEBUG
7372 CFIL_LOG(LOG_ERR, "STATS (kcunit %d): msg size %lu - %lu %lu %lu",
7373 kcunit,
7374 (unsigned long)msgsize,
7375 (unsigned long)sizeof(struct cfil_msg_stats_report),
7376 (unsigned long)sizeof(struct cfil_msg_sock_stats),
7377 (unsigned long)stats_count);
7378 #endif
7379
7380 error = ctl_enqueuedata(cfc->cf_kcref, cfc->cf_kcunit,
7381 buffer,
7382 msgsize,
7383 CTL_DATA_EOR);
7384 if (error != 0) {
7385 CFIL_LOG(LOG_ERR, "ctl_enqueuedata() failed: %d", error);
7386 goto done;
7387 }
7388 OSIncrementAtomic(&cfil_stats.cfs_stats_event_ok);
7389
7390 #if STATS_DEBUG
7391 CFIL_LOG(LOG_ERR, "CFIL: STATS REPORT: send msg to %d", kcunit);
7392 #endif
7393
7394 done:
7395
7396 if (error == ENOBUFS) {
7397 OSIncrementAtomic(
7398 &cfil_stats.cfs_stats_event_flow_control);
7399
7400 if (!cfil_rw_lock_shared_to_exclusive(&cfil_lck_rw)) {
7401 cfil_rw_lock_exclusive(&cfil_lck_rw);
7402 }
7403
7404 cfc->cf_flags |= CFF_FLOW_CONTROLLED;
7405
7406 cfil_rw_lock_exclusive_to_shared(&cfil_lck_rw);
7407 } else if (error != 0) {
7408 OSIncrementAtomic(&cfil_stats.cfs_stats_event_fail);
7409 }
7410
7411 return error;
7412 }
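/*
 * Editor's note (not part of the original source): on ENOBUFS the filter is
 * marked CFF_FLOW_CONTROLLED so later report rounds skip it (see the
 * flow-controlled check above).  Setting the flag requires the exclusive cfil
 * lock; a failed shared-to-exclusive upgrade releases the shared hold, so the
 * code re-acquires exclusively, sets the flag, and then downgrades back to
 * shared for the caller.  The flag is presumably cleared elsewhere once the
 * kernel control socket drains.
 */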
7413
7414 static void
7415 cfil_stats_report_thread_sleep(bool forever)
7416 {
7417 #if STATS_DEBUG
7418 CFIL_LOG(LOG_ERR, "CFIL: STATS COLLECTION SLEEP");
7419 #endif
7420
7421 if (forever) {
7422 (void) assert_wait((event_t) &cfil_sock_attached_stats_count,
7423 THREAD_INTERRUPTIBLE);
7424 } else {
7425 uint64_t deadline = 0;
7426 nanoseconds_to_absolutetime(CFIL_STATS_REPORT_RUN_INTERVAL_NSEC, &deadline);
7427 clock_absolutetime_interval_to_deadline(deadline, &deadline);
7428
7429 (void) assert_wait_deadline(&cfil_sock_attached_stats_count,
7430 THREAD_INTERRUPTIBLE, deadline);
7431 }
7432 }
7433
7434 static void
7435 cfil_stats_report_thread_func(void *v, wait_result_t w)
7436 {
7437 #pragma unused(v, w)
7438
7439 ASSERT(cfil_stats_report_thread == current_thread());
7440 thread_set_thread_name(current_thread(), "CFIL_STATS_REPORT");
7441
7442 // Kick off the first stats report shortly
7443 cfil_stats_report_thread_sleep(false);
7444 thread_block_parameter((thread_continue_t) cfil_stats_report, NULL);
7445 /* NOTREACHED */
7446 }
7447
7448 static bool
7449 cfil_stats_collect_flow_stats_for_filter(int kcunit,
7450 struct cfil_info *cfil_info,
7451 struct cfil_entry *entry,
7452 struct timeval current_tv)
7453 {
7454 struct cfil_stats_report_buffer *buffer = NULL;
7455 struct cfil_msg_sock_stats *flow_array = NULL;
7456 struct cfil_msg_sock_stats *stats = NULL;
7457 struct inpcb *inp = NULL;
7458 struct timeval diff_time;
7459 uint64_t diff_time_usecs;
7460 int index = 0;
7461
7462 if (entry->cfe_stats_report_frequency == 0) {
7463 return false;
7464 }
7465
7466 buffer = global_cfil_stats_report_buffers[kcunit - 1];
7467 if (buffer == NULL) {
7468 #if STATS_DEBUG
7469 CFIL_LOG(LOG_ERR, "CFIL: STATS: no buffer");
7470 #endif
7471 return false;
7472 }
7473
7474 timersub(&current_tv, &entry->cfe_stats_report_ts, &diff_time);
7475 diff_time_usecs = (diff_time.tv_sec * USEC_PER_SEC) + diff_time.tv_usec;
7476
7477 #if STATS_DEBUG
7478 CFIL_LOG(LOG_ERR, "CFIL: STATS REPORT - elapsed time - ts %llu %llu cur ts %llu %llu diff %llu %llu (usecs %llu) @freq %llu usecs sockID %llu",
7479 (unsigned long long)entry->cfe_stats_report_ts.tv_sec,
7480 (unsigned long long)entry->cfe_stats_report_ts.tv_usec,
7481 (unsigned long long)current_tv.tv_sec,
7482 (unsigned long long)current_tv.tv_usec,
7483 (unsigned long long)diff_time.tv_sec,
7484 (unsigned long long)diff_time.tv_usec,
7485 (unsigned long long)diff_time_usecs,
7486 (unsigned long long)((entry->cfe_stats_report_frequency * NSEC_PER_MSEC) / NSEC_PER_USEC),
7487 cfil_info->cfi_sock_id);
7488 #endif
7489
7490 // Compare elapsed time in usecs
7491 if (diff_time_usecs >= (entry->cfe_stats_report_frequency * NSEC_PER_MSEC) / NSEC_PER_USEC) {
7492 #if STATS_DEBUG
7493 CFIL_LOG(LOG_ERR, "CFIL: STATS REPORT - in %llu reported %llu",
7494 cfil_info->cfi_byte_inbound_count,
7495 entry->cfe_byte_inbound_count_reported);
7496 CFIL_LOG(LOG_ERR, "CFIL: STATS REPORT - out %llu reported %llu",
7497 cfil_info->cfi_byte_outbound_count,
7498 entry->cfe_byte_outbound_count_reported);
7499 #endif
7500 // Check if flow has new bytes that have not been reported
7501 if (entry->cfe_byte_inbound_count_reported < cfil_info->cfi_byte_inbound_count ||
7502 entry->cfe_byte_outbound_count_reported < cfil_info->cfi_byte_outbound_count) {
7503 flow_array = (struct cfil_msg_sock_stats *)&buffer->stats;
7504 index = global_cfil_stats_counts[kcunit - 1];
7505
7506 stats = &flow_array[index];
7507 stats->cfs_sock_id = cfil_info->cfi_sock_id;
7508 stats->cfs_byte_inbound_count = cfil_info->cfi_byte_inbound_count;
7509 stats->cfs_byte_outbound_count = cfil_info->cfi_byte_outbound_count;
7510
7511 if (entry->cfe_laddr_sent == false) {
7512 /* Cache the attach-time local address if we do not have it yet */
7513 if (cfil_info->cfi_so_attach_laddr.sa.sa_len == 0) {
7514 inp = cfil_info->cfi_so ? sotoinpcb(cfil_info->cfi_so) : NULL;
7515 if (inp != NULL) {
7516 boolean_t outgoing = (cfil_info->cfi_dir == CFS_CONNECTION_DIR_OUT);
7517 union sockaddr_in_4_6 *src = outgoing ? &cfil_info->cfi_so_attach_laddr : NULL;
7518 union sockaddr_in_4_6 *dst = outgoing ? NULL : &cfil_info->cfi_so_attach_laddr;
7519 cfil_fill_event_msg_addresses(cfil_info->cfi_hash_entry, inp,
7520 src, dst, !IS_INP_V6(inp), outgoing);
7521 }
7522 }
7523
7524 if (cfil_info->cfi_so_attach_laddr.sa.sa_len != 0) {
7525 stats->cfs_laddr.sin6 = cfil_info->cfi_so_attach_laddr.sin6;
7526 entry->cfe_laddr_sent = true;
7527 }
7528 }
7529
7530 global_cfil_stats_counts[kcunit - 1]++;
7531
7532 entry->cfe_stats_report_ts = current_tv;
7533 entry->cfe_byte_inbound_count_reported = cfil_info->cfi_byte_inbound_count;
7534 entry->cfe_byte_outbound_count_reported = cfil_info->cfi_byte_outbound_count;
7535 #if STATS_DEBUG
7536 cfil_info_log(LOG_ERR, cfil_info, "CFIL: LIFECYCLE: STATS COLLECTED");
7537 #endif
7538 CFI_ADD_TIME_LOG(cfil_info, &current_tv, &cfil_info->cfi_first_event, CFM_OP_STATS);
7539 return true;
7540 }
7541 }
7542 return false;
7543 }
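/*
 * Editor's note (not part of the original source): the comparison above converts
 * cfe_stats_report_frequency to microseconds, which implies the frequency is
 * expressed in milliseconds:
 *
 *   threshold_usecs = freq * NSEC_PER_MSEC / NSEC_PER_USEC = freq * 1000
 *
 * e.g. a frequency of 500 becomes 500,000 usecs, so a flow is reported at most
 * once every 500 ms per filter, and only if its byte counters have moved since
 * the last report.
 */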
7544
7545 static void
7546 cfil_stats_report(void *v, wait_result_t w)
7547 {
7548 #pragma unused(v, w)
7549
7550 struct cfil_info *cfil_info = NULL;
7551 struct cfil_entry *entry = NULL;
7552 struct timeval current_tv;
7553 uint32_t flow_count = 0;
7554 uint64_t saved_next_sock_id = 0; // Sock id at which to resume on the next loop iteration
7555 bool flow_reported = false;
7556
7557 #if STATS_DEBUG
7558 CFIL_LOG(LOG_ERR, "CFIL: STATS COLLECTION RUNNING");
7559 #endif
7560
7561 do {
7562 // Collect the sock ids of all flows that have new stats
7563 cfil_rw_lock_shared(&cfil_lck_rw);
7564
7565 if (cfil_sock_attached_stats_count == 0) {
7566 #if STATS_DEBUG
7567 CFIL_LOG(LOG_ERR, "CFIL: STATS: no flow");
7568 #endif
7569 cfil_rw_unlock_shared(&cfil_lck_rw);
7570 goto go_sleep;
7571 }
7572
7573 for (int kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
7574 if (global_cfil_stats_report_buffers[kcunit - 1] != NULL) {
7575 memset(global_cfil_stats_report_buffers[kcunit - 1], 0, sizeof(struct cfil_stats_report_buffer));
7576 }
7577 global_cfil_stats_counts[kcunit - 1] = 0;
7578 }
7579
7580 microuptime(&current_tv);
7581 flow_count = 0;
7582
7583 TAILQ_FOREACH(cfil_info, &cfil_sock_head_stats, cfi_link_stats) {
7584 if (saved_next_sock_id != 0 &&
7585 saved_next_sock_id == cfil_info->cfi_sock_id) {
7586 // This is where we left off previously; start accumulating again
7587 saved_next_sock_id = 0;
7588 }
7589
7590 if (saved_next_sock_id == 0) {
7591 if (flow_count >= CFIL_STATS_REPORT_MAX_COUNT) {
7592 // Examine a fixed number of flows each round. Remember the current flow
7593 // so we can start from here on the next loop
7594 saved_next_sock_id = cfil_info->cfi_sock_id;
7595 break;
7596 }
7597
7598 flow_reported = false;
7599 for (int kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
7600 entry = &cfil_info->cfi_entries[kcunit - 1];
7601 if (entry->cfe_filter == NULL) {
7602 #if STATS_DEBUG
7603 CFIL_LOG(LOG_NOTICE, "CFIL: STATS REPORT - so %llx no filter",
7604 cfil_info->cfi_so ? (uint64_t)VM_KERNEL_ADDRPERM(cfil_info->cfi_so) : 0);
7605 #endif
7606 continue;
7607 }
7608
7609 if ((entry->cfe_stats_report_frequency > 0) &&
7610 cfil_stats_collect_flow_stats_for_filter(kcunit, cfil_info, entry, current_tv) == true) {
7611 flow_reported = true;
7612 }
7613 }
7614 if (flow_reported == true) {
7615 flow_count++;
7616 }
7617 }
7618 }
7619
7620 if (flow_count > 0) {
7621 #if STATS_DEBUG
7622 CFIL_LOG(LOG_ERR, "CFIL: STATS reporting for %d flows", flow_count);
7623 #endif
7624 for (int kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
7625 if (global_cfil_stats_report_buffers[kcunit - 1] != NULL &&
7626 global_cfil_stats_counts[kcunit - 1] > 0) {
7627 cfil_dispatch_stats_event_locked(kcunit,
7628 global_cfil_stats_report_buffers[kcunit - 1],
7629 global_cfil_stats_counts[kcunit - 1]);
7630 }
7631 }
7632 } else {
7633 cfil_rw_unlock_shared(&cfil_lck_rw);
7634 goto go_sleep;
7635 }
7636
7637 cfil_rw_unlock_shared(&cfil_lck_rw);
7638
7639 // Loop again if we haven't finished the whole cfil_info list
7640 } while (saved_next_sock_id != 0);
7641
7642 go_sleep:
7643
7644 // Sleep forever (until woken up) if there are no more flows to report
7645 cfil_rw_lock_shared(&cfil_lck_rw);
7646 cfil_stats_report_thread_sleep(cfil_sock_attached_stats_count == 0);
7647 cfil_rw_unlock_shared(&cfil_lck_rw);
7648 thread_block_parameter((thread_continue_t) cfil_stats_report, NULL);
7649 /* NOTREACHED */
7650 }
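/*
 * Editor's note (not part of the original source): a sketch of the pacing scheme
 * used above.  Each round examines at most CFIL_STATS_REPORT_MAX_COUNT flows;
 * when the cap is hit, the current sock id is saved and the outer do/while loop
 * runs another round that skips forward to that id before accumulating again:
 *
 *   saved = 0;
 *   do {
 *       walk flows, skipping until cfi_sock_id == saved (if saved != 0);
 *       stop and remember cfi_sock_id once MAX_COUNT flows have been reported;
 *       flush per-filter buffers with cfil_dispatch_stats_event_locked();
 *   } while (saved != 0);
 */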