1 /*
2 * Copyright (c) 2013-2020 Apple Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. Please obtain a copy of the License at
10 * http://www.opensource.apple.com/apsl/ and read it before using this
11 * file.
12 *
13 * The Original Code and all software distributed under the License are
14 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
18 * Please see the License for the specific language governing rights and
19 * limitations under the License.
20 *
21 * @APPLE_LICENSE_HEADER_END@
22 */
23
24 /*
25 * THEORY OF OPERATION
26 *
27 * The socket content filter subsystem provides a way for user space agents to
28 * make filtering decisions based on the content of the data being sent and
29 * received by INET/INET6 sockets.
30 *
31 * A content filter user space agent gets a copy of the data, and the data is
32 * also kept in a kernel buffer until the user space agent makes a pass or drop
33 * decision. This unidirectional flow of content avoids unnecessary data copies
34 * back to the kernel.
35 *
36 * A user space filter agent opens a kernel control socket with the name
37 * CONTENT_FILTER_CONTROL_NAME to attach to the socket content filter subsystem.
38 * When connected, a "struct content_filter" is created and set as the
39 * "unitinfo" of the corresponding kernel control socket instance.
40 *
41 * The socket content filter subsystem exchanges messages with the user space
42 * filter agent until an ultimate pass or drop decision is made by the
43 * user space filter agent.
44 *
45 * It should be noted that messages about many INET/INET6 sockets can be multiplexed
46 * over a single kernel control socket.
47 *
48 * Notes:
49 * - The current implementation supports all INET/INET6 sockets (i.e. TCP,
50 * UDP, ICMP, etc).
51 * - The current implementation supports up to two simultaneous content filters
52 * for iOS devices and eight simultaneous content filters for OSX.
53 *
54 *
55 * NECP FILTER CONTROL UNIT
56 *
57 * A user space filter agent uses the Network Extension Control Policy (NECP)
58 * database to specify which INET/INET6 sockets need to be filtered. The NECP
59 * criteria may be based on a variety of properties like user ID or proc UUID.
60 *
61 * The NECP "filter control unit" is used by the socket content filter subsystem
62 * to deliver the relevant INET/INET6 content information to the appropriate
63 * user space filter agent via its kernel control socket instance.
64 * This works as follows:
65 *
66 * 1) The user space filter agent specifies an NECP filter control unit when
67 * it adds its filtering rules to the NECP database.
68 *
69 * 2) The user space filter agent also sets its NECP filter control unit on the
70 * content filter kernel control socket via the socket option
71 * CFIL_OPT_NECP_CONTROL_UNIT.
72 *
73 * 3) The NECP database is consulted to find out if a given INET/INET6 socket
74 * needs to be subjected to content filtering and returns the corresponding
75 * NECP filter control unit -- the NECP filter control unit is actually
76 * stored in the INET/INET6 socket structure so the NECP lookup is really simple.
77 *
78 * 4) The NECP filter control unit is then used to find the corresponding
79 * kernel control socket instance.
80 *
81 * Note: NECP currently supports a single filter control unit per INET/INET6 socket
82 * but this restriction may soon be lifted.
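 *
 * By way of illustration only (user space code, not part of this file; error
 * handling and the required entitlement are omitted, and the control unit and
 * kcunit values below are arbitrary examples), a filter agent typically
 * attaches and registers its NECP filter control unit roughly like this:
 *
 *	int fd = socket(PF_SYSTEM, SOCK_DGRAM, SYSPROTO_CONTROL);
 *	struct ctl_info info = { 0 };
 *	struct sockaddr_ctl sc = { 0 };
 *	uint32_t necp_control_unit = 1;        // must match the NECP rule added in 1)
 *
 *	strlcpy(info.ctl_name, CONTENT_FILTER_CONTROL_NAME, sizeof(info.ctl_name));
 *	ioctl(fd, CTLIOCGINFO, &info);         // resolve the control name to ctl_id
 *	sc.sc_len = sizeof(sc);
 *	sc.sc_family = AF_SYSTEM;
 *	sc.ss_sysaddr = AF_SYS_CONTROL;
 *	sc.sc_id = info.ctl_id;
 *	sc.sc_unit = 1;                        // kcunit, in [1, MAX_CONTENT_FILTER]
 *	connect(fd, (struct sockaddr *)&sc, sizeof(sc));
 *	setsockopt(fd, SYSPROTO_CONTROL, CFIL_OPT_NECP_CONTROL_UNIT,
 *	    &necp_control_unit, sizeof(necp_control_unit));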
83 *
84 *
85 * THE MESSAGING PROTOCOL
86 *
87 * The socket content filter subsystem and a user space filter agent
88 * communicate over the kernel control socket via an asynchronous
89 * messaging protocol (this is not a request-response protocol).
90 * The socket content filter subsystem sends event messages to the user
91 * space filter agent about the INET/INET6 sockets it is interested in filtering.
92 * The user space filter agent sends action messages to either allow
93 * data to pass or to disallow the data flow (and drop the connection).
94 *
95 * All messages over a content filter kernel control socket share the same
96 * common header of type "struct cfil_msg_hdr". The message type tells if
97 * it's an event message "CFM_TYPE_EVENT" or an action message "CFM_TYPE_ACTION".
98 * The message header field "cfm_sock_id" identifies a given INET/INET6 flow.
99 * For TCP, flows are per-socket. For UDP and other datagram protocols, there
100 * could be multiple flows per socket.
101 *
102 * Note the message header length field may be padded for alignment and can
103 * be larger than the actual content of the message.
104 * The field "cfm_op" describes the kind of event or action.
105 *
106 * Here are the kinds of content filter events:
107 * - CFM_OP_SOCKET_ATTACHED: a new INET/INET6 socket is being filtered
108 * - CFM_OP_SOCKET_CLOSED: An INET/INET6 socket is closed
109 * - CFM_OP_DATA_OUT: A span of data is being sent on an INET/INET6 socket
110 * - CFM_OP_DATA_IN: A span of data is being received on an INET/INET6 socket
111 *
112 *
113 * EVENT MESSAGES
114 *
115 * The CFM_OP_DATA_OUT and CFM_OP_DATA_IN event messages contain a span of
116 * data that is being sent or received. The position of this span of data
117 * in the data flow is described by a set of start and end offsets. These
118 * are absolute 64-bit offsets. The first byte sent (or received) starts
119 * at offset 0 and ends at offset 1. The length of the content data
120 * is given by the difference between the end offset and the start offset.
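 *
 * For example (illustrative numbers only): if the first CFM_OP_DATA_OUT event
 * covers the first 1448 bytes written on the socket, its start offset is 0 and
 * its end offset is 1448; an event for the next 512 bytes then has start offset
 * 1448 and end offset 1960, i.e. length = 1960 - 1448 = 512.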
121 *
122 * After a CFM_OP_SOCKET_ATTACHED is delivered, CFM_OP_DATA_OUT and
123 * CFM_OP_DATA_IN events are not delivered until a CFM_OP_DATA_UPDATE
124 * action message is sent by the user space filter agent.
125 *
126 * Note: absolute 64-bit offsets should be large enough for the foreseeable
127 * future. A 64-bit counter will wrap after 468 years at 10 Gbit/sec:
128 * 2^64 / ((10E9 / 8) * 60 * 60 * 24 * 365.25) = 467.63
129 *
130 * There are two kinds of primary content filter actions:
131 * - CFM_OP_DATA_UPDATE: to update pass or peek offsets for each direction.
132 * - CFM_OP_DROP: to shut down the socket and disallow further data flow
133 *
134 * There is also an action to mark a given client flow as already filtered
135 * at a higher level, CFM_OP_BLESS_CLIENT.
136 *
137 *
138 * ACTION MESSAGES
139 *
140 * The CFM_OP_DATA_UPDATE action messages let the user space filter
141 * agent allow data to flow up to the specified pass offset -- there
142 * is a pass offset for outgoing data and a pass offset for incoming data.
143 * When a new INET/INET6 socket is attached to the content filter and a flow is
144 * created, each pass offset is initially set to 0 so no data is allowed to pass by
145 * default. When the pass offset is set to CFM_MAX_OFFSET via a CFM_OP_DATA_UPDATE
146 * then the data flow becomes unrestricted.
147 *
148 * Note that pass offsets can only be incremented. A CFM_OP_DATA_UPDATE message
149 * with a pass offset smaller than the pass offset of a previous
150 * CFM_OP_DATA_UPDATE message is silently ignored.
151 *
152 * A user space filter agent also uses CFM_OP_DATA_UPDATE action messages
153 * to tell the kernel how much data it wants to see by using the peek offsets.
154 * Just like pass offsets, there is a peek offset for each direction.
155 * When a new INET/INET6 flow is created, each peek offset is initially set to 0
156 * so no CFM_OP_DATA_OUT and CFM_OP_DATA_IN event messages are dispatched by default
157 * until a CFM_OP_DATA_UPDATE action message with a greater than 0 peek offset is sent
158 * by the user space filter agent. When the peek offset is set to CFM_MAX_OFFSET via
159 * a CFM_OP_DATA_UPDATE then the flow of update data events becomes unrestricted.
160 *
161 * Note that a peek offset cannot be smaller than the corresponding pass offset.
162 * Also, a peek offset cannot be smaller than the corresponding end offset
163 * of the last CFM_OP_DATA_OUT/CFM_OP_DATA_IN message dispatched. Attempting
164 * to set a peek value that is too small is silently ignored.
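 *
 * As a rough illustration of the action side (a sketch only; the message
 * layouts are declared in <net/content_filter.h>, and the 16 KB peek window is
 * an arbitrary example policy), a user space agent that has just processed a
 * CFM_OP_DATA_OUT event "ev" read from the kernel control socket might reply:
 *
 *	struct cfil_msg_action action = { 0 };
 *	action.cfa_msghdr.cfm_len = sizeof(action);
 *	action.cfa_msghdr.cfm_version = CFM_VERSION_CURRENT;
 *	action.cfa_msghdr.cfm_type = CFM_TYPE_ACTION;
 *	action.cfa_msghdr.cfm_op = CFM_OP_DATA_UPDATE;
 *	action.cfa_msghdr.cfm_sock_id = ev->cfd_msghdr.cfm_sock_id;  // same flow
 *	action.cfa_out_pass_offset = ev->cfd_end_offset;             // allow what was seen
 *	action.cfa_out_peek_offset = ev->cfd_end_offset + 16384;     // keep peeking ahead
 *	send(fd, &action, sizeof(action), 0);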
165 *
166 *
167 * PER FLOW "struct cfil_info"
168 *
169 * As soon as an INET/INET6 socket gets attached to a content filter, a
170 * "struct cfil_info" is created to hold the content filtering state for this
171 * socket. For UDP and other datagram protocols, as soon as traffic is seen for
172 * each new flow identified by its 4-tuple of source address/port and destination
173 * address/port, a "struct cfil_info" is created. Each datagram socket may
174 * have multiple flows maintained in a hash table of "struct cfil_info" entries.
175 *
176 * The content filtering state is made of the following information
177 * for each direction:
178 * - The current pass offset;
179 * - The first and last offsets of the data pending, waiting for a filtering
180 * decision;
181 * - The inject queue for data that passed the filters and that needs
182 * to be re-injected;
183 * - A content filter specific state in a set of "struct cfil_entry"
184 *
185 *
186 * CONTENT FILTER STATE "struct cfil_entry"
187 *
188 * The "struct cfil_entry" maintains the information most relevant to the
189 * message handling over a kernel control socket with a user space filter agent.
190 *
191 * The "struct cfil_entry" holds the NECP filter control unit that corresponds
192 * to the kernel control socket unit it corresponds to and also has a pointer
193 * to the corresponding "struct content_filter".
194 *
195 * For each direction, "struct cfil_entry" maintains the following information:
196 * - The pass offset
197 * - The peek offset
198 * - The offset of the last data peeked at by the filter
199 * - A queue of data that's waiting to be delivered to the user space filter
200 * agent on the kernel control socket
201 * - A queue of data for which event messages have been sent on the kernel
202 * control socket and are pending for a filtering decision.
203 *
204 *
205 * CONTENT FILTER QUEUES
206 *
207 * Data that is being filtered is steered away from the INET/INET6 socket buffer
208 * and instead will sit in one of three content filter queues until the data
209 * can be re-injected into the INET/INET6 socket buffer.
210 *
211 * A content filter queue is represented by "struct cfil_queue" that contains
212 * a list of mbufs and the start and end offset of the data span of
213 * the list of mbufs.
214 *
215 * The data moves into the three content filter queues according to this
216 * sequence:
217 * a) The "cfe_ctl_q" of "struct cfil_entry"
218 * b) The "cfe_pending_q" of "struct cfil_entry"
219 * c) The "cfi_inject_q" of "struct cfil_info"
220 *
221 * Note: The sequence (a),(b) may be repeated several times if there is more
222 * than one content filter attached to the INET/INET6 socket.
223 *
224 * The "cfe_ctl_q" queue holds data than cannot be delivered to the
225 * kernel conntrol socket for two reasons:
226 * - The peek offset is less that the end offset of the mbuf data
227 * - The kernel control socket is flow controlled
228 *
229 * The "cfe_pending_q" queue holds data for which CFM_OP_DATA_OUT or
230 * CFM_OP_DATA_IN have been successfully dispatched to the kernel control
231 * socket and are waiting for a pass action message from the user space
232 * filter agent. An mbuf length must be fully allowed to pass to be removed
233 * from the cfe_pending_q.
234 *
235 * The "cfi_inject_q" queue holds data that has been fully allowed to pass
236 * by the user space filter agent and that needs to be re-injected into the
237 * INET/INET6 socket.
238 *
239 *
240 * IMPACT ON FLOW CONTROL
241 *
242 * An essential aspect of the content filter subsystem is to minimize the
243 * impact on flow control of the INET/INET6 sockets being filtered.
244 *
245 * The processing overhead of the content filtering may have an effect on
246 * flow control by adding noticeable delays and cannot be eliminated --
247 * care must be taken by the user space filter agent to minimize the
248 * processing delays.
249 *
250 * The amount of data being filtered is kept in buffers while waiting for
251 * a decision by the user space filter agent. This amount of data pending
252 * needs to be subtracted from the amount of data available in the
253 * corresponding INET/INET6 socket buffer. This is done by modifying
254 * sbspace() and tcp_sbspace() to account for the amount of data pending
255 * in the content filter.
256 *
257 *
258 * LOCKING STRATEGY
259 *
260 * The global state of content filter subsystem is protected by a single
261 * read-write lock "cfil_lck_rw". The data flow can be done with the
262 * cfil read-write lock held as shared so it can be re-entered from multiple
263 * threads.
264 *
265 * The per INET/INET6 socket content filter state -- "struct cfil_info" -- is
266 * protected by the socket lock.
267 *
268 * An INET/INET6 socket lock cannot be taken while the cfil read-write lock
269 * is held. That's why we have some sequences where we drop the cfil read-write
270 * lock before taking the INET/INET6 socket lock.
271 *
272 * It is also important to lock the INET/INET6 socket buffer while the content
273 * filter is modifying the amount of pending data. Otherwise the calculations
274 * in sbspace() and tcp_sbspace() could be wrong.
275 *
276 * The "cfil_lck_rw" protects "struct content_filter" and also the fields
277 * "cfe_link" and "cfe_filter" of "struct cfil_entry".
278 *
279 * Actually "cfe_link" and "cfe_filter" are protected both by
280 * "cfil_lck_rw" and the socket lock: they may be modified only when
281 * "cfil_lck_rw" is exclusive and the socket is locked.
282 *
283 * To read the other fields of "struct content_filter" we have to take
284 * "cfil_lck_rw" in shared mode.
285 *
286 * DATAGRAM SPECIFICS:
287 *
288 * The socket content filter supports all INET/INET6 protocols. However
289 * the treatments for TCP sockets and for datagram (UDP, ICMP, etc) sockets
290 * are slightly different.
291 *
292 * Each datagram socket may have multiple flows. Each flow is identified
293 * by the flow's source address/port and destination address/port tuple
294 * and is represented as a "struct cfil_info" entry. For each socket,
295 * a hash table is used to maintain the collection of flows under that socket.
296 *
297 * Each datagram flow is uniquely identified by its "struct cfil_info" cfi_sock_id.
298 * The highest 32-bits of the cfi_sock_id contains the socket's so_gencnt. This portion
299 * of the cfi_sock_id is used to locate the socket during socket lookup. The lowest 32-bits
300 * of the cfi_sock_id contains a hash of the flow's 4-tuple. This portion of the cfi_sock_id
301 * is used as the hash value for the flow hash table lookup within the parent socket.
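 *
 * Purely as an illustration, using the CFI_MASK_GENCNT / CFI_MASK_FLOWHASH
 * masks and shifts defined further below, a datagram flow id breaks down as:
 *
 *	cfi_sock_id = ((so->so_gencnt & 0xffffffff) << 32) | flowhash;
 *	gencnt   = (cfi_sock_id & CFI_MASK_GENCNT) >> CFI_SHIFT_GENCNT; // socket lookup
 *	flowhash = (cfi_sock_id & CFI_MASK_FLOWHASH);                   // flow hash lookup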
302 *
303 * Since datagram sockets may not be connected, flow states may not be maintained in the
304 * socket structures and thus have to be saved for each packet. These saved states will be
305 * used for both outgoing and incoming reinjections. For outgoing packets, destination
306 * address/port as well as the current socket states will be saved. During reinjection,
307 * these saved states will be used instead. For incoming packets, control and address
308 * mbufs will be chained to the data. During reinjection, the whole chain will be queued
309 * onto the incoming socket buffer.
310 *
311 * LIMITATIONS
312 *
313 * - Supports all INET/INET6 sockets, such as TCP, UDP, ICMP, etc.
314 *
315 * - Does not support TCP unordered messages
316 */
317
318 /*
319 * TO DO LIST
320 *
321 * Deal with OOB
322 *
323 */
324
325 #include <sys/types.h>
326 #include <sys/kern_control.h>
327 #include <sys/queue.h>
328 #include <sys/domain.h>
329 #include <sys/protosw.h>
330 #include <sys/syslog.h>
331 #include <sys/systm.h>
332 #include <sys/param.h>
333 #include <sys/mbuf.h>
334
335 #include <kern/locks.h>
336 #include <kern/zalloc.h>
337 #include <kern/debug.h>
338
339 #include <net/content_filter.h>
340 #include <net/content_filter_crypto.h>
341
342 #define _IP_VHL
343 #include <netinet/ip.h>
344 #include <netinet/in_pcb.h>
345 #include <netinet/tcp.h>
346 #include <netinet/tcp_var.h>
347 #include <netinet/udp.h>
348 #include <netinet/udp_var.h>
349
350 #include <string.h>
351 #include <libkern/libkern.h>
352 #include <kern/sched_prim.h>
353 #include <kern/task.h>
354 #include <mach/task_info.h>
355
356 #if !TARGET_OS_OSX && !defined(XNU_TARGET_OS_OSX)
357 #define MAX_CONTENT_FILTER 2
358 #else
359 #define MAX_CONTENT_FILTER 8
360 #endif
361
362 extern struct inpcbinfo ripcbinfo;
363 struct cfil_entry;
364
365 /*
366 * The structure content_filter represents a user space content filter
367 * It's created and associated with a kernel control socket instance
368 */
369 struct content_filter {
370 kern_ctl_ref cf_kcref;
371 u_int32_t cf_kcunit;
372 u_int32_t cf_flags;
373
374 uint32_t cf_necp_control_unit;
375
376 uint32_t cf_sock_count;
377 TAILQ_HEAD(, cfil_entry) cf_sock_entries;
378
379 cfil_crypto_state_t cf_crypto_state;
380 };
381
382 #define CFF_ACTIVE 0x01
383 #define CFF_DETACHING 0x02
384 #define CFF_FLOW_CONTROLLED 0x04
385
386 struct content_filter **content_filters = NULL;
387 uint32_t cfil_active_count = 0; /* Number of active content filters */
388 uint32_t cfil_sock_attached_count = 0; /* Number of socket attachments */
389 uint32_t cfil_sock_udp_attached_count = 0; /* Number of UDP socket attachments */
390 uint32_t cfil_sock_attached_stats_count = 0; /* Number of sockets that requested periodic stats reports */
391 uint32_t cfil_close_wait_timeout = 1000; /* in milliseconds */
392
393 static kern_ctl_ref cfil_kctlref = NULL;
394
395 static lck_grp_attr_t *cfil_lck_grp_attr = NULL;
396 static lck_attr_t *cfil_lck_attr = NULL;
397 static lck_grp_t *cfil_lck_grp = NULL;
398 decl_lck_rw_data(static, cfil_lck_rw);
399
400 #define CFIL_RW_LCK_MAX 8
401
402 int cfil_rw_nxt_lck = 0;
403 void* cfil_rw_lock_history[CFIL_RW_LCK_MAX];
404
405 int cfil_rw_nxt_unlck = 0;
406 void* cfil_rw_unlock_history[CFIL_RW_LCK_MAX];
407
408 static ZONE_DECLARE(content_filter_zone, "content_filter",
409 sizeof(struct content_filter), ZC_NONE);
410
411 MBUFQ_HEAD(cfil_mqhead);
412
413 struct cfil_queue {
414 uint64_t q_start; /* offset of first byte in queue */
415 uint64_t q_end; /* offset of last byte in queue */
416 struct cfil_mqhead q_mq;
417 };
418
419 /*
420 * struct cfil_entry
421 *
422 * There is one entry per content filter
423 */
424 struct cfil_entry {
425 TAILQ_ENTRY(cfil_entry) cfe_link;
426 SLIST_ENTRY(cfil_entry) cfe_order_link;
427 struct content_filter *cfe_filter;
428
429 struct cfil_info *cfe_cfil_info;
430 uint32_t cfe_flags;
431 uint32_t cfe_necp_control_unit;
432 struct timeval cfe_last_event; /* To user space */
433 struct timeval cfe_last_action; /* From user space */
434 uint64_t cfe_byte_inbound_count_reported; /* stats already reported */
435 uint64_t cfe_byte_outbound_count_reported; /* stats already reported */
436 struct timeval cfe_stats_report_ts; /* Timestamp for last stats report */
437 uint32_t cfe_stats_report_frequency; /* Interval for stats report in msecs */
438 boolean_t cfe_laddr_sent;
439
440 struct cfe_buf {
441 /*
442 * cfe_pending_q holds data that has been delivered to
443 * the filter and for which we are waiting for an action
444 */
445 struct cfil_queue cfe_pending_q;
446 /*
447 * This queue is for data that has not been delivered to
448 * the content filter (new data, pass peek or flow control)
449 */
450 struct cfil_queue cfe_ctl_q;
451
452 uint64_t cfe_pass_offset;
453 uint64_t cfe_peek_offset;
454 uint64_t cfe_peeked;
455 } cfe_snd, cfe_rcv;
456 };
457
458 #define CFEF_CFIL_ATTACHED 0x0001 /* was attached to filter */
459 #define CFEF_SENT_SOCK_ATTACHED 0x0002 /* sock attach event was sent */
460 #define CFEF_DATA_START 0x0004 /* can send data event */
461 #define CFEF_FLOW_CONTROLLED 0x0008 /* wait for flow control lift */
462 #define CFEF_SENT_DISCONNECT_IN 0x0010 /* event was sent */
463 #define CFEF_SENT_DISCONNECT_OUT 0x0020 /* event was sent */
464 #define CFEF_SENT_SOCK_CLOSED 0x0040 /* closed event was sent */
465 #define CFEF_CFIL_DETACHED 0x0080 /* filter was detached */
466
467
468 #define CFI_ADD_TIME_LOG(cfil, t1, t0, op) \
469 struct timeval64 _tdiff; \
470 if ((cfil)->cfi_op_list_ctr < CFI_MAX_TIME_LOG_ENTRY) { \
471 timersub(t1, t0, &_tdiff); \
472 (cfil)->cfi_op_time[(cfil)->cfi_op_list_ctr] = (uint32_t)(_tdiff.tv_sec * 1000 + _tdiff.tv_usec / 1000);\
473 (cfil)->cfi_op_list[(cfil)->cfi_op_list_ctr] = (unsigned char)op; \
474 (cfil)->cfi_op_list_ctr ++; \
475 }
476
477 struct cfil_hash_entry;
478
479 /*
480 * struct cfil_info
481 *
482 * There is a struct cfil_info per socket
483 */
484 struct cfil_info {
485 TAILQ_ENTRY(cfil_info) cfi_link;
486 TAILQ_ENTRY(cfil_info) cfi_link_stats;
487 struct socket *cfi_so;
488 uint64_t cfi_flags;
489 uint64_t cfi_sock_id;
490 struct timeval64 cfi_first_event;
491 uint32_t cfi_op_list_ctr;
492 uint32_t cfi_op_time[CFI_MAX_TIME_LOG_ENTRY]; /* time interval in microseconds since first event */
493 unsigned char cfi_op_list[CFI_MAX_TIME_LOG_ENTRY];
494 union sockaddr_in_4_6 cfi_so_attach_faddr; /* faddr at the time of attach */
495 union sockaddr_in_4_6 cfi_so_attach_laddr; /* laddr at the time of attach */
496
497 int cfi_dir;
498 uint64_t cfi_byte_inbound_count;
499 uint64_t cfi_byte_outbound_count;
500
501 boolean_t cfi_isSignatureLatest; /* Indicates if signature covers latest flow attributes */
502 u_int32_t cfi_filter_control_unit;
503 u_int32_t cfi_debug;
504 struct cfi_buf {
505 /*
506 * cfi_pending_first and cfi_pending_last describe the total
507 * amount of data outstanding for all the filters on
508 * this socket and data in the flow queue
509 * cfi_pending_mbcnt counts in sballoc() "chars of mbufs used"
510 */
511 uint64_t cfi_pending_first;
512 uint64_t cfi_pending_last;
513 uint32_t cfi_pending_mbcnt;
514 uint32_t cfi_pending_mbnum;
515 uint32_t cfi_tail_drop_cnt;
516 /*
517 * cfi_pass_offset is the minimum of all the filters
518 */
519 uint64_t cfi_pass_offset;
520 /*
521 * cfi_inject_q holds data that needs to be re-injected
522 * into the socket after filtering and that can
523 * be queued because of flow control
524 */
525 struct cfil_queue cfi_inject_q;
526 } cfi_snd, cfi_rcv;
527
528 struct cfil_entry cfi_entries[MAX_CONTENT_FILTER];
529 struct cfil_hash_entry *cfi_hash_entry;
530 SLIST_HEAD(, cfil_entry) cfi_ordered_entries;
531 os_refcnt_t cfi_ref_count;
532 } __attribute__((aligned(8)));
533
534 #define CFIF_DROP 0x0001 /* drop action applied */
535 #define CFIF_CLOSE_WAIT 0x0002 /* waiting for filter to close */
536 #define CFIF_SOCK_CLOSED 0x0004 /* socket is closed */
537 #define CFIF_RETRY_INJECT_IN 0x0010 /* inject in failed */
538 #define CFIF_RETRY_INJECT_OUT 0x0020 /* inject out failed */
539 #define CFIF_SHUT_WR 0x0040 /* shutdown write */
540 #define CFIF_SHUT_RD 0x0080 /* shutdown read */
541 #define CFIF_SOCKET_CONNECTED 0x0100 /* socket is connected */
542 #define CFIF_INITIAL_VERDICT 0x0200 /* received initial verdict */
543
544 #define CFI_MASK_GENCNT 0xFFFFFFFF00000000 /* upper 32 bits */
545 #define CFI_SHIFT_GENCNT 32
546 #define CFI_MASK_FLOWHASH 0x00000000FFFFFFFF /* lower 32 bits */
547 #define CFI_SHIFT_FLOWHASH 0
548
549 #define CFI_ENTRY_KCUNIT(i, e) ((uint32_t)(((e) - &((i)->cfi_entries[0])) + 1))
550
551 static ZONE_DECLARE(cfil_info_zone, "cfil_info",
552 sizeof(struct cfil_info), ZC_NONE);
553
554 TAILQ_HEAD(cfil_sock_head, cfil_info) cfil_sock_head;
555 TAILQ_HEAD(cfil_sock_head_stats, cfil_info) cfil_sock_head_stats;
556
557 #define CFIL_QUEUE_VERIFY(x) if (cfil_debug) cfil_queue_verify(x)
558 #define CFIL_INFO_VERIFY(x) if (cfil_debug) cfil_info_verify(x)
559
560 /*
561 * UDP Socket Support
562 */
563 LIST_HEAD(cfilhashhead, cfil_hash_entry);
564 #define CFILHASHSIZE 16
565 #define CFIL_HASH(laddr, faddr, lport, fport) ((faddr) ^ ((laddr) >> 16) ^ (fport) ^ (lport))
566
567 #define IS_INET(so) (so && so->so_proto && so->so_proto->pr_domain && (so->so_proto->pr_domain->dom_family == AF_INET || so->so_proto->pr_domain->dom_family == AF_INET6))
568 #define IS_TCP(so) (so && so->so_proto && so->so_proto->pr_type == SOCK_STREAM && so->so_proto->pr_protocol == IPPROTO_TCP)
569 #define IS_UDP(so) (so && so->so_proto && so->so_proto->pr_type == SOCK_DGRAM && so->so_proto->pr_protocol == IPPROTO_UDP)
570 #define IS_ICMP(so) (so && so->so_proto && (so->so_proto->pr_type == SOCK_RAW || so->so_proto->pr_type == SOCK_DGRAM) && \
571 (so->so_proto->pr_protocol == IPPROTO_ICMP || so->so_proto->pr_protocol == IPPROTO_ICMPV6))
572 #define IS_RAW(so) (so && so->so_proto && so->so_proto->pr_type == SOCK_RAW && so->so_proto->pr_protocol == IPPROTO_RAW)
573
574 #if !TARGET_OS_OSX && !defined(XNU_TARGET_OS_OSX)
575 #define IS_IP_DGRAM(so) (IS_INET(so) && IS_UDP(so))
576 #else
577 #define IS_IP_DGRAM(so) (IS_INET(so) && !IS_TCP(so))
578 #endif
579
580 #define OPTIONAL_IP_HEADER(so) (!IS_TCP(so) && !IS_UDP(so))
581 #define GET_SO_PROTO(so) ((so && so->so_proto) ? so->so_proto->pr_protocol : IPPROTO_MAX)
582 #define IS_INP_V6(inp) (inp && (inp->inp_vflag & INP_IPV6))
583
584 #define UNCONNECTED(inp) (inp && (((inp->inp_vflag & INP_IPV4) && (inp->inp_faddr.s_addr == INADDR_ANY)) || \
585 ((inp->inp_vflag & INP_IPV6) && IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr))))
586 #define IS_INP_V6(inp) (inp && (inp->inp_vflag & INP_IPV6))
587 #define IS_ENTRY_ATTACHED(cfil_info, kcunit) (cfil_info != NULL && (kcunit <= MAX_CONTENT_FILTER) && \
588 cfil_info->cfi_entries[kcunit - 1].cfe_filter != NULL)
589 #define IS_DNS(local, remote) (check_port(local, 53) || check_port(remote, 53) || check_port(local, 5353) || check_port(remote, 5353))
590 #define IS_INITIAL_TFO_DATA(so) (so && (so->so_flags1 & SOF1_PRECONNECT_DATA) && (so->so_state & SS_ISCONNECTING))
591 #define NULLADDRESS(addr) ((addr.sa.sa_len == 0) || \
592 (addr.sa.sa_family == AF_INET && addr.sin.sin_addr.s_addr == 0) || \
593 (addr.sa.sa_family == AF_INET6 && IN6_IS_ADDR_UNSPECIFIED(&addr.sin6.sin6_addr)))
594 #define LOCAL_ADDRESS_NEEDS_UPDATE(entry) \
595 ((entry->cfentry_family == AF_INET && entry->cfentry_laddr.addr46.ia46_addr4.s_addr == 0) || \
596 entry->cfentry_family == AF_INET6 && IN6_IS_ADDR_UNSPECIFIED(&entry->cfentry_laddr.addr6))
597 #define LOCAL_PORT_NEEDS_UPDATE(entry, so) (entry->cfentry_lport == 0 && IS_UDP(so))
598
599 #define SKIP_FILTER_FOR_TCP_SOCKET(so) \
600 (so == NULL || so->so_proto == NULL || so->so_proto->pr_domain == NULL || \
601 (so->so_proto->pr_domain->dom_family != PF_INET && so->so_proto->pr_domain->dom_family != PF_INET6) || \
602 so->so_proto->pr_type != SOCK_STREAM || \
603 so->so_proto->pr_protocol != IPPROTO_TCP || \
604 (so->so_flags & SOF_MP_SUBFLOW) != 0 || \
605 (so->so_flags1 & SOF1_CONTENT_FILTER_SKIP) != 0)
606
607 os_refgrp_decl(static, cfil_refgrp, "CFILRefGroup", NULL);
608
609 #define CFIL_INFO_FREE(cfil_info) \
610 if (cfil_info && (os_ref_release(&cfil_info->cfi_ref_count) == 0)) { \
611 cfil_info_free(cfil_info); \
612 }
613
614 /*
615 * Periodic Statistics Report:
616 */
617 static struct thread *cfil_stats_report_thread;
618 #define CFIL_STATS_REPORT_INTERVAL_MIN_MSEC 500 // Highest report frequency
619 #define CFIL_STATS_REPORT_RUN_INTERVAL_NSEC (CFIL_STATS_REPORT_INTERVAL_MIN_MSEC * NSEC_PER_MSEC)
620 #define CFIL_STATS_REPORT_MAX_COUNT 50 // Max stats to be reported per run
621
622 /* This buffer must have same layout as struct cfil_msg_stats_report */
623 struct cfil_stats_report_buffer {
624 struct cfil_msg_hdr msghdr;
625 uint32_t count;
626 struct cfil_msg_sock_stats stats[CFIL_STATS_REPORT_MAX_COUNT];
627 };
628 static struct cfil_stats_report_buffer *global_cfil_stats_report_buffers[MAX_CONTENT_FILTER];
629 static uint32_t global_cfil_stats_counts[MAX_CONTENT_FILTER];
630
631 /*
632 * UDP Garbage Collection:
633 */
634 static struct thread *cfil_udp_gc_thread;
635 #define UDP_FLOW_GC_IDLE_TO 30 // Flow Idle Timeout in seconds
636 #define UDP_FLOW_GC_ACTION_TO 10 // Flow Action Timeout (no action from user space) in seconds
637 #define UDP_FLOW_GC_MAX_COUNT 100 // Max UDP flows to be handled per run
638 #define UDP_FLOW_GC_RUN_INTERVAL_NSEC (10 * NSEC_PER_SEC) // GC wakes up every 10 seconds
639
640 /*
641 * UDP flow queue thresholds
642 */
643 #define UDP_FLOW_GC_MBUF_CNT_MAX (2 << MBSHIFT) // Max mbuf byte count in flow queue (2MB)
644 #define UDP_FLOW_GC_MBUF_NUM_MAX (UDP_FLOW_GC_MBUF_CNT_MAX >> MCLSHIFT) // Max mbuf count in flow queue (1K)
645 #define UDP_FLOW_GC_MBUF_SHIFT 5 // Shift to get 1/32 of platform limits
646 /*
647 * UDP flow queue threshold globals:
648 */
649 static unsigned int cfil_udp_gc_mbuf_num_max = UDP_FLOW_GC_MBUF_NUM_MAX;
650 static unsigned int cfil_udp_gc_mbuf_cnt_max = UDP_FLOW_GC_MBUF_CNT_MAX;
651
652 /*
653 * struct cfil_hash_entry
654 *
655 * Hash entry for cfil_info
656 */
657 struct cfil_hash_entry {
658 LIST_ENTRY(cfil_hash_entry) cfentry_link;
659 struct cfil_info *cfentry_cfil;
660 u_short cfentry_fport;
661 u_short cfentry_lport;
662 sa_family_t cfentry_family;
663 u_int32_t cfentry_flowhash;
664 u_int64_t cfentry_lastused;
665 union {
666 /* foreign host table entry */
667 struct in_addr_4in6 addr46;
668 struct in6_addr addr6;
669 } cfentry_faddr;
670 union {
671 /* local host table entry */
672 struct in_addr_4in6 addr46;
673 struct in6_addr addr6;
674 } cfentry_laddr;
675 uint8_t cfentry_laddr_updated: 1;
676 uint8_t cfentry_lport_updated: 1;
677 uint8_t cfentry_reserved: 6;
678 };
679
680 /*
681 * struct cfil_db
682 *
683 * For each UDP socket, this is a hash table maintaining all cfil_info structs
684 * keyed by the flow 4-tuples <lport,fport,laddr,faddr>.
685 */
686 struct cfil_db {
687 struct socket *cfdb_so;
688 uint32_t cfdb_count; /* Number of total content filters */
689 struct cfilhashhead *cfdb_hashbase;
690 u_long cfdb_hashmask;
691 struct cfil_hash_entry *cfdb_only_entry; /* Optimization for connected UDP */
692 };
693
694 /*
695 * CFIL specific mbuf tag:
696 * Save state of socket at the point of data entry into cfil.
697 * Use saved state for reinjection at protocol layer.
698 */
699 struct cfil_tag {
700 union sockaddr_in_4_6 cfil_faddr;
701 uint32_t cfil_so_state_change_cnt;
702 uint32_t cfil_so_options;
703 int cfil_inp_flags;
704 };
705
706 static ZONE_DECLARE(cfil_hash_entry_zone, "cfil_entry_hash",
707 sizeof(struct cfil_hash_entry), ZC_NONE);
708
709 static ZONE_DECLARE(cfil_db_zone, "cfil_db",
710 sizeof(struct cfil_db), ZC_NONE);
711
712 /*
713 * Statistics
714 */
715
716 struct cfil_stats cfil_stats;
717
718 /*
719 * For troubleshooting
720 */
721 int cfil_log_level = LOG_ERR;
722 int cfil_debug = 1;
723
724 // Debug controls added for selective debugging.
725 // Disabled for production. If enabled,
726 // these will have performance impact
727 #define LIFECYCLE_DEBUG 0
728 #define VERDICT_DEBUG 0
729 #define DATA_DEBUG 0
730 #define SHOW_DEBUG 0
731 #define GC_DEBUG 0
732 #define STATS_DEBUG 0
733
734 /*
735 * Sysctls for logs and statistics
736 */
737 static int sysctl_cfil_filter_list(struct sysctl_oid *, void *, int,
738 struct sysctl_req *);
739 static int sysctl_cfil_sock_list(struct sysctl_oid *, void *, int,
740 struct sysctl_req *);
741
742 SYSCTL_NODE(_net, OID_AUTO, cfil, CTLFLAG_RW | CTLFLAG_LOCKED, 0, "cfil");
743
744 SYSCTL_INT(_net_cfil, OID_AUTO, log, CTLFLAG_RW | CTLFLAG_LOCKED,
745 &cfil_log_level, 0, "");
746
747 SYSCTL_INT(_net_cfil, OID_AUTO, debug, CTLFLAG_RW | CTLFLAG_LOCKED,
748 &cfil_debug, 0, "");
749
750 SYSCTL_UINT(_net_cfil, OID_AUTO, sock_attached_count, CTLFLAG_RD | CTLFLAG_LOCKED,
751 &cfil_sock_attached_count, 0, "");
752
753 SYSCTL_UINT(_net_cfil, OID_AUTO, active_count, CTLFLAG_RD | CTLFLAG_LOCKED,
754 &cfil_active_count, 0, "");
755
756 SYSCTL_UINT(_net_cfil, OID_AUTO, close_wait_timeout, CTLFLAG_RW | CTLFLAG_LOCKED,
757 &cfil_close_wait_timeout, 0, "");
758
759 static int cfil_sbtrim = 1;
760 SYSCTL_UINT(_net_cfil, OID_AUTO, sbtrim, CTLFLAG_RW | CTLFLAG_LOCKED,
761 &cfil_sbtrim, 0, "");
762
763 SYSCTL_PROC(_net_cfil, OID_AUTO, filter_list, CTLFLAG_RD | CTLFLAG_LOCKED,
764 0, 0, sysctl_cfil_filter_list, "S,cfil_filter_stat", "");
765
766 SYSCTL_PROC(_net_cfil, OID_AUTO, sock_list, CTLFLAG_RD | CTLFLAG_LOCKED,
767 0, 0, sysctl_cfil_sock_list, "S,cfil_sock_stat", "");
768
769 SYSCTL_STRUCT(_net_cfil, OID_AUTO, stats, CTLFLAG_RD | CTLFLAG_LOCKED,
770 &cfil_stats, cfil_stats, "");
771
772 /*
773 * Forward declaration to appease the compiler
774 */
775 static int cfil_action_data_pass(struct socket *, struct cfil_info *, uint32_t, int,
776 uint64_t, uint64_t);
777 static int cfil_action_drop(struct socket *, struct cfil_info *, uint32_t);
778 static int cfil_action_bless_client(uint32_t, struct cfil_msg_hdr *);
779 static int cfil_action_set_crypto_key(uint32_t, struct cfil_msg_hdr *);
780 static int cfil_dispatch_closed_event(struct socket *, struct cfil_info *, int);
781 static int cfil_data_common(struct socket *, struct cfil_info *, int, struct sockaddr *,
782 struct mbuf *, struct mbuf *, uint32_t);
783 static int cfil_data_filter(struct socket *, struct cfil_info *, uint32_t, int,
784 struct mbuf *, uint32_t);
785 static void fill_ip_sockaddr_4_6(union sockaddr_in_4_6 *,
786 struct in_addr, u_int16_t);
787 static void fill_ip6_sockaddr_4_6(union sockaddr_in_4_6 *,
788 struct in6_addr *, u_int16_t);
789
790 static int cfil_dispatch_attach_event(struct socket *, struct cfil_info *, uint32_t, int);
791 static void cfil_info_free(struct cfil_info *);
792 static struct cfil_info * cfil_info_alloc(struct socket *, struct cfil_hash_entry *);
793 static int cfil_info_attach_unit(struct socket *, uint32_t, struct cfil_info *);
794 static struct socket * cfil_socket_from_sock_id(cfil_sock_id_t, bool);
795 static struct socket * cfil_socket_from_client_uuid(uuid_t, bool *);
796 static int cfil_service_pending_queue(struct socket *, struct cfil_info *, uint32_t, int);
797 static int cfil_data_service_ctl_q(struct socket *, struct cfil_info *, uint32_t, int);
798 static void cfil_info_verify(struct cfil_info *);
799 static int cfil_update_data_offsets(struct socket *, struct cfil_info *, uint32_t, int,
800 uint64_t, uint64_t);
801 static int cfil_acquire_sockbuf(struct socket *, struct cfil_info *, int);
802 static void cfil_release_sockbuf(struct socket *, int);
803 static int cfil_filters_attached(struct socket *);
804
805 static void cfil_rw_lock_exclusive(lck_rw_t *);
806 static void cfil_rw_unlock_exclusive(lck_rw_t *);
807 static void cfil_rw_lock_shared(lck_rw_t *);
808 static void cfil_rw_unlock_shared(lck_rw_t *);
809 static boolean_t cfil_rw_lock_shared_to_exclusive(lck_rw_t *);
810 static void cfil_rw_lock_exclusive_to_shared(lck_rw_t *);
811
812 static unsigned int cfil_data_length(struct mbuf *, int *, int *);
813 static errno_t cfil_db_init(struct socket *);
814 static void cfil_db_free(struct socket *so);
815 struct cfil_hash_entry *cfil_db_lookup_entry(struct cfil_db *, struct sockaddr *, struct sockaddr *, boolean_t);
816 struct cfil_hash_entry *cfil_db_lookup_entry_internal(struct cfil_db *, struct sockaddr *, struct sockaddr *, boolean_t, boolean_t);
817 struct cfil_hash_entry *cfil_db_lookup_entry_with_sockid(struct cfil_db *, u_int64_t);
818 struct cfil_hash_entry *cfil_db_add_entry(struct cfil_db *, struct sockaddr *, struct sockaddr *);
819 void cfil_db_update_entry_local(struct cfil_db *, struct cfil_hash_entry *, struct sockaddr *, struct mbuf *);
820 void cfil_db_delete_entry(struct cfil_db *, struct cfil_hash_entry *);
821 struct cfil_hash_entry *cfil_sock_udp_get_flow(struct socket *, uint32_t, bool, struct sockaddr *, struct sockaddr *, struct mbuf *, int);
822 struct cfil_info *cfil_db_get_cfil_info(struct cfil_db *, cfil_sock_id_t);
823 static errno_t cfil_sock_udp_handle_data(bool, struct socket *, struct sockaddr *, struct sockaddr *,
824 struct mbuf *, struct mbuf *, uint32_t);
825 static int cfil_sock_udp_get_address_from_control(sa_family_t, struct mbuf *, uint8_t **);
826 static int32_t cfil_sock_udp_data_pending(struct sockbuf *, bool);
827 static void cfil_sock_udp_is_closed(struct socket *);
828 static int cfil_sock_udp_notify_shutdown(struct socket *, int, int, int);
829 static int cfil_sock_udp_shutdown(struct socket *, int *);
830 static void cfil_sock_udp_close_wait(struct socket *);
831 static void cfil_sock_udp_buf_update(struct sockbuf *);
832 static int cfil_filters_udp_attached(struct socket *, bool);
833 static void cfil_get_flow_address_v6(struct cfil_hash_entry *, struct inpcb *,
834 struct in6_addr **, struct in6_addr **,
835 u_int16_t *, u_int16_t *);
836 static void cfil_get_flow_address(struct cfil_hash_entry *, struct inpcb *,
837 struct in_addr *, struct in_addr *,
838 u_int16_t *, u_int16_t *);
839 static void cfil_info_log(int, struct cfil_info *, const char *);
840 void cfil_filter_show(u_int32_t);
841 void cfil_info_show(void);
842 bool cfil_info_idle_timed_out(struct cfil_info *, int, u_int64_t);
843 bool cfil_info_action_timed_out(struct cfil_info *, int);
844 bool cfil_info_buffer_threshold_exceeded(struct cfil_info *);
845 struct m_tag *cfil_dgram_save_socket_state(struct cfil_info *, struct mbuf *);
846 boolean_t cfil_dgram_peek_socket_state(struct mbuf *m, int *inp_flags);
847 static void cfil_udp_gc_thread_func(void *, wait_result_t);
848 static void cfil_info_udp_expire(void *, wait_result_t);
849 static bool fill_cfil_hash_entry_from_address(struct cfil_hash_entry *, bool, struct sockaddr *, bool);
850 static void cfil_sock_received_verdict(struct socket *so);
851 static void cfil_fill_event_msg_addresses(struct cfil_hash_entry *, struct inpcb *,
852 union sockaddr_in_4_6 *, union sockaddr_in_4_6 *,
853 boolean_t, boolean_t);
854 static void cfil_stats_report_thread_func(void *, wait_result_t);
855 static void cfil_stats_report(void *v, wait_result_t w);
856
857 bool check_port(struct sockaddr *, u_short);
858
859 /*
860 * Content filter global read write lock
861 */
862
863 static void
864 cfil_rw_lock_exclusive(lck_rw_t *lck)
865 {
866 void *lr_saved;
867
868 lr_saved = __builtin_return_address(0);
869
870 lck_rw_lock_exclusive(lck);
871
872 cfil_rw_lock_history[cfil_rw_nxt_lck] = lr_saved;
873 cfil_rw_nxt_lck = (cfil_rw_nxt_lck + 1) % CFIL_RW_LCK_MAX;
874 }
875
876 static void
877 cfil_rw_unlock_exclusive(lck_rw_t *lck)
878 {
879 void *lr_saved;
880
881 lr_saved = __builtin_return_address(0);
882
883 lck_rw_unlock_exclusive(lck);
884
885 cfil_rw_unlock_history[cfil_rw_nxt_unlck] = lr_saved;
886 cfil_rw_nxt_unlck = (cfil_rw_nxt_unlck + 1) % CFIL_RW_LCK_MAX;
887 }
888
889 static void
890 cfil_rw_lock_shared(lck_rw_t *lck)
891 {
892 void *lr_saved;
893
894 lr_saved = __builtin_return_address(0);
895
896 lck_rw_lock_shared(lck);
897
898 cfil_rw_lock_history[cfil_rw_nxt_lck] = lr_saved;
899 cfil_rw_nxt_lck = (cfil_rw_nxt_lck + 1) % CFIL_RW_LCK_MAX;
900 }
901
902 static void
903 cfil_rw_unlock_shared(lck_rw_t *lck)
904 {
905 void *lr_saved;
906
907 lr_saved = __builtin_return_address(0);
908
909 lck_rw_unlock_shared(lck);
910
911 cfil_rw_unlock_history[cfil_rw_nxt_unlck] = lr_saved;
912 cfil_rw_nxt_unlck = (cfil_rw_nxt_unlck + 1) % CFIL_RW_LCK_MAX;
913 }
914
915 static boolean_t
916 cfil_rw_lock_shared_to_exclusive(lck_rw_t *lck)
917 {
918 void *lr_saved;
919 boolean_t upgraded;
920
921 lr_saved = __builtin_return_address(0);
922
923 upgraded = lck_rw_lock_shared_to_exclusive(lck);
924 if (upgraded) {
925 cfil_rw_unlock_history[cfil_rw_nxt_unlck] = lr_saved;
926 cfil_rw_nxt_unlck = (cfil_rw_nxt_unlck + 1) % CFIL_RW_LCK_MAX;
927 }
928 return upgraded;
929 }
930
931 static void
932 cfil_rw_lock_exclusive_to_shared(lck_rw_t *lck)
933 {
934 void *lr_saved;
935
936 lr_saved = __builtin_return_address(0);
937
938 lck_rw_lock_exclusive_to_shared(lck);
939
940 cfil_rw_lock_history[cfil_rw_nxt_lck] = lr_saved;
941 cfil_rw_nxt_lck = (cfil_rw_nxt_lck + 1) % CFIL_RW_LCK_MAX;
942 }
943
944 static void
945 cfil_rw_lock_assert_held(lck_rw_t *lck, int exclusive)
946 {
947 #if !MACH_ASSERT
948 #pragma unused(lck, exclusive)
949 #endif
950 LCK_RW_ASSERT(lck,
951 exclusive ? LCK_RW_ASSERT_EXCLUSIVE : LCK_RW_ASSERT_HELD);
952 }
953
954 /*
955 * Return the number of bytes in the mbuf chain using the same
956 * method as m_length() or sballoc()
957 *
958 * Returns data len - starting from PKT start
959 * - retmbcnt - optional param to get total mbuf bytes in chain
960 * - retmbnum - optional param to get number of mbufs in chain
961 */
962 static unsigned int
963 cfil_data_length(struct mbuf *m, int *retmbcnt, int *retmbnum)
964 {
965 struct mbuf *m0;
966 unsigned int pktlen = 0;
967 int mbcnt;
968 int mbnum;
969
970 // Locate the start of data
971 for (m0 = m; m0 != NULL; m0 = m0->m_next) {
972 if (m0->m_flags & M_PKTHDR) {
973 break;
974 }
975 }
976 if (m0 == NULL) {
977 CFIL_LOG(LOG_ERR, "cfil_data_length: no M_PKTHDR");
978 return 0;
979 }
980 m = m0;
981
982 if (retmbcnt == NULL && retmbnum == NULL) {
983 return m_length(m);
984 }
985
986 pktlen = 0;
987 mbcnt = 0;
988 mbnum = 0;
989 for (m0 = m; m0 != NULL; m0 = m0->m_next) {
990 pktlen += m0->m_len;
991 mbnum++;
992 mbcnt += MSIZE;
993 if (m0->m_flags & M_EXT) {
994 mbcnt += m0->m_ext.ext_size;
995 }
996 }
997 if (retmbcnt) {
998 *retmbcnt = mbcnt;
999 }
1000 if (retmbnum) {
1001 *retmbnum = mbnum;
1002 }
1003 return pktlen;
1004 }
1005
1006 static struct mbuf *
1007 cfil_data_start(struct mbuf *m)
1008 {
1009 struct mbuf *m0;
1010
1011 // Locate the start of data
1012 for (m0 = m; m0 != NULL; m0 = m0->m_next) {
1013 if (m0->m_flags & M_PKTHDR) {
1014 break;
1015 }
1016 }
1017 return m0;
1018 }
1019
1020 /*
1021 * Common mbuf queue utilities
1022 */
1023
1024 static inline void
1025 cfil_queue_init(struct cfil_queue *cfq)
1026 {
1027 cfq->q_start = 0;
1028 cfq->q_end = 0;
1029 MBUFQ_INIT(&cfq->q_mq);
1030 }
1031
1032 static inline uint64_t
1033 cfil_queue_drain(struct cfil_queue *cfq)
1034 {
1035 uint64_t drained = cfq->q_start - cfq->q_end;
1036 cfq->q_start = 0;
1037 cfq->q_end = 0;
1038 MBUFQ_DRAIN(&cfq->q_mq);
1039
1040 return drained;
1041 }
1042
1043 /* Return 1 when empty, 0 otherwise */
1044 static inline int
1045 cfil_queue_empty(struct cfil_queue *cfq)
1046 {
1047 return MBUFQ_EMPTY(&cfq->q_mq);
1048 }
1049
1050 static inline uint64_t
1051 cfil_queue_offset_first(struct cfil_queue *cfq)
1052 {
1053 return cfq->q_start;
1054 }
1055
1056 static inline uint64_t
1057 cfil_queue_offset_last(struct cfil_queue *cfq)
1058 {
1059 return cfq->q_end;
1060 }
1061
1062 static inline uint64_t
1063 cfil_queue_len(struct cfil_queue *cfq)
1064 {
1065 return cfq->q_end - cfq->q_start;
1066 }
1067
1068 /*
1069 * Routines to verify some fundamental assumptions
1070 */
1071
1072 static void
1073 cfil_queue_verify(struct cfil_queue *cfq)
1074 {
1075 mbuf_t chain;
1076 mbuf_t m;
1077 mbuf_t n;
1078 uint64_t queuesize = 0;
1079
1080 /* Verify offsets are ordered */
1081 VERIFY(cfq->q_start <= cfq->q_end);
1082
1083 /*
1084 * When queue is empty, the offsets are equal otherwise the offsets
1085 * are different
1086 */
1087 VERIFY((MBUFQ_EMPTY(&cfq->q_mq) && cfq->q_start == cfq->q_end) ||
1088 (!MBUFQ_EMPTY(&cfq->q_mq) &&
1089 cfq->q_start != cfq->q_end));
1090
1091 MBUFQ_FOREACH(chain, &cfq->q_mq) {
1092 size_t chainsize = 0;
1093 m = chain;
1094 unsigned int mlen = cfil_data_length(m, NULL, NULL);
1095 // skip the addr and control stuff if present
1096 m = cfil_data_start(m);
1097
1098 if (m == NULL ||
1099 m == (void *)M_TAG_FREE_PATTERN ||
1100 m->m_next == (void *)M_TAG_FREE_PATTERN ||
1101 m->m_nextpkt == (void *)M_TAG_FREE_PATTERN) {
1102 panic("%s - mq %p is free at %p", __func__,
1103 &cfq->q_mq, m);
1104 }
1105 for (n = m; n != NULL; n = n->m_next) {
1106 if (n->m_type != MT_DATA &&
1107 n->m_type != MT_HEADER &&
1108 n->m_type != MT_OOBDATA) {
1109 panic("%s - %p unsupported type %u", __func__,
1110 n, n->m_type);
1111 }
1112 chainsize += n->m_len;
1113 }
1114 if (mlen != chainsize) {
1115 panic("%s - %p m_length() %u != chainsize %lu",
1116 __func__, m, mlen, chainsize);
1117 }
1118 queuesize += chainsize;
1119 }
1120 if (queuesize != cfq->q_end - cfq->q_start) {
1121 panic("%s - %p queuesize %llu != offsetdiffs %llu", __func__,
1122 m, queuesize, cfq->q_end - cfq->q_start);
1123 }
1124 }
1125
1126 static void
1127 cfil_queue_enqueue(struct cfil_queue *cfq, mbuf_t m, size_t len)
1128 {
1129 CFIL_QUEUE_VERIFY(cfq);
1130
1131 MBUFQ_ENQUEUE(&cfq->q_mq, m);
1132 cfq->q_end += len;
1133
1134 CFIL_QUEUE_VERIFY(cfq);
1135 }
1136
1137 static void
1138 cfil_queue_remove(struct cfil_queue *cfq, mbuf_t m, size_t len)
1139 {
1140 CFIL_QUEUE_VERIFY(cfq);
1141
1142 VERIFY(cfil_data_length(m, NULL, NULL) == len);
1143
1144 MBUFQ_REMOVE(&cfq->q_mq, m);
1145 MBUFQ_NEXT(m) = NULL;
1146 cfq->q_start += len;
1147
1148 CFIL_QUEUE_VERIFY(cfq);
1149 }
1150
1151 static mbuf_t
1152 cfil_queue_first(struct cfil_queue *cfq)
1153 {
1154 return MBUFQ_FIRST(&cfq->q_mq);
1155 }
1156
1157 static mbuf_t
1158 cfil_queue_next(struct cfil_queue *cfq, mbuf_t m)
1159 {
1160 #pragma unused(cfq)
1161 return MBUFQ_NEXT(m);
1162 }
1163
1164 static void
1165 cfil_entry_buf_verify(struct cfe_buf *cfe_buf)
1166 {
1167 CFIL_QUEUE_VERIFY(&cfe_buf->cfe_ctl_q);
1168 CFIL_QUEUE_VERIFY(&cfe_buf->cfe_pending_q);
1169
1170 /* Verify the queues are ordered so that pending is before ctl */
1171 VERIFY(cfe_buf->cfe_ctl_q.q_start >= cfe_buf->cfe_pending_q.q_end);
1172
1173 /* The peek offset cannot be less than the pass offset */
1174 VERIFY(cfe_buf->cfe_peek_offset >= cfe_buf->cfe_pass_offset);
1175
1176 /* Make sure we've updated the offset we peeked at */
1177 VERIFY(cfe_buf->cfe_ctl_q.q_start <= cfe_buf->cfe_peeked);
1178 }
1179
1180 static void
1181 cfil_entry_verify(struct cfil_entry *entry)
1182 {
1183 cfil_entry_buf_verify(&entry->cfe_snd);
1184 cfil_entry_buf_verify(&entry->cfe_rcv);
1185 }
1186
1187 static void
1188 cfil_info_buf_verify(struct cfi_buf *cfi_buf)
1189 {
1190 CFIL_QUEUE_VERIFY(&cfi_buf->cfi_inject_q);
1191
1192 VERIFY(cfi_buf->cfi_pending_first <= cfi_buf->cfi_pending_last);
1193 }
1194
1195 static void
1196 cfil_info_verify(struct cfil_info *cfil_info)
1197 {
1198 int i;
1199
1200 if (cfil_info == NULL) {
1201 return;
1202 }
1203
1204 cfil_info_buf_verify(&cfil_info->cfi_snd);
1205 cfil_info_buf_verify(&cfil_info->cfi_rcv);
1206
1207 for (i = 0; i < MAX_CONTENT_FILTER; i++) {
1208 cfil_entry_verify(&cfil_info->cfi_entries[i]);
1209 }
1210 }
1211
1212 static void
1213 verify_content_filter(struct content_filter *cfc)
1214 {
1215 struct cfil_entry *entry;
1216 uint32_t count = 0;
1217
1218 VERIFY(cfc->cf_sock_count >= 0);
1219
1220 TAILQ_FOREACH(entry, &cfc->cf_sock_entries, cfe_link) {
1221 count++;
1222 VERIFY(cfc == entry->cfe_filter);
1223 }
1224 VERIFY(count == cfc->cf_sock_count);
1225 }
1226
1227 /*
1228 * Kernel control socket callbacks
1229 */
1230 static errno_t
1231 cfil_ctl_connect(kern_ctl_ref kctlref, struct sockaddr_ctl *sac,
1232 void **unitinfo)
1233 {
1234 errno_t error = 0;
1235 struct content_filter *cfc = NULL;
1236
1237 CFIL_LOG(LOG_NOTICE, "");
1238
1239 cfc = zalloc(content_filter_zone);
1240 if (cfc == NULL) {
1241 CFIL_LOG(LOG_ERR, "zalloc failed");
1242 error = ENOMEM;
1243 goto done;
1244 }
1245 bzero(cfc, sizeof(struct content_filter));
1246
1247 cfil_rw_lock_exclusive(&cfil_lck_rw);
1248 if (content_filters == NULL) {
1249 struct content_filter **tmp;
1250
1251 cfil_rw_unlock_exclusive(&cfil_lck_rw);
1252
1253 MALLOC(tmp,
1254 struct content_filter **,
1255 MAX_CONTENT_FILTER * sizeof(struct content_filter *),
1256 M_TEMP,
1257 M_WAITOK | M_ZERO);
1258
1259 cfil_rw_lock_exclusive(&cfil_lck_rw);
1260
1261 if (tmp == NULL && content_filters == NULL) {
1262 error = ENOMEM;
1263 cfil_rw_unlock_exclusive(&cfil_lck_rw);
1264 goto done;
1265 }
1266 /* Another thread may have won the race */
1267 if (content_filters != NULL) {
1268 FREE(tmp, M_TEMP);
1269 } else {
1270 content_filters = tmp;
1271 }
1272 }
1273
1274 if (sac->sc_unit == 0 || sac->sc_unit > MAX_CONTENT_FILTER) {
1275 CFIL_LOG(LOG_ERR, "bad sc_unit %u", sac->sc_unit);
1276 error = EINVAL;
1277 } else if (content_filters[sac->sc_unit - 1] != NULL) {
1278 CFIL_LOG(LOG_ERR, "sc_unit %u in use", sac->sc_unit);
1279 error = EADDRINUSE;
1280 } else {
1281 /*
1282 * kernel control socket kcunit numbers start at 1
1283 */
1284 content_filters[sac->sc_unit - 1] = cfc;
1285
1286 cfc->cf_kcref = kctlref;
1287 cfc->cf_kcunit = sac->sc_unit;
1288 TAILQ_INIT(&cfc->cf_sock_entries);
1289
1290 *unitinfo = cfc;
1291 cfil_active_count++;
1292
1293 // Allocate periodic stats buffer for this filter
1294 if (global_cfil_stats_report_buffers[cfc->cf_kcunit - 1] == NULL) {
1295 cfil_rw_unlock_exclusive(&cfil_lck_rw);
1296
1297 struct cfil_stats_report_buffer *buf;
1298
1299 MALLOC(buf,
1300 struct cfil_stats_report_buffer *,
1301 sizeof(struct cfil_stats_report_buffer),
1302 M_TEMP,
1303 M_WAITOK | M_ZERO);
1304
1305 cfil_rw_lock_exclusive(&cfil_lck_rw);
1306
1307 if (buf == NULL) {
1308 error = ENOMEM;
1309 cfil_rw_unlock_exclusive(&cfil_lck_rw);
1310 goto done;
1311 }
1312
1313 /* Another thread may have won the race */
1314 if (global_cfil_stats_report_buffers[cfc->cf_kcunit - 1] != NULL) {
1315 FREE(buf, M_TEMP);
1316 } else {
1317 global_cfil_stats_report_buffers[cfc->cf_kcunit - 1] = buf;
1318 }
1319 }
1320 }
1321 cfil_rw_unlock_exclusive(&cfil_lck_rw);
1322 done:
1323 if (error != 0 && cfc != NULL) {
1324 zfree(content_filter_zone, cfc);
1325 }
1326
1327 if (error == 0) {
1328 OSIncrementAtomic(&cfil_stats.cfs_ctl_connect_ok);
1329 } else {
1330 OSIncrementAtomic(&cfil_stats.cfs_ctl_connect_fail);
1331 }
1332
1333 CFIL_LOG(LOG_INFO, "return %d cfil_active_count %u kcunit %u",
1334 error, cfil_active_count, sac->sc_unit);
1335
1336 return error;
1337 }
1338
1339 static errno_t
1340 cfil_ctl_disconnect(kern_ctl_ref kctlref, u_int32_t kcunit, void *unitinfo)
1341 {
1342 #pragma unused(kctlref)
1343 errno_t error = 0;
1344 struct content_filter *cfc;
1345 struct cfil_entry *entry;
1346 uint64_t sock_flow_id = 0;
1347
1348 CFIL_LOG(LOG_NOTICE, "");
1349
1350 if (content_filters == NULL) {
1351 CFIL_LOG(LOG_ERR, "no content filter");
1352 error = EINVAL;
1353 goto done;
1354 }
1355 if (kcunit > MAX_CONTENT_FILTER) {
1356 CFIL_LOG(LOG_ERR, "kcunit %u > MAX_CONTENT_FILTER (%d)",
1357 kcunit, MAX_CONTENT_FILTER);
1358 error = EINVAL;
1359 goto done;
1360 }
1361
1362 cfc = (struct content_filter *)unitinfo;
1363 if (cfc == NULL) {
1364 goto done;
1365 }
1366
1367 cfil_rw_lock_exclusive(&cfil_lck_rw);
1368 if (content_filters[kcunit - 1] != cfc || cfc->cf_kcunit != kcunit) {
1369 CFIL_LOG(LOG_ERR, "bad unit info %u)",
1370 kcunit);
1371 cfil_rw_unlock_exclusive(&cfil_lck_rw);
1372 goto done;
1373 }
1374 cfc->cf_flags |= CFF_DETACHING;
1375 /*
1376 * Remove all sockets from the filter
1377 */
1378 while ((entry = TAILQ_FIRST(&cfc->cf_sock_entries)) != NULL) {
1379 cfil_rw_lock_assert_held(&cfil_lck_rw, 1);
1380
1381 verify_content_filter(cfc);
1382 /*
1383 * Accept all outstanding data by pushing to next filter
1384 * or back to socket
1385 *
1386 * TBD: Actually we should make sure all data has been pushed
1387 * back to socket
1388 */
1389 if (entry->cfe_cfil_info && entry->cfe_cfil_info->cfi_so) {
1390 struct cfil_info *cfil_info = entry->cfe_cfil_info;
1391 struct socket *so = cfil_info->cfi_so;
1392 sock_flow_id = cfil_info->cfi_sock_id;
1393
1394 /* Need to let data flow immediately */
1395 entry->cfe_flags |= CFEF_SENT_SOCK_ATTACHED |
1396 CFEF_DATA_START;
1397
1398 /*
1399 * Respect locking hierarchy
1400 */
1401 cfil_rw_unlock_exclusive(&cfil_lck_rw);
1402
1403 socket_lock(so, 1);
1404
1405 /*
1406 * When cfe_filter is NULL the filter is detached
1407 * and the entry has been removed from cf_sock_entries
1408 */
1409 if ((so->so_cfil == NULL && so->so_cfil_db == NULL) || entry->cfe_filter == NULL) {
1410 cfil_rw_lock_exclusive(&cfil_lck_rw);
1411 goto release;
1412 }
1413
1414 (void) cfil_action_data_pass(so, cfil_info, kcunit, 1,
1415 CFM_MAX_OFFSET,
1416 CFM_MAX_OFFSET);
1417
1418 (void) cfil_action_data_pass(so, cfil_info, kcunit, 0,
1419 CFM_MAX_OFFSET,
1420 CFM_MAX_OFFSET);
1421
1422 cfil_rw_lock_exclusive(&cfil_lck_rw);
1423
1424 /*
1425 * Check again to make sure the cfil_info is still valid,
1426 * as the socket may have been unlocked when calling
1427 * cfil_acquire_sockbuf()
1428 */
1429 if (entry->cfe_filter == NULL ||
1430 (so->so_cfil == NULL && cfil_db_get_cfil_info(so->so_cfil_db, sock_flow_id) == NULL)) {
1431 goto release;
1432 }
1433
1434 /* The filter is now detached */
1435 entry->cfe_flags |= CFEF_CFIL_DETACHED;
1436 #if LIFECYCLE_DEBUG
1437 cfil_info_log(LOG_DEBUG, cfil_info, "CFIL: LIFECYCLE: - FILTER DISCONNECTED");
1438 #endif
1439 CFIL_LOG(LOG_NOTICE, "so %llx detached %u",
1440 (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit);
1441 if ((cfil_info->cfi_flags & CFIF_CLOSE_WAIT) &&
1442 cfil_filters_attached(so) == 0) {
1443 CFIL_LOG(LOG_NOTICE, "so %llx waking",
1444 (uint64_t)VM_KERNEL_ADDRPERM(so));
1445 wakeup((caddr_t)cfil_info);
1446 }
1447
1448 /*
1449 * Remove the filter entry from the content filter
1450 * but leave the rest of the state intact as the queues
1451 * may not be empty yet
1452 */
1453 entry->cfe_filter = NULL;
1454 entry->cfe_necp_control_unit = 0;
1455
1456 TAILQ_REMOVE(&cfc->cf_sock_entries, entry, cfe_link);
1457 cfc->cf_sock_count--;
1458 release:
1459 socket_unlock(so, 1);
1460 }
1461 }
1462 verify_content_filter(cfc);
1463
1464 /* Free the stats buffer for this filter */
1465 if (global_cfil_stats_report_buffers[cfc->cf_kcunit - 1] != NULL) {
1466 FREE(global_cfil_stats_report_buffers[cfc->cf_kcunit - 1], M_TEMP);
1467 global_cfil_stats_report_buffers[cfc->cf_kcunit - 1] = NULL;
1468 }
1469 VERIFY(cfc->cf_sock_count == 0);
1470
1471 /*
1472 * Make filter inactive
1473 */
1474 content_filters[kcunit - 1] = NULL;
1475 cfil_active_count--;
1476 cfil_rw_unlock_exclusive(&cfil_lck_rw);
1477
1478 if (cfc->cf_crypto_state != NULL) {
1479 cfil_crypto_cleanup_state(cfc->cf_crypto_state);
1480 cfc->cf_crypto_state = NULL;
1481 }
1482
1483 zfree(content_filter_zone, cfc);
1484 done:
1485 if (error == 0) {
1486 OSIncrementAtomic(&cfil_stats.cfs_ctl_disconnect_ok);
1487 } else {
1488 OSIncrementAtomic(&cfil_stats.cfs_ctl_disconnect_fail);
1489 }
1490
1491 CFIL_LOG(LOG_INFO, "return %d cfil_active_count %u kcunit %u",
1492 error, cfil_active_count, kcunit);
1493
1494 return error;
1495 }
1496
1497 /*
1498 * cfil_acquire_sockbuf()
1499 *
1500 * Prevent any other thread from acquiring the sockbuf
1501 * We use sb_cfil_thread as a semaphore to prevent other threads from
1502 * messing with the sockbuf -- see sblock()
1503 * Note: We do not set SB_LOCK here because the thread may check or modify
1504 * SB_LOCK several times until it calls cfil_release_sockbuf() -- currently
1505 * sblock(), sbunlock() or sodefunct()
1506 */
1507 static int
1508 cfil_acquire_sockbuf(struct socket *so, struct cfil_info *cfil_info, int outgoing)
1509 {
1510 thread_t tp = current_thread();
1511 struct sockbuf *sb = outgoing ? &so->so_snd : &so->so_rcv;
1512 lck_mtx_t *mutex_held;
1513 int error = 0;
1514
1515 /*
1516 * Wait until no thread is holding the sockbuf and other content
1517 * filter threads have released the sockbuf
1518 */
1519 while ((sb->sb_flags & SB_LOCK) ||
1520 (sb->sb_cfil_thread != NULL && sb->sb_cfil_thread != tp)) {
1521 if (so->so_proto->pr_getlock != NULL) {
1522 mutex_held = (*so->so_proto->pr_getlock)(so, PR_F_WILLUNLOCK);
1523 } else {
1524 mutex_held = so->so_proto->pr_domain->dom_mtx;
1525 }
1526
1527 LCK_MTX_ASSERT(mutex_held, LCK_MTX_ASSERT_OWNED);
1528
1529 sb->sb_wantlock++;
1530 VERIFY(sb->sb_wantlock != 0);
1531
1532 msleep(&sb->sb_flags, mutex_held, PSOCK, "cfil_acquire_sockbuf",
1533 NULL);
1534
1535 VERIFY(sb->sb_wantlock != 0);
1536 sb->sb_wantlock--;
1537 }
1538 /*
1539 * Use reference count for repetitive calls on same thread
1540 */
1541 if (sb->sb_cfil_refs == 0) {
1542 VERIFY(sb->sb_cfil_thread == NULL);
1543 VERIFY((sb->sb_flags & SB_LOCK) == 0);
1544
1545 sb->sb_cfil_thread = tp;
1546 sb->sb_flags |= SB_LOCK;
1547 }
1548 sb->sb_cfil_refs++;
1549
1550 /* We acquire the socket buffer when we need to cleanup */
1551 if (cfil_info == NULL) {
1552 CFIL_LOG(LOG_ERR, "so %llx cfil detached",
1553 (uint64_t)VM_KERNEL_ADDRPERM(so));
1554 error = 0;
1555 } else if (cfil_info->cfi_flags & CFIF_DROP) {
1556 CFIL_LOG(LOG_ERR, "so %llx drop set",
1557 (uint64_t)VM_KERNEL_ADDRPERM(so));
1558 error = EPIPE;
1559 }
1560
1561 return error;
1562 }
1563
1564 static void
1565 cfil_release_sockbuf(struct socket *so, int outgoing)
1566 {
1567 struct sockbuf *sb = outgoing ? &so->so_snd : &so->so_rcv;
1568 thread_t tp = current_thread();
1569
1570 socket_lock_assert_owned(so);
1571
1572 if (sb->sb_cfil_thread != NULL && sb->sb_cfil_thread != tp) {
1573 panic("%s sb_cfil_thread %p not current %p", __func__,
1574 sb->sb_cfil_thread, tp);
1575 }
1576 /*
1577 * Don't panic if we are defunct because SB_LOCK has
1578 * been cleared by sodefunct()
1579 */
1580 if (!(so->so_flags & SOF_DEFUNCT) && !(sb->sb_flags & SB_LOCK)) {
1581 panic("%s SB_LOCK not set on %p", __func__,
1582 sb);
1583 }
1584 /*
1585 * We can unlock when the thread unwinds to the last reference
1586 */
1587 sb->sb_cfil_refs--;
1588 if (sb->sb_cfil_refs == 0) {
1589 sb->sb_cfil_thread = NULL;
1590 sb->sb_flags &= ~SB_LOCK;
1591
1592 if (sb->sb_wantlock > 0) {
1593 wakeup(&sb->sb_flags);
1594 }
1595 }
1596 }
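/*
 * Illustrative sketch (editor's example, not part of the kernel sources):
 * the acquire/service/release pairing the rest of this file relies on; see
 * cfil_ctl_rcvd() below for a real caller. Assumes "so" is locked and
 * "cfil_info" is its attached content filter state.
 */
static void
example_service_outgoing_queue(struct socket *so, struct cfil_info *cfil_info, uint32_t kcunit)
{
	if (cfil_acquire_sockbuf(so, cfil_info, 1) == 0) {
		/* safe to manipulate so->so_snd: other threads wait in msleep() above */
		(void)cfil_data_service_ctl_q(so, cfil_info, kcunit, 1);
	}
	/* release unconditionally: the sockbuf is held even when acquire returned EPIPE */
	cfil_release_sockbuf(so, 1);
}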
1597
1598 cfil_sock_id_t
1599 cfil_sock_id_from_socket(struct socket *so)
1600 {
1601 if ((so->so_flags & SOF_CONTENT_FILTER) && so->so_cfil) {
1602 return so->so_cfil->cfi_sock_id;
1603 } else {
1604 return CFIL_SOCK_ID_NONE;
1605 }
1606 }
1607
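/*
 * cfil_socket_safe_lock -
 * Take a use-count reference on the inpcb and lock its socket, keeping the
 * lock only if the pcb has not reached WNT_STOPUSING in the meantime.
 * The caller holds the relevant pcbinfo lock while walking the pcb list.
 */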
1608 static bool
1609 cfil_socket_safe_lock(struct inpcb *inp)
1610 {
1611 if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) != WNT_STOPUSING) {
1612 socket_lock(inp->inp_socket, 1);
1613 if (in_pcb_checkstate(inp, WNT_RELEASE, 1) != WNT_STOPUSING) {
1614 return true;
1615 }
1616 socket_unlock(inp->inp_socket, 1);
1617 }
1618 return false;
1619 }
1620
1621 /*
1622 * cfil_socket_safe_lock_rip -
1623 * This routine attempts to lock the rip socket safely.
1624 * The passed in ripcbinfo is assumed to be locked and must be unlocked (regardless
1625 * of success/failure) before calling socket_unlock(). This is to avoid double
1626 * locking since rip_unlock() will lock ripcbinfo if it needs to dispose inpcb when
1627 * so_usecount is 0.
1628 */
1629 static bool
1630 cfil_socket_safe_lock_rip(struct inpcb *inp, struct inpcbinfo *pcbinfo)
1631 {
1632 struct socket *so = NULL;
1633
1634 VERIFY(pcbinfo != NULL);
1635
1636 if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) != WNT_STOPUSING) {
1637 so = inp->inp_socket;
1638 socket_lock(so, 1);
1639 if (in_pcb_checkstate(inp, WNT_RELEASE, 1) != WNT_STOPUSING) {
1640 lck_rw_done(pcbinfo->ipi_lock);
1641 return true;
1642 }
1643 }
1644
1645 lck_rw_done(pcbinfo->ipi_lock);
1646
1647 if (so) {
1648 socket_unlock(so, 1);
1649 }
1650 return false;
1651 }
1652
1653 static struct socket *
1654 cfil_socket_from_sock_id(cfil_sock_id_t cfil_sock_id, bool udp_only)
1655 {
1656 struct socket *so = NULL;
1657 u_int64_t gencnt = cfil_sock_id >> 32;
1658 u_int32_t flowhash = (u_int32_t)(cfil_sock_id & 0x0ffffffff);
1659 struct inpcb *inp = NULL;
1660 struct inpcbinfo *pcbinfo = NULL;
1661
1662 #if VERDICT_DEBUG
1663 CFIL_LOG(LOG_ERR, "CFIL: VERDICT: search for socket: id %llu gencnt %llx flowhash %x", cfil_sock_id, gencnt, flowhash);
1664 #endif
1665
1666 if (udp_only) {
1667 goto find_udp;
1668 }
1669
1670 pcbinfo = &tcbinfo;
1671 lck_rw_lock_shared(pcbinfo->ipi_lock);
1672 LIST_FOREACH(inp, pcbinfo->ipi_listhead, inp_list) {
1673 if (inp->inp_state != INPCB_STATE_DEAD &&
1674 inp->inp_socket != NULL &&
1675 inp->inp_flowhash == flowhash &&
1676 (inp->inp_socket->so_gencnt & 0x0ffffffff) == gencnt &&
1677 inp->inp_socket->so_cfil != NULL) {
1678 if (cfil_socket_safe_lock(inp)) {
1679 so = inp->inp_socket;
1680 }
1681 break;
1682 }
1683 }
1684 lck_rw_done(pcbinfo->ipi_lock);
1685 if (so != NULL) {
1686 goto done;
1687 }
1688
1689 find_udp:
1690
1691 pcbinfo = &udbinfo;
1692 lck_rw_lock_shared(pcbinfo->ipi_lock);
1693 LIST_FOREACH(inp, pcbinfo->ipi_listhead, inp_list) {
1694 if (inp->inp_state != INPCB_STATE_DEAD &&
1695 inp->inp_socket != NULL &&
1696 inp->inp_socket->so_cfil_db != NULL &&
1697 (inp->inp_socket->so_gencnt & 0x0ffffffff) == gencnt) {
1698 if (cfil_socket_safe_lock(inp)) {
1699 so = inp->inp_socket;
1700 }
1701 break;
1702 }
1703 }
1704 lck_rw_done(pcbinfo->ipi_lock);
1705 if (so != NULL) {
1706 goto done;
1707 }
1708
1709 pcbinfo = &ripcbinfo;
1710 lck_rw_lock_shared(pcbinfo->ipi_lock);
1711 LIST_FOREACH(inp, pcbinfo->ipi_listhead, inp_list) {
1712 if (inp->inp_state != INPCB_STATE_DEAD &&
1713 inp->inp_socket != NULL &&
1714 inp->inp_socket->so_cfil_db != NULL &&
1715 (inp->inp_socket->so_gencnt & 0x0ffffffff) == gencnt) {
1716 if (cfil_socket_safe_lock_rip(inp, pcbinfo)) {
1717 so = inp->inp_socket;
1718 }
1719 /* pcbinfo is already unlocked, we are done. */
1720 goto done;
1721 }
1722 }
1723 lck_rw_done(pcbinfo->ipi_lock);
1724
1725 done:
1726 if (so == NULL) {
1727 OSIncrementAtomic(&cfil_stats.cfs_sock_id_not_found);
1728 CFIL_LOG(LOG_DEBUG,
1729 "no socket for sock_id %llx gencnt %llx flowhash %x",
1730 cfil_sock_id, gencnt, flowhash);
1731 }
1732
1733 return so;
1734 }
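/*
 * Illustrative sketch (editor's example, not part of the kernel sources):
 * how a cfil_sock_id packs the socket generation count and flow hash,
 * matching the decomposition in cfil_socket_from_sock_id() above and the
 * composition in cfil_info_alloc() later in this file.
 */
static inline cfil_sock_id_t
example_pack_sock_id(uint64_t so_gencnt, uint32_t flowhash)
{
	/* upper 32 bits: generation count, lower 32 bits: flow hash */
	return (so_gencnt << 32) | flowhash;
}

static inline void
example_unpack_sock_id(cfil_sock_id_t sock_id, uint64_t *gencnt, uint32_t *flowhash)
{
	*gencnt = sock_id >> 32;
	*flowhash = (uint32_t)(sock_id & 0x0ffffffff);
}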
1735
1736 static struct socket *
1737 cfil_socket_from_client_uuid(uuid_t necp_client_uuid, bool *cfil_attached)
1738 {
1739 struct socket *so = NULL;
1740 struct inpcb *inp = NULL;
1741 struct inpcbinfo *pcbinfo = &tcbinfo;
1742
1743 lck_rw_lock_shared(pcbinfo->ipi_lock);
1744 LIST_FOREACH(inp, pcbinfo->ipi_listhead, inp_list) {
1745 if (inp->inp_state != INPCB_STATE_DEAD &&
1746 inp->inp_socket != NULL &&
1747 uuid_compare(inp->necp_client_uuid, necp_client_uuid) == 0) {
1748 *cfil_attached = (inp->inp_socket->so_cfil != NULL);
1749 if (cfil_socket_safe_lock(inp)) {
1750 so = inp->inp_socket;
1751 }
1752 break;
1753 }
1754 }
1755 lck_rw_done(pcbinfo->ipi_lock);
1756 if (so != NULL) {
1757 goto done;
1758 }
1759
1760 pcbinfo = &udbinfo;
1761 lck_rw_lock_shared(pcbinfo->ipi_lock);
1762 LIST_FOREACH(inp, pcbinfo->ipi_listhead, inp_list) {
1763 if (inp->inp_state != INPCB_STATE_DEAD &&
1764 inp->inp_socket != NULL &&
1765 uuid_compare(inp->necp_client_uuid, necp_client_uuid) == 0) {
1766 *cfil_attached = (inp->inp_socket->so_cfil_db != NULL);
1767 if (cfil_socket_safe_lock(inp)) {
1768 so = inp->inp_socket;
1769 }
1770 break;
1771 }
1772 }
1773 lck_rw_done(pcbinfo->ipi_lock);
1774
1775 done:
1776 return so;
1777 }
1778
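/*
 * cfil_info_stats_toggle -
 * Turn periodic stats reporting on or off for a flow. A non-zero
 * report_frequency (clamped up to CFIL_STATS_REPORT_INTERVAL_MIN_MSEC) links
 * the cfil_info onto cfil_sock_head_stats and wakes the stats thread for the
 * first flow added; a zero frequency unlinks it once no attached filter still
 * asks for stats. Callers hold cfil_lck_rw exclusively.
 */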
1779 static void
1780 cfil_info_stats_toggle(struct cfil_info *cfil_info, struct cfil_entry *entry, uint32_t report_frequency)
1781 {
1782 struct cfil_info *cfil = NULL;
1783 Boolean found = FALSE;
1784 int kcunit;
1785
1786 if (cfil_info == NULL) {
1787 return;
1788 }
1789
1790 if (report_frequency) {
1791 if (entry == NULL) {
1792 return;
1793 }
1794
1795 // Update stats reporting frequency.
1796 if (entry->cfe_stats_report_frequency != report_frequency) {
1797 entry->cfe_stats_report_frequency = report_frequency;
1798 if (entry->cfe_stats_report_frequency < CFIL_STATS_REPORT_INTERVAL_MIN_MSEC) {
1799 entry->cfe_stats_report_frequency = CFIL_STATS_REPORT_INTERVAL_MIN_MSEC;
1800 }
1801 microuptime(&entry->cfe_stats_report_ts);
1802
1803 // Insert cfil_info into the list only if it is not already in it.
1804 TAILQ_FOREACH(cfil, &cfil_sock_head_stats, cfi_link_stats) {
1805 if (cfil == cfil_info) {
1806 return;
1807 }
1808 }
1809
1810 TAILQ_INSERT_TAIL(&cfil_sock_head_stats, cfil_info, cfi_link_stats);
1811
1812 // Wake up stats thread if this is first flow added
1813 if (cfil_sock_attached_stats_count == 0) {
1814 thread_wakeup((caddr_t)&cfil_sock_attached_stats_count);
1815 }
1816 cfil_sock_attached_stats_count++;
1817 #if STATS_DEBUG
1818 CFIL_LOG(LOG_ERR, "CFIL: VERDICT RECEIVED - STATS FLOW INSERTED: <so %llx sockID %llu> stats frequency %d msecs",
1819 cfil_info->cfi_so ? (uint64_t)VM_KERNEL_ADDRPERM(cfil_info->cfi_so) : 0,
1820 cfil_info->cfi_sock_id,
1821 entry->cfe_stats_report_frequency);
1822 #endif
1823 }
1824 } else {
1825 // Turn off stats reporting for this filter.
1826 if (entry != NULL) {
1827 // Already off, no change.
1828 if (entry->cfe_stats_report_frequency == 0) {
1829 return;
1830 }
1831
1832 entry->cfe_stats_report_frequency = 0;
1833 // If cfil_info still has filter(s) asking for stats, no need to remove from list.
1834 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
1835 if (cfil_info->cfi_entries[kcunit - 1].cfe_stats_report_frequency > 0) {
1836 return;
1837 }
1838 }
1839 }
1840
1841 // No more filters asking for stats for this cfil_info, remove it from the list.
1842 if (!TAILQ_EMPTY(&cfil_sock_head_stats)) {
1843 found = FALSE;
1844 TAILQ_FOREACH(cfil, &cfil_sock_head_stats, cfi_link_stats) {
1845 if (cfil == cfil_info) {
1846 found = TRUE;
1847 break;
1848 }
1849 }
1850 if (found) {
1851 cfil_sock_attached_stats_count--;
1852 TAILQ_REMOVE(&cfil_sock_head_stats, cfil_info, cfi_link_stats);
1853 #if STATS_DEBUG
1854 CFIL_LOG(LOG_ERR, "CFIL: VERDICT RECEIVED - STATS FLOW DELETED: <so %llx sockID %llu> stats frequency reset",
1855 cfil_info->cfi_so ? (uint64_t)VM_KERNEL_ADDRPERM(cfil_info->cfi_so) : 0,
1856 cfil_info->cfi_sock_id);
1857 #endif
1858 }
1859 }
1860 }
1861 }
1862
1863 static errno_t
1864 cfil_ctl_send(kern_ctl_ref kctlref, u_int32_t kcunit, void *unitinfo, mbuf_t m,
1865 int flags)
1866 {
1867 #pragma unused(kctlref, flags)
1868 errno_t error = 0;
1869 struct cfil_msg_hdr *msghdr;
1870 struct content_filter *cfc = (struct content_filter *)unitinfo;
1871 struct socket *so;
1872 struct cfil_msg_action *action_msg;
1873 struct cfil_entry *entry;
1874 struct cfil_info *cfil_info = NULL;
1875 unsigned int data_len = 0;
1876
1877 CFIL_LOG(LOG_INFO, "");
1878
1879 if (content_filters == NULL) {
1880 CFIL_LOG(LOG_ERR, "no content filter");
1881 error = EINVAL;
1882 goto done;
1883 }
1884 if (kcunit > MAX_CONTENT_FILTER) {
1885 CFIL_LOG(LOG_ERR, "kcunit %u > MAX_CONTENT_FILTER (%d)",
1886 kcunit, MAX_CONTENT_FILTER);
1887 error = EINVAL;
1888 goto done;
1889 }
1890 if (m == NULL) {
1891 CFIL_LOG(LOG_ERR, "null mbuf");
1892 error = EINVAL;
1893 goto done;
1894 }
1895 data_len = m_length(m);
1896
1897 if (data_len < sizeof(struct cfil_msg_hdr)) {
1898 CFIL_LOG(LOG_ERR, "too short %u", data_len);
1899 error = EINVAL;
1900 goto done;
1901 }
1902 msghdr = (struct cfil_msg_hdr *)mbuf_data(m);
1903 if (msghdr->cfm_version != CFM_VERSION_CURRENT) {
1904 CFIL_LOG(LOG_ERR, "bad version %u", msghdr->cfm_version);
1905 error = EINVAL;
1906 goto done;
1907 }
1908 if (msghdr->cfm_type != CFM_TYPE_ACTION) {
1909 CFIL_LOG(LOG_ERR, "bad type %u", msghdr->cfm_type);
1910 error = EINVAL;
1911 goto done;
1912 }
1913 if (msghdr->cfm_len > data_len) {
1914 CFIL_LOG(LOG_ERR, "bad length %u", msghdr->cfm_len);
1915 error = EINVAL;
1916 goto done;
1917 }
1918
1919 /* Validate action operation */
1920 switch (msghdr->cfm_op) {
1921 case CFM_OP_DATA_UPDATE:
1922 OSIncrementAtomic(
1923 &cfil_stats.cfs_ctl_action_data_update);
1924 break;
1925 case CFM_OP_DROP:
1926 OSIncrementAtomic(&cfil_stats.cfs_ctl_action_drop);
1927 break;
1928 case CFM_OP_BLESS_CLIENT:
1929 if (msghdr->cfm_len != sizeof(struct cfil_msg_bless_client)) {
1930 OSIncrementAtomic(&cfil_stats.cfs_ctl_action_bad_len);
1931 error = EINVAL;
1932 CFIL_LOG(LOG_ERR, "bad len: %u for op %u",
1933 msghdr->cfm_len,
1934 msghdr->cfm_op);
1935 goto done;
1936 }
1937 error = cfil_action_bless_client(kcunit, msghdr);
1938 goto done;
1939 case CFM_OP_SET_CRYPTO_KEY:
1940 if (msghdr->cfm_len != sizeof(struct cfil_msg_set_crypto_key)) {
1941 OSIncrementAtomic(&cfil_stats.cfs_ctl_action_bad_len);
1942 error = EINVAL;
1943 CFIL_LOG(LOG_ERR, "bad len: %u for op %u",
1944 msghdr->cfm_len,
1945 msghdr->cfm_op);
1946 goto done;
1947 }
1948 error = cfil_action_set_crypto_key(kcunit, msghdr);
1949 goto done;
1950 default:
1951 OSIncrementAtomic(&cfil_stats.cfs_ctl_action_bad_op);
1952 CFIL_LOG(LOG_ERR, "bad op %u", msghdr->cfm_op);
1953 error = EINVAL;
1954 goto done;
1955 }
1956 if (msghdr->cfm_len != sizeof(struct cfil_msg_action)) {
1957 OSIncrementAtomic(&cfil_stats.cfs_ctl_action_bad_len);
1958 error = EINVAL;
1959 CFIL_LOG(LOG_ERR, "bad len: %u for op %u",
1960 msghdr->cfm_len,
1961 msghdr->cfm_op);
1962 goto done;
1963 }
1964 cfil_rw_lock_shared(&cfil_lck_rw);
1965 if (cfc != (void *)content_filters[kcunit - 1]) {
1966 CFIL_LOG(LOG_ERR, "unitinfo does not match for kcunit %u",
1967 kcunit);
1968 error = EINVAL;
1969 cfil_rw_unlock_shared(&cfil_lck_rw);
1970 goto done;
1971 }
1972 cfil_rw_unlock_shared(&cfil_lck_rw);
1973
1974 // Search for socket (TCP+UDP and lock so)
1975 so = cfil_socket_from_sock_id(msghdr->cfm_sock_id, false);
1976 if (so == NULL) {
1977 CFIL_LOG(LOG_NOTICE, "bad sock_id %llx",
1978 msghdr->cfm_sock_id);
1979 error = EINVAL;
1980 goto done;
1981 }
1982
1983 cfil_info = so->so_cfil_db != NULL ?
1984 cfil_db_get_cfil_info(so->so_cfil_db, msghdr->cfm_sock_id) : so->so_cfil;
1985
1986 // We must not take the global lock here, to avoid a deadlock further down the path.
1987 // Instead, attempt to retain the cfil_info to prevent its deallocation until we
1988 // are done. Abort the retain if cfil_info has already entered the free code path.
1989 if (cfil_info && os_ref_retain_try(&cfil_info->cfi_ref_count) == false) {
1990 socket_unlock(so, 1);
1991 goto done;
1992 }
1993
1994 if (cfil_info == NULL) {
1995 CFIL_LOG(LOG_NOTICE, "so %llx <id %llu> not attached",
1996 (uint64_t)VM_KERNEL_ADDRPERM(so), msghdr->cfm_sock_id);
1997 error = EINVAL;
1998 goto unlock;
1999 } else if (cfil_info->cfi_flags & CFIF_DROP) {
2000 CFIL_LOG(LOG_NOTICE, "so %llx drop set",
2001 (uint64_t)VM_KERNEL_ADDRPERM(so));
2002 error = EINVAL;
2003 goto unlock;
2004 }
2005
2006 if (cfil_info->cfi_debug) {
2007 cfil_info_log(LOG_ERR, cfil_info, "CFIL: RECEIVED MSG FROM FILTER");
2008 }
2009
2010 entry = &cfil_info->cfi_entries[kcunit - 1];
2011 if (entry->cfe_filter == NULL) {
2012 CFIL_LOG(LOG_NOTICE, "so %llx no filter",
2013 (uint64_t)VM_KERNEL_ADDRPERM(so));
2014 error = EINVAL;
2015 goto unlock;
2016 }
2017
2018 if (entry->cfe_flags & CFEF_SENT_SOCK_ATTACHED) {
2019 entry->cfe_flags |= CFEF_DATA_START;
2020 } else {
2021 CFIL_LOG(LOG_ERR,
2022 "so %llx attached not sent for %u",
2023 (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit);
2024 error = EINVAL;
2025 goto unlock;
2026 }
2027
2028 microuptime(&entry->cfe_last_action);
2029 CFI_ADD_TIME_LOG(cfil_info, &entry->cfe_last_action, &cfil_info->cfi_first_event, msghdr->cfm_op);
2030
2031 action_msg = (struct cfil_msg_action *)msghdr;
2032
2033 switch (msghdr->cfm_op) {
2034 case CFM_OP_DATA_UPDATE:
2035
2036 if (cfil_info->cfi_debug) {
2037 cfil_info_log(LOG_ERR, cfil_info, "CFIL: RECEIVED CFM_OP_DATA_UPDATE");
2038 CFIL_LOG(LOG_ERR, "CFIL: VERDICT RECEIVED: <so %llx sockID %llu> <IN peek:%llu pass:%llu, OUT peek:%llu pass:%llu>",
2039 (uint64_t)VM_KERNEL_ADDRPERM(so),
2040 cfil_info->cfi_sock_id,
2041 action_msg->cfa_in_peek_offset, action_msg->cfa_in_pass_offset,
2042 action_msg->cfa_out_peek_offset, action_msg->cfa_out_pass_offset);
2043 }
2044
2045 #if VERDICT_DEBUG
2046 CFIL_LOG(LOG_ERR, "CFIL: VERDICT RECEIVED: <so %llx sockID %llu> <IN peek:%llu pass:%llu, OUT peek:%llu pass:%llu>",
2047 (uint64_t)VM_KERNEL_ADDRPERM(so),
2048 cfil_info->cfi_sock_id,
2049 action_msg->cfa_in_peek_offset, action_msg->cfa_in_pass_offset,
2050 action_msg->cfa_out_peek_offset, action_msg->cfa_out_pass_offset);
2051 #endif
2052 /*
2053 * Received verdict, at this point we know this
2054 * socket connection is allowed. Unblock thread
2055 * immediately before proceeding to process the verdict.
2056 */
2057 cfil_sock_received_verdict(so);
2058
2059 if (action_msg->cfa_out_peek_offset != 0 ||
2060 action_msg->cfa_out_pass_offset != 0) {
2061 error = cfil_action_data_pass(so, cfil_info, kcunit, 1,
2062 action_msg->cfa_out_pass_offset,
2063 action_msg->cfa_out_peek_offset);
2064 }
2065 if (error == EJUSTRETURN) {
2066 error = 0;
2067 }
2068 if (error != 0) {
2069 break;
2070 }
2071 if (action_msg->cfa_in_peek_offset != 0 ||
2072 action_msg->cfa_in_pass_offset != 0) {
2073 error = cfil_action_data_pass(so, cfil_info, kcunit, 0,
2074 action_msg->cfa_in_pass_offset,
2075 action_msg->cfa_in_peek_offset);
2076 }
2077 if (error == EJUSTRETURN) {
2078 error = 0;
2079 }
2080
2081 // Toggle stats reporting according to received verdict.
2082 cfil_rw_lock_exclusive(&cfil_lck_rw);
2083 cfil_info_stats_toggle(cfil_info, entry, action_msg->cfa_stats_frequency);
2084 cfil_rw_unlock_exclusive(&cfil_lck_rw);
2085
2086 break;
2087
2088 case CFM_OP_DROP:
2089 if (cfil_info->cfi_debug) {
2090 cfil_info_log(LOG_ERR, cfil_info, "CFIL: RECEIVED CFM_OP_DROP");
2091 CFIL_LOG(LOG_ERR, "CFIL: VERDICT DROP RECEIVED: <so %llx sockID %llu> <IN peek:%llu pass:%llu, OUT peek:%llu pass:%llu>",
2092 (uint64_t)VM_KERNEL_ADDRPERM(so),
2093 cfil_info->cfi_sock_id,
2094 action_msg->cfa_in_peek_offset, action_msg->cfa_in_pass_offset,
2095 action_msg->cfa_out_peek_offset, action_msg->cfa_out_pass_offset);
2096 }
2097
2098 #if VERDICT_DEBUG
2099 CFIL_LOG(LOG_ERR, "CFIL: VERDICT DROP RECEIVED: <so %llx sockID %llu> <IN peek:%llu pass:%llu, OUT peek:%llu pass:%llu>",
2100 (uint64_t)VM_KERNEL_ADDRPERM(so),
2101 cfil_info->cfi_sock_id,
2102 action_msg->cfa_in_peek_offset, action_msg->cfa_in_pass_offset,
2103 action_msg->cfa_out_peek_offset, action_msg->cfa_out_pass_offset);
2104 #endif
2105 error = cfil_action_drop(so, cfil_info, kcunit);
2106 cfil_sock_received_verdict(so);
2107 break;
2108
2109 default:
2110 error = EINVAL;
2111 break;
2112 }
2113 unlock:
2114 CFIL_INFO_FREE(cfil_info)
2115 socket_unlock(so, 1);
2116 done:
2117 mbuf_freem(m);
2118
2119 if (error == 0) {
2120 OSIncrementAtomic(&cfil_stats.cfs_ctl_send_ok);
2121 } else {
2122 OSIncrementAtomic(&cfil_stats.cfs_ctl_send_bad);
2123 }
2124
2125 return error;
2126 }
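/*
 * Illustrative user-space sketch (editor's example, not part of this file):
 * building the CFM_OP_DATA_UPDATE action message that cfil_ctl_send() above
 * validates and processes. Assumptions: "fd" is the agent's connected kernel
 * control socket, and struct cfil_msg_action, its cfa_msghdr member and the
 * CFM_* constants are taken from the private net/content_filter.h header.
 */
#include <sys/socket.h>
#include <string.h>
#include <stdio.h>

static int
example_send_data_update(int fd, cfil_sock_id_t sock_id,
    uint64_t in_pass, uint64_t in_peek, uint64_t out_pass, uint64_t out_peek)
{
	struct cfil_msg_action action;

	memset(&action, 0, sizeof(action));
	/* cfm_len must be exactly sizeof(struct cfil_msg_action) for this op */
	action.cfa_msghdr.cfm_len = sizeof(action);
	action.cfa_msghdr.cfm_version = CFM_VERSION_CURRENT;
	action.cfa_msghdr.cfm_type = CFM_TYPE_ACTION;
	action.cfa_msghdr.cfm_op = CFM_OP_DATA_UPDATE;
	action.cfa_msghdr.cfm_sock_id = sock_id;	/* as received in the attach event */
	action.cfa_in_pass_offset = in_pass;		/* inbound bytes the kernel may release */
	action.cfa_in_peek_offset = in_peek;		/* inbound bytes the agent still wants to see */
	action.cfa_out_pass_offset = out_pass;
	action.cfa_out_peek_offset = out_peek;

	if (send(fd, &action, sizeof(action), 0) == -1) {
		perror("send(CFM_OP_DATA_UPDATE)");
		return -1;
	}
	return 0;
}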
2127
2128 static errno_t
2129 cfil_ctl_getopt(kern_ctl_ref kctlref, u_int32_t kcunit, void *unitinfo,
2130 int opt, void *data, size_t *len)
2131 {
2132 #pragma unused(kctlref, opt)
2133 struct cfil_info *cfil_info = NULL;
2134 errno_t error = 0;
2135 struct content_filter *cfc = (struct content_filter *)unitinfo;
2136
2137 CFIL_LOG(LOG_NOTICE, "");
2138
2139 cfil_rw_lock_shared(&cfil_lck_rw);
2140
2141 if (content_filters == NULL) {
2142 CFIL_LOG(LOG_ERR, "no content filter");
2143 error = EINVAL;
2144 goto done;
2145 }
2146 if (kcunit > MAX_CONTENT_FILTER) {
2147 CFIL_LOG(LOG_ERR, "kcunit %u > MAX_CONTENT_FILTER (%d)",
2148 kcunit, MAX_CONTENT_FILTER);
2149 error = EINVAL;
2150 goto done;
2151 }
2152 if (cfc != (void *)content_filters[kcunit - 1]) {
2153 CFIL_LOG(LOG_ERR, "unitinfo does not match for kcunit %u",
2154 kcunit);
2155 error = EINVAL;
2156 goto done;
2157 }
2158 switch (opt) {
2159 case CFIL_OPT_NECP_CONTROL_UNIT:
2160 if (*len < sizeof(uint32_t)) {
2161 CFIL_LOG(LOG_ERR, "len too small %lu", *len);
2162 error = EINVAL;
2163 goto done;
2164 }
2165 if (data != NULL) {
2166 *(uint32_t *)data = cfc->cf_necp_control_unit;
2167 }
2168 break;
2169 case CFIL_OPT_GET_SOCKET_INFO:
2170 if (*len != sizeof(struct cfil_opt_sock_info)) {
2171 CFIL_LOG(LOG_ERR, "len does not match %lu", *len);
2172 error = EINVAL;
2173 goto done;
2174 }
2175 if (data == NULL) {
2176 CFIL_LOG(LOG_ERR, "data not passed");
2177 error = EINVAL;
2178 goto done;
2179 }
2180
2181 struct cfil_opt_sock_info *sock_info =
2182 (struct cfil_opt_sock_info *) data;
2183
2184 // Unlock here so that we never hold both cfil_lck_rw and the
2185 // socket_lock at the same time. Otherwise, this can deadlock
2186 // because soclose() takes the socket_lock and then exclusive
2187 // cfil_lck_rw and we require the opposite order.
2188
2189 // WARNING: Be sure to never use anything protected
2190 // by cfil_lck_rw beyond this point.
2191 // WARNING: Be sure to avoid fallthrough and
2192 // goto return_already_unlocked from this branch.
2193 cfil_rw_unlock_shared(&cfil_lck_rw);
2194
2195 // Search (TCP+UDP) and lock socket
2196 struct socket *sock =
2197 cfil_socket_from_sock_id(sock_info->cfs_sock_id, false);
2198 if (sock == NULL) {
2199 #if LIFECYCLE_DEBUG
2200 CFIL_LOG(LOG_ERR, "CFIL: GET_SOCKET_INFO failed: bad sock_id %llu",
2201 sock_info->cfs_sock_id);
2202 #endif
2203 error = ENOENT;
2204 goto return_already_unlocked;
2205 }
2206
2207 cfil_info = (sock->so_cfil_db != NULL) ?
2208 cfil_db_get_cfil_info(sock->so_cfil_db, sock_info->cfs_sock_id) : sock->so_cfil;
2209
2210 if (cfil_info == NULL) {
2211 #if LIFECYCLE_DEBUG
2212 CFIL_LOG(LOG_ERR, "CFIL: GET_SOCKET_INFO failed: so %llx not attached, cannot fetch info",
2213 (uint64_t)VM_KERNEL_ADDRPERM(sock));
2214 #endif
2215 error = EINVAL;
2216 socket_unlock(sock, 1);
2217 goto return_already_unlocked;
2218 }
2219
2220 // Fill out family, type, and protocol
2221 sock_info->cfs_sock_family = sock->so_proto->pr_domain->dom_family;
2222 sock_info->cfs_sock_type = sock->so_proto->pr_type;
2223 sock_info->cfs_sock_protocol = sock->so_proto->pr_protocol;
2224
2225 // Source and destination addresses
2226 struct inpcb *inp = sotoinpcb(sock);
2227 if (inp->inp_vflag & INP_IPV6) {
2228 struct in6_addr *laddr = NULL, *faddr = NULL;
2229 u_int16_t lport = 0, fport = 0;
2230
2231 cfil_get_flow_address_v6(cfil_info->cfi_hash_entry, inp,
2232 &laddr, &faddr, &lport, &fport);
2233 fill_ip6_sockaddr_4_6(&sock_info->cfs_local, laddr, lport);
2234 fill_ip6_sockaddr_4_6(&sock_info->cfs_remote, faddr, fport);
2235 } else if (inp->inp_vflag & INP_IPV4) {
2236 struct in_addr laddr = {.s_addr = 0}, faddr = {.s_addr = 0};
2237 u_int16_t lport = 0, fport = 0;
2238
2239 cfil_get_flow_address(cfil_info->cfi_hash_entry, inp,
2240 &laddr, &faddr, &lport, &fport);
2241 fill_ip_sockaddr_4_6(&sock_info->cfs_local, laddr, lport);
2242 fill_ip_sockaddr_4_6(&sock_info->cfs_remote, faddr, fport);
2243 }
2244
2245 // Set the pid info
2246 sock_info->cfs_pid = sock->last_pid;
2247 memcpy(sock_info->cfs_uuid, sock->last_uuid, sizeof(uuid_t));
2248
2249 if (sock->so_flags & SOF_DELEGATED) {
2250 sock_info->cfs_e_pid = sock->e_pid;
2251 memcpy(sock_info->cfs_e_uuid, sock->e_uuid, sizeof(uuid_t));
2252 } else {
2253 sock_info->cfs_e_pid = sock->last_pid;
2254 memcpy(sock_info->cfs_e_uuid, sock->last_uuid, sizeof(uuid_t));
2255 }
2256
2257 socket_unlock(sock, 1);
2258
2259 goto return_already_unlocked;
2260 default:
2261 error = ENOPROTOOPT;
2262 break;
2263 }
2264 done:
2265 cfil_rw_unlock_shared(&cfil_lck_rw);
2266
2267 return error;
2268
2269 return_already_unlocked:
2270
2271 return error;
2272 }
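/*
 * Illustrative user-space sketch (editor's example, not part of this file):
 * querying CFIL_OPT_GET_SOCKET_INFO, which cfil_ctl_getopt() above services.
 * Assumptions: "fd" is the connected kernel control socket, and
 * struct cfil_opt_sock_info / the CFIL_OPT_* constants come from the private
 * net/content_filter.h header.
 */
#include <sys/socket.h>
#include <sys/sys_domain.h>
#include <string.h>
#include <stdio.h>

static int
example_get_socket_info(int fd, cfil_sock_id_t sock_id, struct cfil_opt_sock_info *out)
{
	socklen_t len = sizeof(*out);

	memset(out, 0, sizeof(*out));
	out->cfs_sock_id = sock_id;	/* tells the kernel which flow to look up */

	/* the kernel requires an exact length match for this option */
	if (getsockopt(fd, SYSPROTO_CONTROL, CFIL_OPT_GET_SOCKET_INFO, out, &len) == -1) {
		perror("getsockopt(CFIL_OPT_GET_SOCKET_INFO)");
		return -1;
	}
	/* out->cfs_local / cfs_remote, cfs_pid and the UUIDs are now filled in */
	return 0;
}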
2273
2274 static errno_t
2275 cfil_ctl_setopt(kern_ctl_ref kctlref, u_int32_t kcunit, void *unitinfo,
2276 int opt, void *data, size_t len)
2277 {
2278 #pragma unused(kctlref, opt)
2279 errno_t error = 0;
2280 struct content_filter *cfc = (struct content_filter *)unitinfo;
2281
2282 CFIL_LOG(LOG_NOTICE, "");
2283
2284 cfil_rw_lock_exclusive(&cfil_lck_rw);
2285
2286 if (content_filters == NULL) {
2287 CFIL_LOG(LOG_ERR, "no content filter");
2288 error = EINVAL;
2289 goto done;
2290 }
2291 if (kcunit > MAX_CONTENT_FILTER) {
2292 CFIL_LOG(LOG_ERR, "kcunit %u > MAX_CONTENT_FILTER (%d)",
2293 kcunit, MAX_CONTENT_FILTER);
2294 error = EINVAL;
2295 goto done;
2296 }
2297 if (cfc != (void *)content_filters[kcunit - 1]) {
2298 CFIL_LOG(LOG_ERR, "unitinfo does not match for kcunit %u",
2299 kcunit);
2300 error = EINVAL;
2301 goto done;
2302 }
2303 switch (opt) {
2304 case CFIL_OPT_NECP_CONTROL_UNIT:
2305 if (len < sizeof(uint32_t)) {
2306 CFIL_LOG(LOG_ERR, "CFIL_OPT_NECP_CONTROL_UNIT "
2307 "len too small %lu", len);
2308 error = EINVAL;
2309 goto done;
2310 }
2311 if (cfc->cf_necp_control_unit != 0) {
2312 CFIL_LOG(LOG_ERR, "CFIL_OPT_NECP_CONTROL_UNIT "
2313 "already set %u",
2314 cfc->cf_necp_control_unit);
2315 error = EINVAL;
2316 goto done;
2317 }
2318 cfc->cf_necp_control_unit = *(uint32_t *)data;
2319 break;
2320 default:
2321 error = ENOPROTOOPT;
2322 break;
2323 }
2324 done:
2325 cfil_rw_unlock_exclusive(&cfil_lck_rw);
2326
2327 return error;
2328 }
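/*
 * Illustrative user-space sketch (editor's example, not part of this file):
 * a filter agent binding its NECP filter control unit to the kernel control
 * socket, which lands in cfil_ctl_setopt() above. Assumptions: "fd" is the
 * connected kernel control socket, "control_unit" is the value coordinated
 * with the agent's NECP policy, and CFIL_OPT_NECP_CONTROL_UNIT comes from the
 * private net/content_filter.h header.
 */
#include <sys/socket.h>
#include <sys/sys_domain.h>
#include <stdint.h>
#include <stdio.h>

static int
example_set_necp_control_unit(int fd, uint32_t control_unit)
{
	/* the option may only be set once per control socket (see the check above) */
	if (setsockopt(fd, SYSPROTO_CONTROL, CFIL_OPT_NECP_CONTROL_UNIT,
	    &control_unit, sizeof(control_unit)) == -1) {
		perror("setsockopt(CFIL_OPT_NECP_CONTROL_UNIT)");
		return -1;
	}
	return 0;
}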
2329
2330
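/*
 * cfil_ctl_rcvd -
 * Kernel control callback invoked when the user-space agent has read data off
 * its kernel control socket, freeing space: clear the filter's
 * CFF_FLOW_CONTROLLED flag and keep servicing the control queues of
 * flow-controlled entries until enqueueing fails again.
 */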
2331 static void
2332 cfil_ctl_rcvd(kern_ctl_ref kctlref, u_int32_t kcunit, void *unitinfo, int flags)
2333 {
2334 #pragma unused(kctlref, flags)
2335 struct content_filter *cfc = (struct content_filter *)unitinfo;
2336 struct socket *so = NULL;
2337 int error;
2338 struct cfil_entry *entry;
2339 struct cfil_info *cfil_info = NULL;
2340
2341 CFIL_LOG(LOG_INFO, "");
2342
2343 if (content_filters == NULL) {
2344 CFIL_LOG(LOG_ERR, "no content filter");
2345 OSIncrementAtomic(&cfil_stats.cfs_ctl_rcvd_bad);
2346 return;
2347 }
2348 if (kcunit > MAX_CONTENT_FILTER) {
2349 CFIL_LOG(LOG_ERR, "kcunit %u > MAX_CONTENT_FILTER (%d)",
2350 kcunit, MAX_CONTENT_FILTER);
2351 OSIncrementAtomic(&cfil_stats.cfs_ctl_rcvd_bad);
2352 return;
2353 }
2354 cfil_rw_lock_shared(&cfil_lck_rw);
2355 if (cfc != (void *)content_filters[kcunit - 1]) {
2356 CFIL_LOG(LOG_ERR, "unitinfo does not match for kcunit %u",
2357 kcunit);
2358 OSIncrementAtomic(&cfil_stats.cfs_ctl_rcvd_bad);
2359 goto done;
2360 }
2361 /* Let's assume the flow control is lifted */
2362 if (cfc->cf_flags & CFF_FLOW_CONTROLLED) {
2363 if (!cfil_rw_lock_shared_to_exclusive(&cfil_lck_rw)) {
2364 cfil_rw_lock_exclusive(&cfil_lck_rw);
2365 }
2366
2367 cfc->cf_flags &= ~CFF_FLOW_CONTROLLED;
2368
2369 cfil_rw_lock_exclusive_to_shared(&cfil_lck_rw);
2370 LCK_RW_ASSERT(&cfil_lck_rw, LCK_RW_ASSERT_SHARED);
2371 }
2372 /*
2373 * Flow control will be raised again as soon as an entry cannot enqueue
2374 * to the kernel control socket
2375 */
2376 while ((cfc->cf_flags & CFF_FLOW_CONTROLLED) == 0) {
2377 verify_content_filter(cfc);
2378
2379 cfil_rw_lock_assert_held(&cfil_lck_rw, 0);
2380
2381 /* Find an entry that is flow controlled */
2382 TAILQ_FOREACH(entry, &cfc->cf_sock_entries, cfe_link) {
2383 if (entry->cfe_cfil_info == NULL ||
2384 entry->cfe_cfil_info->cfi_so == NULL) {
2385 continue;
2386 }
2387 if ((entry->cfe_flags & CFEF_FLOW_CONTROLLED) == 0) {
2388 continue;
2389 }
2390 }
2391 if (entry == NULL) {
2392 break;
2393 }
2394
2395 OSIncrementAtomic(&cfil_stats.cfs_ctl_rcvd_flow_lift);
2396
2397 cfil_info = entry->cfe_cfil_info;
2398 so = cfil_info->cfi_so;
2399
2400 cfil_rw_unlock_shared(&cfil_lck_rw);
2401 socket_lock(so, 1);
2402
2403 do {
2404 error = cfil_acquire_sockbuf(so, cfil_info, 1);
2405 if (error == 0) {
2406 error = cfil_data_service_ctl_q(so, cfil_info, kcunit, 1);
2407 }
2408 cfil_release_sockbuf(so, 1);
2409 if (error != 0) {
2410 break;
2411 }
2412
2413 error = cfil_acquire_sockbuf(so, cfil_info, 0);
2414 if (error == 0) {
2415 error = cfil_data_service_ctl_q(so, cfil_info, kcunit, 0);
2416 }
2417 cfil_release_sockbuf(so, 0);
2418 } while (0);
2419
2420 socket_lock_assert_owned(so);
2421 socket_unlock(so, 1);
2422
2423 cfil_rw_lock_shared(&cfil_lck_rw);
2424 }
2425 done:
2426 cfil_rw_unlock_shared(&cfil_lck_rw);
2427 }
2428
2429 void
2430 cfil_init(void)
2431 {
2432 struct kern_ctl_reg kern_ctl;
2433 errno_t error = 0;
2434 unsigned int mbuf_limit = 0;
2435
2436 CFIL_LOG(LOG_NOTICE, "");
2437
2438 /*
2439 * Compile time verifications
2440 */
2441 _CASSERT(CFIL_MAX_FILTER_COUNT == MAX_CONTENT_FILTER);
2442 _CASSERT(sizeof(struct cfil_filter_stat) % sizeof(uint32_t) == 0);
2443 _CASSERT(sizeof(struct cfil_entry_stat) % sizeof(uint32_t) == 0);
2444 _CASSERT(sizeof(struct cfil_sock_stat) % sizeof(uint32_t) == 0);
2445
2446 /*
2447 * Runtime verifications
2448 */
2449 VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_ctl_q_in_enqueued,
2450 sizeof(uint32_t)));
2451 VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_ctl_q_out_enqueued,
2452 sizeof(uint32_t)));
2453 VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_ctl_q_in_peeked,
2454 sizeof(uint32_t)));
2455 VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_ctl_q_out_peeked,
2456 sizeof(uint32_t)));
2457
2458 VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_pending_q_in_enqueued,
2459 sizeof(uint32_t)));
2460 VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_pending_q_out_enqueued,
2461 sizeof(uint32_t)));
2462
2463 VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_inject_q_in_enqueued,
2464 sizeof(uint32_t)));
2465 VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_inject_q_out_enqueued,
2466 sizeof(uint32_t)));
2467 VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_inject_q_in_passed,
2468 sizeof(uint32_t)));
2469 VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_inject_q_out_passed,
2470 sizeof(uint32_t)));
2471
2472 /*
2473 * Allocate locks
2474 */
2475 cfil_lck_grp_attr = lck_grp_attr_alloc_init();
2476 if (cfil_lck_grp_attr == NULL) {
2477 panic("%s: lck_grp_attr_alloc_init failed", __func__);
2478 /* NOTREACHED */
2479 }
2480 cfil_lck_grp = lck_grp_alloc_init("content filter",
2481 cfil_lck_grp_attr);
2482 if (cfil_lck_grp == NULL) {
2483 panic("%s: lck_grp_alloc_init failed", __func__);
2484 /* NOTREACHED */
2485 }
2486 cfil_lck_attr = lck_attr_alloc_init();
2487 if (cfil_lck_attr == NULL) {
2488 panic("%s: lck_attr_alloc_init failed", __func__);
2489 /* NOTREACHED */
2490 }
2491 lck_rw_init(&cfil_lck_rw, cfil_lck_grp, cfil_lck_attr);
2492
2493 TAILQ_INIT(&cfil_sock_head);
2494 TAILQ_INIT(&cfil_sock_head_stats);
2495
2496 /*
2497 * Register kernel control
2498 */
2499 bzero(&kern_ctl, sizeof(kern_ctl));
2500 strlcpy(kern_ctl.ctl_name, CONTENT_FILTER_CONTROL_NAME,
2501 sizeof(kern_ctl.ctl_name));
2502 kern_ctl.ctl_flags = CTL_FLAG_PRIVILEGED | CTL_FLAG_REG_EXTENDED;
2503 kern_ctl.ctl_sendsize = 512 * 1024; /* enough? */
2504 kern_ctl.ctl_recvsize = 512 * 1024; /* enough? */
2505 kern_ctl.ctl_connect = cfil_ctl_connect;
2506 kern_ctl.ctl_disconnect = cfil_ctl_disconnect;
2507 kern_ctl.ctl_send = cfil_ctl_send;
2508 kern_ctl.ctl_getopt = cfil_ctl_getopt;
2509 kern_ctl.ctl_setopt = cfil_ctl_setopt;
2510 kern_ctl.ctl_rcvd = cfil_ctl_rcvd;
2511 error = ctl_register(&kern_ctl, &cfil_kctlref);
2512 if (error != 0) {
2513 CFIL_LOG(LOG_ERR, "ctl_register failed: %d", error);
2514 return;
2515 }
2516
2517 // Spawn thread for garbage collection
2518 if (kernel_thread_start(cfil_udp_gc_thread_func, NULL,
2519 &cfil_udp_gc_thread) != KERN_SUCCESS) {
2520 panic_plain("%s: Can't create UDP GC thread", __func__);
2521 /* NOTREACHED */
2522 }
2523 /* this must not fail */
2524 VERIFY(cfil_udp_gc_thread != NULL);
2525
2526 // Spawn thread for statistics reporting
2527 if (kernel_thread_start(cfil_stats_report_thread_func, NULL,
2528 &cfil_stats_report_thread) != KERN_SUCCESS) {
2529 panic_plain("%s: Can't create statistics report thread", __func__);
2530 /* NOTREACHED */
2531 }
2532 /* this must not fail */
2533 VERIFY(cfil_stats_report_thread != NULL);
2534
2535 // Set UDP per-flow mbuf thresholds to 1/32 of platform max
2536 mbuf_limit = MAX(UDP_FLOW_GC_MBUF_CNT_MAX, (nmbclusters << MCLSHIFT) >> UDP_FLOW_GC_MBUF_SHIFT);
2537 cfil_udp_gc_mbuf_num_max = (mbuf_limit >> MCLSHIFT);
2538 cfil_udp_gc_mbuf_cnt_max = mbuf_limit;
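	/*
	 * Worked example (editor's note, illustrative values): with 2 KB clusters
	 * (MCLSHIFT == 11), nmbclusters == 65536 and the 1/32 shift described above,
	 * the platform mbuf budget is 65536 * 2048 = 128 MB, so
	 *   cfil_udp_gc_mbuf_cnt_max = 128 MB >> 5  = 4 MB  (bytes per flow)
	 *   cfil_udp_gc_mbuf_num_max = 4 MB >> 11   = 2048  (clusters per flow)
	 * The MAX() keeps UDP_FLOW_GC_MBUF_CNT_MAX as a floor on small-memory systems.
	 */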
2539
2540 memset(&global_cfil_stats_report_buffers, 0, sizeof(global_cfil_stats_report_buffers));
2541 }
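/*
 * Illustrative user-space sketch (editor's example, not part of this file):
 * reaching the kernel control registered by cfil_init() above, using only the
 * public kernel-control socket API. CONTENT_FILTER_CONTROL_NAME is taken from
 * the registration above; connecting requires privilege since the control is
 * registered with CTL_FLAG_PRIVILEGED. Error handling is minimal.
 */
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <sys/sys_domain.h>
#include <sys/kern_control.h>
#include <string.h>
#include <stdio.h>
#include <unistd.h>

static int
example_connect_content_filter(void)
{
	struct ctl_info info;
	struct sockaddr_ctl addr;
	int fd;

	fd = socket(PF_SYSTEM, SOCK_DGRAM, SYSPROTO_CONTROL);
	if (fd < 0) {
		perror("socket(PF_SYSTEM)");
		return -1;
	}

	memset(&info, 0, sizeof(info));
	strlcpy(info.ctl_name, CONTENT_FILTER_CONTROL_NAME, sizeof(info.ctl_name));
	if (ioctl(fd, CTLIOCGINFO, &info) == -1) {	/* resolve control name to id */
		perror("CTLIOCGINFO");
		close(fd);
		return -1;
	}

	memset(&addr, 0, sizeof(addr));
	addr.sc_len = sizeof(addr);
	addr.sc_family = AF_SYSTEM;
	addr.ss_sysaddr = AF_SYS_CONTROL;
	addr.sc_id = info.ctl_id;
	addr.sc_unit = 0;	/* let the kernel pick the kcunit */
	if (connect(fd, (struct sockaddr *)&addr, sizeof(addr)) == -1) {
		perror("connect(content filter control)");
		close(fd);
		return -1;
	}
	return fd;	/* caller now owns the connected control socket */
}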
2542
2543 struct cfil_info *
2544 cfil_info_alloc(struct socket *so, struct cfil_hash_entry *hash_entry)
2545 {
2546 int kcunit;
2547 struct cfil_info *cfil_info = NULL;
2548 struct inpcb *inp = sotoinpcb(so);
2549
2550 CFIL_LOG(LOG_INFO, "");
2551
2552 socket_lock_assert_owned(so);
2553
2554 cfil_info = zalloc(cfil_info_zone);
2555 if (cfil_info == NULL) {
2556 goto done;
2557 }
2558 bzero(cfil_info, sizeof(struct cfil_info));
2559 os_ref_init(&cfil_info->cfi_ref_count, &cfil_refgrp);
2560
2561 cfil_queue_init(&cfil_info->cfi_snd.cfi_inject_q);
2562 cfil_queue_init(&cfil_info->cfi_rcv.cfi_inject_q);
2563
2564 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
2565 struct cfil_entry *entry;
2566
2567 entry = &cfil_info->cfi_entries[kcunit - 1];
2568 entry->cfe_cfil_info = cfil_info;
2569
2570 /* Initialize the filter entry */
2571 entry->cfe_filter = NULL;
2572 entry->cfe_flags = 0;
2573 entry->cfe_necp_control_unit = 0;
2574 entry->cfe_snd.cfe_pass_offset = 0;
2575 entry->cfe_snd.cfe_peek_offset = 0;
2576 entry->cfe_snd.cfe_peeked = 0;
2577 entry->cfe_rcv.cfe_pass_offset = 0;
2578 entry->cfe_rcv.cfe_peek_offset = 0;
2579 entry->cfe_rcv.cfe_peeked = 0;
2580 /*
2581 * Timestamp the last action to avoid prematurely
2582 * triggering garbage collection
2583 */
2584 microuptime(&entry->cfe_last_action);
2585
2586 cfil_queue_init(&entry->cfe_snd.cfe_pending_q);
2587 cfil_queue_init(&entry->cfe_rcv.cfe_pending_q);
2588 cfil_queue_init(&entry->cfe_snd.cfe_ctl_q);
2589 cfil_queue_init(&entry->cfe_rcv.cfe_ctl_q);
2590 }
2591
2592 cfil_rw_lock_exclusive(&cfil_lck_rw);
2593
2594 /*
2595 * Create a cfi_sock_id that's not the socket pointer!
2596 */
2597
2598 if (hash_entry == NULL) {
2599 // This is the TCP case, cfil_info is tracked per socket
2600 if (inp->inp_flowhash == 0) {
2601 inp->inp_flowhash = inp_calc_flowhash(inp);
2602 }
2603
2604 so->so_cfil = cfil_info;
2605 cfil_info->cfi_so = so;
2606 cfil_info->cfi_sock_id =
2607 ((so->so_gencnt << 32) | inp->inp_flowhash);
2608 } else {
2609 // This is the UDP case, cfil_info is tracked in per-socket hash
2610 cfil_info->cfi_so = so;
2611 hash_entry->cfentry_cfil = cfil_info;
2612 cfil_info->cfi_hash_entry = hash_entry;
2613 cfil_info->cfi_sock_id = ((so->so_gencnt << 32) | (hash_entry->cfentry_flowhash & 0xffffffff));
2614 CFIL_LOG(LOG_DEBUG, "CFIL: UDP inp_flowhash %x so_gencnt %llx entry flowhash %x sockID %llx",
2615 inp->inp_flowhash, so->so_gencnt, hash_entry->cfentry_flowhash, cfil_info->cfi_sock_id);
2616
2617 // Wake up gc thread if this is first flow added
2618 if (cfil_sock_udp_attached_count == 0) {
2619 thread_wakeup((caddr_t)&cfil_sock_udp_attached_count);
2620 }
2621
2622 cfil_sock_udp_attached_count++;
2623 }
2624
2625 TAILQ_INSERT_TAIL(&cfil_sock_head, cfil_info, cfi_link);
2626 SLIST_INIT(&cfil_info->cfi_ordered_entries);
2627
2628 cfil_sock_attached_count++;
2629
2630 cfil_rw_unlock_exclusive(&cfil_lck_rw);
2631
2632 done:
2633 if (cfil_info != NULL) {
2634 OSIncrementAtomic(&cfil_stats.cfs_cfi_alloc_ok);
2635 } else {
2636 OSIncrementAtomic(&cfil_stats.cfs_cfi_alloc_fail);
2637 }
2638
2639 return cfil_info;
2640 }
2641
2642 int
2643 cfil_info_attach_unit(struct socket *so, uint32_t filter_control_unit, struct cfil_info *cfil_info)
2644 {
2645 int kcunit;
2646 int attached = 0;
2647
2648 CFIL_LOG(LOG_INFO, "");
2649
2650 socket_lock_assert_owned(so);
2651
2652 cfil_rw_lock_exclusive(&cfil_lck_rw);
2653
2654 for (kcunit = 1;
2655 content_filters != NULL && kcunit <= MAX_CONTENT_FILTER;
2656 kcunit++) {
2657 struct content_filter *cfc = content_filters[kcunit - 1];
2658 struct cfil_entry *entry;
2659 struct cfil_entry *iter_entry;
2660 struct cfil_entry *iter_prev;
2661
2662 if (cfc == NULL) {
2663 continue;
2664 }
2665 if (!(cfc->cf_necp_control_unit & filter_control_unit)) {
2666 continue;
2667 }
2668
2669 entry = &cfil_info->cfi_entries[kcunit - 1];
2670
2671 entry->cfe_filter = cfc;
2672 entry->cfe_necp_control_unit = cfc->cf_necp_control_unit;
2673 TAILQ_INSERT_TAIL(&cfc->cf_sock_entries, entry, cfe_link);
2674 cfc->cf_sock_count++;
2675
2676 /* Insert the entry into the list ordered by control unit */
2677 iter_prev = NULL;
2678 SLIST_FOREACH(iter_entry, &cfil_info->cfi_ordered_entries, cfe_order_link) {
2679 if (entry->cfe_necp_control_unit < iter_entry->cfe_necp_control_unit) {
2680 break;
2681 }
2682 iter_prev = iter_entry;
2683 }
2684
2685 if (iter_prev == NULL) {
2686 SLIST_INSERT_HEAD(&cfil_info->cfi_ordered_entries, entry, cfe_order_link);
2687 } else {
2688 SLIST_INSERT_AFTER(iter_prev, entry, cfe_order_link);
2689 }
2690
2691 verify_content_filter(cfc);
2692 attached = 1;
2693 entry->cfe_flags |= CFEF_CFIL_ATTACHED;
2694 }
2695
2696 cfil_rw_unlock_exclusive(&cfil_lck_rw);
2697
2698 return attached;
2699 }
2700
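/*
 * cfil_info_free -
 * Final teardown of a flow's content filter state: detach any remaining
 * filter entries, unlink the cfil_info from the global and stats lists,
 * drain all pending/control/inject queues and return it to its zone.
 */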
2701 static void
2702 cfil_info_free(struct cfil_info *cfil_info)
2703 {
2704 int kcunit;
2705 uint64_t in_drain = 0;
2706 uint64_t out_drained = 0;
2707
2708 if (cfil_info == NULL) {
2709 return;
2710 }
2711
2712 CFIL_LOG(LOG_INFO, "");
2713
2714 cfil_rw_lock_exclusive(&cfil_lck_rw);
2715
2716 for (kcunit = 1;
2717 content_filters != NULL && kcunit <= MAX_CONTENT_FILTER;
2718 kcunit++) {
2719 struct cfil_entry *entry;
2720 struct content_filter *cfc;
2721
2722 entry = &cfil_info->cfi_entries[kcunit - 1];
2723
2724 /* Don't be silly and try to detach twice */
2725 if (entry->cfe_filter == NULL) {
2726 continue;
2727 }
2728
2729 cfc = content_filters[kcunit - 1];
2730
2731 VERIFY(cfc == entry->cfe_filter);
2732
2733 entry->cfe_filter = NULL;
2734 entry->cfe_necp_control_unit = 0;
2735 TAILQ_REMOVE(&cfc->cf_sock_entries, entry, cfe_link);
2736 cfc->cf_sock_count--;
2737
2738 verify_content_filter(cfc);
2739 }
2740 if (cfil_info->cfi_hash_entry != NULL) {
2741 cfil_sock_udp_attached_count--;
2742 }
2743 cfil_sock_attached_count--;
2744 TAILQ_REMOVE(&cfil_sock_head, cfil_info, cfi_link);
2745
2746 // Turn off stats reporting for cfil_info.
2747 cfil_info_stats_toggle(cfil_info, NULL, 0);
2748
2749 out_drained += cfil_queue_drain(&cfil_info->cfi_snd.cfi_inject_q);
2750 in_drain += cfil_queue_drain(&cfil_info->cfi_rcv.cfi_inject_q);
2751
2752 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
2753 struct cfil_entry *entry;
2754
2755 entry = &cfil_info->cfi_entries[kcunit - 1];
2756 out_drained += cfil_queue_drain(&entry->cfe_snd.cfe_pending_q);
2757 in_drain += cfil_queue_drain(&entry->cfe_rcv.cfe_pending_q);
2758 out_drained += cfil_queue_drain(&entry->cfe_snd.cfe_ctl_q);
2759 in_drain += cfil_queue_drain(&entry->cfe_rcv.cfe_ctl_q);
2760 }
2761 cfil_rw_unlock_exclusive(&cfil_lck_rw);
2762
2763 if (out_drained) {
2764 OSIncrementAtomic(&cfil_stats.cfs_flush_out_free);
2765 }
2766 if (in_drain) {
2767 OSIncrementAtomic(&cfil_stats.cfs_flush_in_free);
2768 }
2769
2770 zfree(cfil_info_zone, cfil_info);
2771 }
2772
2773 /*
2774 * Received a verdict from userspace for a socket.
2775 * Perform any delayed operation if needed.
2776 */
2777 static void
2778 cfil_sock_received_verdict(struct socket *so)
2779 {
2780 if (so == NULL || so->so_cfil == NULL) {
2781 return;
2782 }
2783
2784 so->so_cfil->cfi_flags |= CFIF_INITIAL_VERDICT;
2785
2786 /*
2787 * If socket has already been connected, trigger
2788 * soisconnected now.
2789 */
2790 if (so->so_cfil->cfi_flags & CFIF_SOCKET_CONNECTED) {
2791 so->so_cfil->cfi_flags &= ~CFIF_SOCKET_CONNECTED;
2792 soisconnected(so);
2793 return;
2794 }
2795 }
2796
2797 /*
2798 * Entry point from Sockets layer
2799 * The socket is locked.
2800 *
2801 * Checks if a connected socket is subject to filter and
2802 * pending the initial verdict.
2803 */
2804 boolean_t
2805 cfil_sock_connected_pending_verdict(struct socket *so)
2806 {
2807 if (so == NULL || so->so_cfil == NULL) {
2808 return false;
2809 }
2810
2811 if (so->so_cfil->cfi_flags & CFIF_INITIAL_VERDICT) {
2812 return false;
2813 } else {
2814 /*
2815 * Remember that this protocol is already connected, so
2816 * we will trigger soisconnected() upon receipt of
2817 * the initial verdict later.
2818 */
2819 so->so_cfil->cfi_flags |= CFIF_SOCKET_CONNECTED;
2820 return true;
2821 }
2822 }
2823
2824 boolean_t
2825 cfil_filter_present(void)
2826 {
2827 return cfil_active_count > 0;
2828 }
2829
2830 /*
2831 * Entry point from Sockets layer
2832 * The socket is locked.
2833 */
2834 errno_t
2835 cfil_sock_attach(struct socket *so, struct sockaddr *local, struct sockaddr *remote, int dir)
2836 {
2837 errno_t error = 0;
2838 uint32_t filter_control_unit;
2839
2840 socket_lock_assert_owned(so);
2841
2842 if (so->so_flags1 & SOF1_FLOW_DIVERT_SKIP) {
2843 /*
2844 * This socket has already been evaluated (and ultimately skipped) by
2845 * flow divert, so it has also already been through content filter if there
2846 * is one.
2847 */
2848 goto done;
2849 }
2850
2851 /* Limit ourselves to TCP that are not MPTCP subflows */
2852 if (SKIP_FILTER_FOR_TCP_SOCKET(so)) {
2853 goto done;
2854 }
2855
2856 filter_control_unit = necp_socket_get_content_filter_control_unit(so);
2857 if (filter_control_unit == 0) {
2858 goto done;
2859 }
2860
2861 if (filter_control_unit == NECP_FILTER_UNIT_NO_FILTER) {
2862 goto done;
2863 }
2864 if ((filter_control_unit & NECP_MASK_USERSPACE_ONLY) != 0) {
2865 OSIncrementAtomic(&cfil_stats.cfs_sock_userspace_only);
2866 goto done;
2867 }
2868 if (cfil_active_count == 0) {
2869 OSIncrementAtomic(&cfil_stats.cfs_sock_attach_in_vain);
2870 goto done;
2871 }
2872 if (so->so_cfil != NULL) {
2873 OSIncrementAtomic(&cfil_stats.cfs_sock_attach_already);
2874 CFIL_LOG(LOG_ERR, "already attached");
2875 goto done;
2876 } else {
2877 cfil_info_alloc(so, NULL);
2878 if (so->so_cfil == NULL) {
2879 error = ENOMEM;
2880 OSIncrementAtomic(&cfil_stats.cfs_sock_attach_no_mem);
2881 goto done;
2882 }
2883 so->so_cfil->cfi_dir = dir;
2884 so->so_cfil->cfi_filter_control_unit = filter_control_unit;
2885 }
2886 if (cfil_info_attach_unit(so, filter_control_unit, so->so_cfil) == 0) {
2887 CFIL_LOG(LOG_ERR, "cfil_info_attach_unit(%u) failed",
2888 filter_control_unit);
2889 OSIncrementAtomic(&cfil_stats.cfs_sock_attach_failed);
2890 goto done;
2891 }
2892 CFIL_LOG(LOG_INFO, "so %llx filter_control_unit %u sockID %llx",
2893 (uint64_t)VM_KERNEL_ADDRPERM(so),
2894 filter_control_unit, so->so_cfil->cfi_sock_id);
2895
2896 so->so_flags |= SOF_CONTENT_FILTER;
2897 OSIncrementAtomic(&cfil_stats.cfs_sock_attached);
2898
2899 /* Hold a reference on the socket */
2900 so->so_usecount++;
2901
2902 /*
2903 * Save passed addresses for attach event msg (in case resend
2904 * is needed).
2905 */
2906 if (remote != NULL && (remote->sa_len <= sizeof(union sockaddr_in_4_6))) {
2907 memcpy(&so->so_cfil->cfi_so_attach_faddr, remote, remote->sa_len);
2908 }
2909 if (local != NULL && (local->sa_len <= sizeof(union sockaddr_in_4_6))) {
2910 memcpy(&so->so_cfil->cfi_so_attach_laddr, local, local->sa_len);
2911 }
2912
2913 error = cfil_dispatch_attach_event(so, so->so_cfil, 0, dir);
2914 /* We can recover from flow control or out of memory errors */
2915 if (error == ENOBUFS || error == ENOMEM) {
2916 error = 0;
2917 } else if (error != 0) {
2918 goto done;
2919 }
2920
2921 CFIL_INFO_VERIFY(so->so_cfil);
2922 done:
2923 return error;
2924 }
2925
2926 /*
2927 * Entry point from Sockets layer
2928 * The socket is locked.
2929 */
2930 errno_t
2931 cfil_sock_detach(struct socket *so)
2932 {
2933 if (IS_IP_DGRAM(so)) {
2934 cfil_db_free(so);
2935 return 0;
2936 }
2937
2938 if (so->so_cfil) {
2939 if (so->so_flags & SOF_CONTENT_FILTER) {
2940 so->so_flags &= ~SOF_CONTENT_FILTER;
2941 VERIFY(so->so_usecount > 0);
2942 so->so_usecount--;
2943 }
2944 CFIL_INFO_FREE(so->so_cfil);
2945 so->so_cfil = NULL;
2946 OSIncrementAtomic(&cfil_stats.cfs_sock_detached);
2947 }
2948 return 0;
2949 }
2950
2951 /*
2952 * Fill in the address info of an event message from either
2953 * the socket or passed in address info.
2954 */
2955 static void
2956 cfil_fill_event_msg_addresses(struct cfil_hash_entry *entry, struct inpcb *inp,
2957 union sockaddr_in_4_6 *sin_src, union sockaddr_in_4_6 *sin_dst,
2958 boolean_t isIPv4, boolean_t outgoing)
2959 {
2960 if (isIPv4) {
2961 struct in_addr laddr = {0}, faddr = {0};
2962 u_int16_t lport = 0, fport = 0;
2963
2964 cfil_get_flow_address(entry, inp, &laddr, &faddr, &lport, &fport);
2965
2966 if (outgoing) {
2967 fill_ip_sockaddr_4_6(sin_src, laddr, lport);
2968 fill_ip_sockaddr_4_6(sin_dst, faddr, fport);
2969 } else {
2970 fill_ip_sockaddr_4_6(sin_src, faddr, fport);
2971 fill_ip_sockaddr_4_6(sin_dst, laddr, lport);
2972 }
2973 } else {
2974 struct in6_addr *laddr = NULL, *faddr = NULL;
2975 u_int16_t lport = 0, fport = 0;
2976
2977 cfil_get_flow_address_v6(entry, inp, &laddr, &faddr, &lport, &fport);
2978 if (outgoing) {
2979 fill_ip6_sockaddr_4_6(sin_src, laddr, lport);
2980 fill_ip6_sockaddr_4_6(sin_dst, faddr, fport);
2981 } else {
2982 fill_ip6_sockaddr_4_6(sin_src, faddr, fport);
2983 fill_ip6_sockaddr_4_6(sin_dst, laddr, lport);
2984 }
2985 }
2986 }
2987
2988 static boolean_t
2989 cfil_dispatch_attach_event_sign(cfil_crypto_state_t crypto_state,
2990 struct cfil_info *cfil_info,
2991 struct cfil_msg_sock_attached *msg)
2992 {
2993 struct cfil_crypto_data data = {};
2994
2995 if (crypto_state == NULL || msg == NULL || cfil_info == NULL) {
2996 return false;
2997 }
2998
2999 data.sock_id = msg->cfs_msghdr.cfm_sock_id;
3000 data.direction = msg->cfs_conn_dir;
3001
3002 data.pid = msg->cfs_pid;
3003 data.effective_pid = msg->cfs_e_pid;
3004 uuid_copy(data.uuid, msg->cfs_uuid);
3005 uuid_copy(data.effective_uuid, msg->cfs_e_uuid);
3006 data.socketProtocol = msg->cfs_sock_protocol;
3007 if (data.direction == CFS_CONNECTION_DIR_OUT) {
3008 data.remote.sin6 = msg->cfs_dst.sin6;
3009 data.local.sin6 = msg->cfs_src.sin6;
3010 } else {
3011 data.remote.sin6 = msg->cfs_src.sin6;
3012 data.local.sin6 = msg->cfs_dst.sin6;
3013 }
3014
3015 // At attach, if local address is already present, no need to re-sign subsequent data messages.
3016 if (!NULLADDRESS(data.local)) {
3017 cfil_info->cfi_isSignatureLatest = true;
3018 }
3019
3020 msg->cfs_signature_length = sizeof(cfil_crypto_signature);
3021 if (cfil_crypto_sign_data(crypto_state, &data, msg->cfs_signature, &msg->cfs_signature_length) != 0) {
3022 msg->cfs_signature_length = 0;
3023 CFIL_LOG(LOG_ERR, "CFIL: Failed to sign attached msg <sockID %llu>",
3024 msg->cfs_msghdr.cfm_sock_id);
3025 return false;
3026 }
3027
3028 return true;
3029 }
3030
3031 static boolean_t
3032 cfil_dispatch_data_event_sign(cfil_crypto_state_t crypto_state,
3033 struct socket *so, struct cfil_info *cfil_info,
3034 struct cfil_msg_data_event *msg)
3035 {
3036 struct cfil_crypto_data data = {};
3037
3038 if (crypto_state == NULL || msg == NULL ||
3039 so == NULL || cfil_info == NULL) {
3040 return false;
3041 }
3042
3043 data.sock_id = cfil_info->cfi_sock_id;
3044 data.direction = cfil_info->cfi_dir;
3045 data.pid = so->last_pid;
3046 memcpy(data.uuid, so->last_uuid, sizeof(uuid_t));
3047 if (so->so_flags & SOF_DELEGATED) {
3048 data.effective_pid = so->e_pid;
3049 memcpy(data.effective_uuid, so->e_uuid, sizeof(uuid_t));
3050 } else {
3051 data.effective_pid = so->last_pid;
3052 memcpy(data.effective_uuid, so->last_uuid, sizeof(uuid_t));
3053 }
3054 data.socketProtocol = so->so_proto->pr_protocol;
3055
3056 if (data.direction == CFS_CONNECTION_DIR_OUT) {
3057 data.remote.sin6 = msg->cfc_dst.sin6;
3058 data.local.sin6 = msg->cfc_src.sin6;
3059 } else {
3060 data.remote.sin6 = msg->cfc_src.sin6;
3061 data.local.sin6 = msg->cfc_dst.sin6;
3062 }
3063
3064 // At the first data message, the local address may show up for the first time: update
3065 // the address cache; subsequent data messages no longer need to be re-signed.
3066 if (!NULLADDRESS(data.local)) {
3067 memcpy(&cfil_info->cfi_so_attach_laddr, &data.local, data.local.sa.sa_len);
3068 cfil_info->cfi_isSignatureLatest = true;
3069 }
3070
3071 msg->cfd_signature_length = sizeof(cfil_crypto_signature);
3072 if (cfil_crypto_sign_data(crypto_state, &data, msg->cfd_signature, &msg->cfd_signature_length) != 0) {
3073 msg->cfd_signature_length = 0;
3074 CFIL_LOG(LOG_ERR, "CFIL: Failed to sign data msg <sockID %llu>",
3075 msg->cfd_msghdr.cfm_sock_id);
3076 return false;
3077 }
3078
3079 return true;
3080 }
3081
3082 static boolean_t
3083 cfil_dispatch_closed_event_sign(cfil_crypto_state_t crypto_state,
3084 struct socket *so, struct cfil_info *cfil_info,
3085 struct cfil_msg_sock_closed *msg)
3086 {
3087 struct cfil_crypto_data data = {};
3088 struct cfil_hash_entry hash_entry = {};
3089 struct cfil_hash_entry *hash_entry_ptr = NULL;
3090 struct inpcb *inp = (struct inpcb *)so->so_pcb;
3091
3092 if (crypto_state == NULL || msg == NULL ||
3093 so == NULL || inp == NULL || cfil_info == NULL) {
3094 return false;
3095 }
3096
3097 data.sock_id = cfil_info->cfi_sock_id;
3098 data.direction = cfil_info->cfi_dir;
3099
3100 data.pid = so->last_pid;
3101 memcpy(data.uuid, so->last_uuid, sizeof(uuid_t));
3102 if (so->so_flags & SOF_DELEGATED) {
3103 data.effective_pid = so->e_pid;
3104 memcpy(data.effective_uuid, so->e_uuid, sizeof(uuid_t));
3105 } else {
3106 data.effective_pid = so->last_pid;
3107 memcpy(data.effective_uuid, so->last_uuid, sizeof(uuid_t));
3108 }
3109 data.socketProtocol = so->so_proto->pr_protocol;
3110
3111 /*
3112 * Fill in address info:
3113 * For UDP, use the cfil_info hash entry directly.
3114 * For TCP, compose a hash entry with the saved addresses.
3115 */
3116 if (cfil_info->cfi_hash_entry != NULL) {
3117 hash_entry_ptr = cfil_info->cfi_hash_entry;
3118 } else if (cfil_info->cfi_so_attach_faddr.sa.sa_len > 0 ||
3119 cfil_info->cfi_so_attach_laddr.sa.sa_len > 0) {
3120 fill_cfil_hash_entry_from_address(&hash_entry, TRUE, &cfil_info->cfi_so_attach_laddr.sa, FALSE);
3121 fill_cfil_hash_entry_from_address(&hash_entry, FALSE, &cfil_info->cfi_so_attach_faddr.sa, FALSE);
3122 hash_entry_ptr = &hash_entry;
3123 }
3124 if (hash_entry_ptr != NULL) {
3125 boolean_t outgoing = (cfil_info->cfi_dir == CFS_CONNECTION_DIR_OUT);
3126 union sockaddr_in_4_6 *src = outgoing ? &data.local : &data.remote;
3127 union sockaddr_in_4_6 *dst = outgoing ? &data.remote : &data.local;
3128 cfil_fill_event_msg_addresses(hash_entry_ptr, inp, src, dst, !IS_INP_V6(inp), outgoing);
3129 }
3130
3131 data.byte_count_in = cfil_info->cfi_byte_inbound_count;
3132 data.byte_count_out = cfil_info->cfi_byte_outbound_count;
3133
3134 msg->cfc_signature_length = sizeof(cfil_crypto_signature);
3135 if (cfil_crypto_sign_data(crypto_state, &data, msg->cfc_signature, &msg->cfc_signature_length) != 0) {
3136 msg->cfc_signature_length = 0;
3137 CFIL_LOG(LOG_ERR, "CFIL: Failed to sign closed msg <sockID %llu>",
3138 msg->cfc_msghdr.cfm_sock_id);
3139 return false;
3140 }
3141
3142 return true;
3143 }
3144
3145 static int
3146 cfil_dispatch_attach_event(struct socket *so, struct cfil_info *cfil_info,
3147 uint32_t kcunit, int conn_dir)
3148 {
3149 errno_t error = 0;
3150 struct cfil_entry *entry = NULL;
3151 struct cfil_msg_sock_attached msg_attached;
3152 struct content_filter *cfc = NULL;
3153 struct inpcb *inp = (struct inpcb *)so->so_pcb;
3154 struct cfil_hash_entry *hash_entry_ptr = NULL;
3155 struct cfil_hash_entry hash_entry;
3156
3157 memset(&hash_entry, 0, sizeof(struct cfil_hash_entry));
3158 proc_t p = PROC_NULL;
3159 task_t t = TASK_NULL;
3160
3161 socket_lock_assert_owned(so);
3162
3163 cfil_rw_lock_shared(&cfil_lck_rw);
3164
3165 if (so->so_proto == NULL || so->so_proto->pr_domain == NULL) {
3166 error = EINVAL;
3167 goto done;
3168 }
3169
3170 if (kcunit == 0) {
3171 entry = SLIST_FIRST(&cfil_info->cfi_ordered_entries);
3172 } else {
3173 entry = &cfil_info->cfi_entries[kcunit - 1];
3174 }
3175
3176 if (entry == NULL) {
3177 goto done;
3178 }
3179
3180 cfc = entry->cfe_filter;
3181 if (cfc == NULL) {
3182 goto done;
3183 }
3184
3185 if ((entry->cfe_flags & CFEF_SENT_SOCK_ATTACHED)) {
3186 goto done;
3187 }
3188
3189 if (kcunit == 0) {
3190 kcunit = CFI_ENTRY_KCUNIT(cfil_info, entry);
3191 }
3192
3193 CFIL_LOG(LOG_INFO, "so %llx filter_control_unit %u kcunit %u",
3194 (uint64_t)VM_KERNEL_ADDRPERM(so), entry->cfe_necp_control_unit, kcunit);
3195
3196 /* Would be wasteful to try when flow controlled */
3197 if (cfc->cf_flags & CFF_FLOW_CONTROLLED) {
3198 error = ENOBUFS;
3199 goto done;
3200 }
3201
3202 bzero(&msg_attached, sizeof(struct cfil_msg_sock_attached));
3203 msg_attached.cfs_msghdr.cfm_len = sizeof(struct cfil_msg_sock_attached);
3204 msg_attached.cfs_msghdr.cfm_version = CFM_VERSION_CURRENT;
3205 msg_attached.cfs_msghdr.cfm_type = CFM_TYPE_EVENT;
3206 msg_attached.cfs_msghdr.cfm_op = CFM_OP_SOCKET_ATTACHED;
3207 msg_attached.cfs_msghdr.cfm_sock_id = entry->cfe_cfil_info->cfi_sock_id;
3208
3209 msg_attached.cfs_sock_family = so->so_proto->pr_domain->dom_family;
3210 msg_attached.cfs_sock_type = so->so_proto->pr_type;
3211 msg_attached.cfs_sock_protocol = so->so_proto->pr_protocol;
3212 msg_attached.cfs_pid = so->last_pid;
3213 memcpy(msg_attached.cfs_uuid, so->last_uuid, sizeof(uuid_t));
3214 if (so->so_flags & SOF_DELEGATED) {
3215 msg_attached.cfs_e_pid = so->e_pid;
3216 memcpy(msg_attached.cfs_e_uuid, so->e_uuid, sizeof(uuid_t));
3217 } else {
3218 msg_attached.cfs_e_pid = so->last_pid;
3219 memcpy(msg_attached.cfs_e_uuid, so->last_uuid, sizeof(uuid_t));
3220 }
3221
3222 /*
3223 * Fill in address info:
3224 * For UDP, use the cfil_info hash entry directly.
3225 * For TCP, compose a hash entry with the saved addresses.
3226 */
3227 if (cfil_info->cfi_hash_entry != NULL) {
3228 hash_entry_ptr = cfil_info->cfi_hash_entry;
3229 } else if (cfil_info->cfi_so_attach_faddr.sa.sa_len > 0 ||
3230 cfil_info->cfi_so_attach_laddr.sa.sa_len > 0) {
3231 fill_cfil_hash_entry_from_address(&hash_entry, TRUE, &cfil_info->cfi_so_attach_laddr.sa, FALSE);
3232 fill_cfil_hash_entry_from_address(&hash_entry, FALSE, &cfil_info->cfi_so_attach_faddr.sa, FALSE);
3233 hash_entry_ptr = &hash_entry;
3234 }
3235 if (hash_entry_ptr != NULL) {
3236 cfil_fill_event_msg_addresses(hash_entry_ptr, inp,
3237 &msg_attached.cfs_src, &msg_attached.cfs_dst,
3238 !IS_INP_V6(inp), conn_dir == CFS_CONNECTION_DIR_OUT);
3239 }
3240 msg_attached.cfs_conn_dir = conn_dir;
3241
3242 if (msg_attached.cfs_e_pid != 0) {
3243 p = proc_find(msg_attached.cfs_e_pid);
3244 if (p != PROC_NULL) {
3245 t = proc_task(p);
3246 if (t != TASK_NULL) {
3247 audit_token_t audit_token;
3248 mach_msg_type_number_t count = TASK_AUDIT_TOKEN_COUNT;
3249 if (task_info(t, TASK_AUDIT_TOKEN, (task_info_t)&audit_token, &count) == KERN_SUCCESS) {
3250 memcpy(&msg_attached.cfs_audit_token, &audit_token, sizeof(msg_attached.cfs_audit_token));
3251 } else {
3252 CFIL_LOG(LOG_ERR, "CFIL: Failed to get process audit token <sockID %llu> ",
3253 entry->cfe_cfil_info->cfi_sock_id);
3254 }
3255 }
3256 proc_rele(p);
3257 }
3258 }
3259
3260 if (cfil_info->cfi_debug) {
3261 cfil_info_log(LOG_ERR, cfil_info, "CFIL: SENDING ATTACH UP");
3262 }
3263
3264 cfil_dispatch_attach_event_sign(entry->cfe_filter->cf_crypto_state, cfil_info, &msg_attached);
3265
3266 #if LIFECYCLE_DEBUG
3267 CFIL_LOG(LOG_DEBUG, "CFIL: LIFECYCLE: SENDING ATTACH UP <sockID %llu> ",
3268 entry->cfe_cfil_info->cfi_sock_id);
3269 #endif
3270
3271 error = ctl_enqueuedata(entry->cfe_filter->cf_kcref,
3272 entry->cfe_filter->cf_kcunit,
3273 &msg_attached,
3274 sizeof(struct cfil_msg_sock_attached),
3275 CTL_DATA_EOR);
3276 if (error != 0) {
3277 CFIL_LOG(LOG_ERR, "ctl_enqueuedata() failed: %d", error);
3278 goto done;
3279 }
3280 microuptime(&entry->cfe_last_event);
3281 cfil_info->cfi_first_event.tv_sec = entry->cfe_last_event.tv_sec;
3282 cfil_info->cfi_first_event.tv_usec = entry->cfe_last_event.tv_usec;
3283
3284 entry->cfe_flags |= CFEF_SENT_SOCK_ATTACHED;
3285 OSIncrementAtomic(&cfil_stats.cfs_attach_event_ok);
3286 done:
3287
3288 /* We can recover from flow control */
3289 if (error == ENOBUFS) {
3290 entry->cfe_flags |= CFEF_FLOW_CONTROLLED;
3291 OSIncrementAtomic(&cfil_stats.cfs_attach_event_flow_control);
3292
3293 if (!cfil_rw_lock_shared_to_exclusive(&cfil_lck_rw)) {
3294 cfil_rw_lock_exclusive(&cfil_lck_rw);
3295 }
3296
3297 cfc->cf_flags |= CFF_FLOW_CONTROLLED;
3298
3299 cfil_rw_unlock_exclusive(&cfil_lck_rw);
3300 } else {
3301 if (error != 0) {
3302 OSIncrementAtomic(&cfil_stats.cfs_attach_event_fail);
3303 }
3304
3305 cfil_rw_unlock_shared(&cfil_lck_rw);
3306 }
3307 return error;
3308 }
3309
3310 static int
3311 cfil_dispatch_disconnect_event(struct socket *so, struct cfil_info *cfil_info, uint32_t kcunit, int outgoing)
3312 {
3313 errno_t error = 0;
3314 struct mbuf *msg = NULL;
3315 struct cfil_entry *entry;
3316 struct cfe_buf *entrybuf;
3317 struct cfil_msg_hdr msg_disconnected;
3318 struct content_filter *cfc;
3319
3320 socket_lock_assert_owned(so);
3321
3322 cfil_rw_lock_shared(&cfil_lck_rw);
3323
3324 entry = &cfil_info->cfi_entries[kcunit - 1];
3325 if (outgoing) {
3326 entrybuf = &entry->cfe_snd;
3327 } else {
3328 entrybuf = &entry->cfe_rcv;
3329 }
3330
3331 cfc = entry->cfe_filter;
3332 if (cfc == NULL) {
3333 goto done;
3334 }
3335
3336 CFIL_LOG(LOG_INFO, "so %llx kcunit %u outgoing %d",
3337 (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit, outgoing);
3338
3339 /*
3340 * Send the disconnection event once
3341 */
3342 if ((outgoing && (entry->cfe_flags & CFEF_SENT_DISCONNECT_OUT)) ||
3343 (!outgoing && (entry->cfe_flags & CFEF_SENT_DISCONNECT_IN))) {
3344 CFIL_LOG(LOG_INFO, "so %llx disconnect already sent",
3345 (uint64_t)VM_KERNEL_ADDRPERM(so));
3346 goto done;
3347 }
3348
3349 /*
3350 * We're not disconnected as long as some data is waiting
3351 * to be delivered to the filter
3352 */
3353 if (outgoing && cfil_queue_empty(&entrybuf->cfe_ctl_q) == 0) {
3354 CFIL_LOG(LOG_INFO, "so %llx control queue not empty",
3355 (uint64_t)VM_KERNEL_ADDRPERM(so));
3356 error = EBUSY;
3357 goto done;
3358 }
3359 /* Would be wasteful to try when flow controlled */
3360 if (cfc->cf_flags & CFF_FLOW_CONTROLLED) {
3361 error = ENOBUFS;
3362 goto done;
3363 }
3364
3365 if (cfil_info->cfi_debug) {
3366 cfil_info_log(LOG_ERR, cfil_info, "CFIL: SENDING DISCONNECT UP");
3367 }
3368
3369 #if LIFECYCLE_DEBUG
3370 cfil_info_log(LOG_ERR, cfil_info, outgoing ?
3371 "CFIL: LIFECYCLE: OUT - SENDING DISCONNECT UP":
3372 "CFIL: LIFECYCLE: IN - SENDING DISCONNECT UP");
3373 #endif
3374
3375 bzero(&msg_disconnected, sizeof(struct cfil_msg_hdr));
3376 msg_disconnected.cfm_len = sizeof(struct cfil_msg_hdr);
3377 msg_disconnected.cfm_version = CFM_VERSION_CURRENT;
3378 msg_disconnected.cfm_type = CFM_TYPE_EVENT;
3379 msg_disconnected.cfm_op = outgoing ? CFM_OP_DISCONNECT_OUT :
3380 CFM_OP_DISCONNECT_IN;
3381 msg_disconnected.cfm_sock_id = entry->cfe_cfil_info->cfi_sock_id;
3382 error = ctl_enqueuedata(entry->cfe_filter->cf_kcref,
3383 entry->cfe_filter->cf_kcunit,
3384 &msg_disconnected,
3385 sizeof(struct cfil_msg_hdr),
3386 CTL_DATA_EOR);
3387 if (error != 0) {
3388 CFIL_LOG(LOG_ERR, "ctl_enqueuedata() failed: %d", error);
3389 mbuf_freem(msg);
3390 goto done;
3391 }
3392 microuptime(&entry->cfe_last_event);
3393 CFI_ADD_TIME_LOG(cfil_info, &entry->cfe_last_event, &cfil_info->cfi_first_event, msg_disconnected.cfm_op);
3394
3395 /* Remember we have sent the disconnection message */
3396 if (outgoing) {
3397 entry->cfe_flags |= CFEF_SENT_DISCONNECT_OUT;
3398 OSIncrementAtomic(&cfil_stats.cfs_disconnect_out_event_ok);
3399 } else {
3400 entry->cfe_flags |= CFEF_SENT_DISCONNECT_IN;
3401 OSIncrementAtomic(&cfil_stats.cfs_disconnect_in_event_ok);
3402 }
3403 done:
3404 if (error == ENOBUFS) {
3405 entry->cfe_flags |= CFEF_FLOW_CONTROLLED;
3406 OSIncrementAtomic(
3407 &cfil_stats.cfs_disconnect_event_flow_control);
3408
3409 if (!cfil_rw_lock_shared_to_exclusive(&cfil_lck_rw)) {
3410 cfil_rw_lock_exclusive(&cfil_lck_rw);
3411 }
3412
3413 cfc->cf_flags |= CFF_FLOW_CONTROLLED;
3414
3415 cfil_rw_unlock_exclusive(&cfil_lck_rw);
3416 } else {
3417 if (error != 0) {
3418 OSIncrementAtomic(
3419 &cfil_stats.cfs_disconnect_event_fail);
3420 }
3421
3422 cfil_rw_unlock_shared(&cfil_lck_rw);
3423 }
3424 return error;
3425 }
3426
3427 int
3428 cfil_dispatch_closed_event(struct socket *so, struct cfil_info *cfil_info, int kcunit)
3429 {
3430 struct cfil_entry *entry;
3431 struct cfil_msg_sock_closed msg_closed;
3432 errno_t error = 0;
3433 struct content_filter *cfc;
3434
3435 socket_lock_assert_owned(so);
3436
3437 cfil_rw_lock_shared(&cfil_lck_rw);
3438
3439 entry = &cfil_info->cfi_entries[kcunit - 1];
3440 cfc = entry->cfe_filter;
3441 if (cfc == NULL) {
3442 goto done;
3443 }
3444
3445 CFIL_LOG(LOG_INFO, "so %llx kcunit %d",
3446 (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit);
3447
3448 /* Would be wasteful to try when flow controlled */
3449 if (cfc->cf_flags & CFF_FLOW_CONTROLLED) {
3450 error = ENOBUFS;
3451 goto done;
3452 }
3453 /*
3454 * Send a single closed message per filter
3455 */
3456 if ((entry->cfe_flags & CFEF_SENT_SOCK_CLOSED) != 0) {
3457 goto done;
3458 }
3459 if ((entry->cfe_flags & CFEF_SENT_SOCK_ATTACHED) == 0) {
3460 goto done;
3461 }
3462
3463 microuptime(&entry->cfe_last_event);
3464 CFI_ADD_TIME_LOG(cfil_info, &entry->cfe_last_event, &cfil_info->cfi_first_event, CFM_OP_SOCKET_CLOSED);
3465
3466 bzero(&msg_closed, sizeof(struct cfil_msg_sock_closed));
3467 msg_closed.cfc_msghdr.cfm_len = sizeof(struct cfil_msg_sock_closed);
3468 msg_closed.cfc_msghdr.cfm_version = CFM_VERSION_CURRENT;
3469 msg_closed.cfc_msghdr.cfm_type = CFM_TYPE_EVENT;
3470 msg_closed.cfc_msghdr.cfm_op = CFM_OP_SOCKET_CLOSED;
3471 msg_closed.cfc_msghdr.cfm_sock_id = entry->cfe_cfil_info->cfi_sock_id;
3472 msg_closed.cfc_first_event.tv_sec = cfil_info->cfi_first_event.tv_sec;
3473 msg_closed.cfc_first_event.tv_usec = cfil_info->cfi_first_event.tv_usec;
3474 memcpy(msg_closed.cfc_op_time, cfil_info->cfi_op_time, sizeof(uint32_t) * CFI_MAX_TIME_LOG_ENTRY);
3475 memcpy(msg_closed.cfc_op_list, cfil_info->cfi_op_list, sizeof(unsigned char) * CFI_MAX_TIME_LOG_ENTRY);
3476 msg_closed.cfc_op_list_ctr = cfil_info->cfi_op_list_ctr;
3477 msg_closed.cfc_byte_inbound_count = cfil_info->cfi_byte_inbound_count;
3478 msg_closed.cfc_byte_outbound_count = cfil_info->cfi_byte_outbound_count;
3479
3480 cfil_dispatch_closed_event_sign(entry->cfe_filter->cf_crypto_state, so, cfil_info, &msg_closed);
3481
3482 if (cfil_info->cfi_debug) {
3483 cfil_info_log(LOG_ERR, cfil_info, "CFIL: SENDING CLOSED UP");
3484 }
3485
3486 #if LIFECYCLE_DEBUG
3487 CFIL_LOG(LOG_ERR, "CFIL: LIFECYCLE: SENDING CLOSED UP: <sock id %llu> op ctr %d, start time %llu.%llu", msg_closed.cfc_msghdr.cfm_sock_id, cfil_info->cfi_op_list_ctr, cfil_info->cfi_first_event.tv_sec, cfil_info->cfi_first_event.tv_usec);
3488 #endif
3489 /* for debugging
3490 * if (msg_closed.cfc_op_list_ctr > CFI_MAX_TIME_LOG_ENTRY) {
3491 * msg_closed.cfc_op_list_ctr = CFI_MAX_TIME_LOG_ENTRY; // just in case
3492 * }
3493 * for (unsigned int i = 0; i < msg_closed.cfc_op_list_ctr ; i++) {
3494 * CFIL_LOG(LOG_ERR, "MD: socket %llu event %2u, time + %u msec", msg_closed.cfc_msghdr.cfm_sock_id, (unsigned short)msg_closed.cfc_op_list[i], msg_closed.cfc_op_time[i]);
3495 * }
3496 */
3497
3498 error = ctl_enqueuedata(entry->cfe_filter->cf_kcref,
3499 entry->cfe_filter->cf_kcunit,
3500 &msg_closed,
3501 sizeof(struct cfil_msg_sock_closed),
3502 CTL_DATA_EOR);
3503 if (error != 0) {
3504 CFIL_LOG(LOG_ERR, "ctl_enqueuedata() failed: %d",
3505 error);
3506 goto done;
3507 }
3508
3509 entry->cfe_flags |= CFEF_SENT_SOCK_CLOSED;
3510 OSIncrementAtomic(&cfil_stats.cfs_closed_event_ok);
3511 done:
3512 /* We can recover from flow control */
3513 if (error == ENOBUFS) {
3514 entry->cfe_flags |= CFEF_FLOW_CONTROLLED;
3515 OSIncrementAtomic(&cfil_stats.cfs_closed_event_flow_control);
3516
3517 if (!cfil_rw_lock_shared_to_exclusive(&cfil_lck_rw)) {
3518 cfil_rw_lock_exclusive(&cfil_lck_rw);
3519 }
3520
3521 cfc->cf_flags |= CFF_FLOW_CONTROLLED;
3522
3523 cfil_rw_unlock_exclusive(&cfil_lck_rw);
3524 } else {
3525 if (error != 0) {
3526 OSIncrementAtomic(&cfil_stats.cfs_closed_event_fail);
3527 }
3528
3529 cfil_rw_unlock_shared(&cfil_lck_rw);
3530 }
3531
3532 return error;
3533 }
3534
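/*
 * fill_ip6_sockaddr_4_6() / fill_ip_sockaddr_4_6()
 *
 * Fill the sockaddr_in_4_6 union carried in event messages with the given
 * address and port. For IPv6, a scope id embedded in a link-local address is
 * extracted into sin6_scope_id and cleared from the address bytes.
 */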
3535 static void
3536 fill_ip6_sockaddr_4_6(union sockaddr_in_4_6 *sin46,
3537 struct in6_addr *ip6, u_int16_t port)
3538 {
3539 if (sin46 == NULL) {
3540 return;
3541 }
3542
3543 struct sockaddr_in6 *sin6 = &sin46->sin6;
3544
3545 sin6->sin6_family = AF_INET6;
3546 sin6->sin6_len = sizeof(*sin6);
3547 sin6->sin6_port = port;
3548 sin6->sin6_addr = *ip6;
3549 if (IN6_IS_SCOPE_EMBED(&sin6->sin6_addr)) {
3550 sin6->sin6_scope_id = ntohs(sin6->sin6_addr.s6_addr16[1]);
3551 sin6->sin6_addr.s6_addr16[1] = 0;
3552 }
3553 }
3554
3555 static void
3556 fill_ip_sockaddr_4_6(union sockaddr_in_4_6 *sin46,
3557 struct in_addr ip, u_int16_t port)
3558 {
3559 if (sin46 == NULL) {
3560 return;
3561 }
3562
3563 struct sockaddr_in *sin = &sin46->sin;
3564
3565 sin->sin_family = AF_INET;
3566 sin->sin_len = sizeof(*sin);
3567 sin->sin_port = port;
3568 sin->sin_addr.s_addr = ip.s_addr;
3569 }
3570
3571 static void
3572 cfil_get_flow_address_v6(struct cfil_hash_entry *entry, struct inpcb *inp,
3573 struct in6_addr **laddr, struct in6_addr **faddr,
3574 u_int16_t *lport, u_int16_t *fport)
3575 {
3576 if (entry != NULL) {
3577 *laddr = &entry->cfentry_laddr.addr6;
3578 *faddr = &entry->cfentry_faddr.addr6;
3579 *lport = entry->cfentry_lport;
3580 *fport = entry->cfentry_fport;
3581 } else {
3582 *laddr = &inp->in6p_laddr;
3583 *faddr = &inp->in6p_faddr;
3584 *lport = inp->inp_lport;
3585 *fport = inp->inp_fport;
3586 }
3587 }
3588
3589 static void
3590 cfil_get_flow_address(struct cfil_hash_entry *entry, struct inpcb *inp,
3591 struct in_addr *laddr, struct in_addr *faddr,
3592 u_int16_t *lport, u_int16_t *fport)
3593 {
3594 if (entry != NULL) {
3595 *laddr = entry->cfentry_laddr.addr46.ia46_addr4;
3596 *faddr = entry->cfentry_faddr.addr46.ia46_addr4;
3597 *lport = entry->cfentry_lport;
3598 *fport = entry->cfentry_fport;
3599 } else {
3600 *laddr = inp->inp_laddr;
3601 *faddr = inp->inp_faddr;
3602 *lport = inp->inp_lport;
3603 *fport = inp->inp_fport;
3604 }
3605 }
3606
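/*
 * cfil_dispatch_data_event()
 *
 * Deliver a span of data to the filter: copy [copyoffset, copyoffset + copylen)
 * of the mbuf chain, prepend a cfil_msg_data_event header carrying the current
 * peek offsets and addresses, sign it if needed, and enqueue the message on the
 * filter's kernel control socket. ENOBUFS puts the filter in flow control.
 */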
3607 static int
3608 cfil_dispatch_data_event(struct socket *so, struct cfil_info *cfil_info, uint32_t kcunit, int outgoing,
3609 struct mbuf *data, unsigned int copyoffset, unsigned int copylen)
3610 {
3611 errno_t error = 0;
3612 struct mbuf *copy = NULL;
3613 struct mbuf *msg = NULL;
3614 unsigned int one = 1;
3615 struct cfil_msg_data_event *data_req;
3616 size_t hdrsize;
3617 struct inpcb *inp = (struct inpcb *)so->so_pcb;
3618 struct cfil_entry *entry;
3619 struct cfe_buf *entrybuf;
3620 struct content_filter *cfc;
3621 struct timeval tv;
3622 int inp_flags = 0;
3623
3624 cfil_rw_lock_shared(&cfil_lck_rw);
3625
3626 entry = &cfil_info->cfi_entries[kcunit - 1];
3627 if (outgoing) {
3628 entrybuf = &entry->cfe_snd;
3629 } else {
3630 entrybuf = &entry->cfe_rcv;
3631 }
3632
3633 cfc = entry->cfe_filter;
3634 if (cfc == NULL) {
3635 goto done;
3636 }
3637
3638 data = cfil_data_start(data);
3639 if (data == NULL || (data->m_flags & M_PKTHDR) == 0) {
3640 CFIL_LOG(LOG_ERR, "NOT PKTHDR");
3641 goto done;
3642 }
3643
3644 CFIL_LOG(LOG_INFO, "so %llx kcunit %u outgoing %d",
3645 (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit, outgoing);
3646
3647 socket_lock_assert_owned(so);
3648
3649 /* Would be wasteful to try */
3650 if (cfc->cf_flags & CFF_FLOW_CONTROLLED) {
3651 error = ENOBUFS;
3652 goto done;
3653 }
3654
3655 /* Make a copy of the data to pass to kernel control socket */
3656 copy = m_copym_mode(data, copyoffset, copylen, M_DONTWAIT,
3657 M_COPYM_NOOP_HDR);
3658 if (copy == NULL) {
3659 CFIL_LOG(LOG_ERR, "m_copym_mode() failed");
3660 error = ENOMEM;
3661 goto done;
3662 }
3663
3664 /* We need an mbuf packet for the message header */
3665 hdrsize = sizeof(struct cfil_msg_data_event);
3666 error = mbuf_allocpacket(MBUF_DONTWAIT, hdrsize, &one, &msg);
3667 if (error != 0) {
3668 CFIL_LOG(LOG_ERR, "mbuf_allocpacket() failed");
3669 m_freem(copy);
3670 /*
3671 * Use ENOMEM here since ENOBUFS is reserved to indicate flow control
3672 */
3673 error = ENOMEM;
3674 goto done;
3675 }
3676 mbuf_setlen(msg, hdrsize);
3677 mbuf_pkthdr_setlen(msg, hdrsize + copylen);
3678 msg->m_next = copy;
3679 data_req = (struct cfil_msg_data_event *)mbuf_data(msg);
3680 bzero(data_req, hdrsize);
3681 data_req->cfd_msghdr.cfm_len = (uint32_t)hdrsize + copylen;
3682 data_req->cfd_msghdr.cfm_version = 1;
3683 data_req->cfd_msghdr.cfm_type = CFM_TYPE_EVENT;
3684 data_req->cfd_msghdr.cfm_op =
3685 outgoing ? CFM_OP_DATA_OUT : CFM_OP_DATA_IN;
3686 data_req->cfd_msghdr.cfm_sock_id =
3687 entry->cfe_cfil_info->cfi_sock_id;
3688 data_req->cfd_start_offset = entrybuf->cfe_peeked;
3689 data_req->cfd_end_offset = entrybuf->cfe_peeked + copylen;
3690
3691 data_req->cfd_flags = 0;
3692 if (OPTIONAL_IP_HEADER(so)) {
3693 /*
3694 * For non-UDP/TCP traffic, indicate to filters if optional
3695 * IP header is present:
3696 * outgoing - indicate according to INP_HDRINCL flag
3697 * incoming - For IPv4 only, stripping of IP header is
3698 * optional. But for CFIL, we delay stripping
3699 * at rip_input. So CFIL always expects IP
3700 * frames. IP header will be stripped according
3701 * to INP_STRIPHDR flag later at reinjection.
3702 */
3703 if ((!outgoing && !IS_INP_V6(inp)) ||
3704 (outgoing && cfil_dgram_peek_socket_state(data, &inp_flags) && (inp_flags & INP_HDRINCL))) {
3705 data_req->cfd_flags |= CFD_DATA_FLAG_IP_HEADER;
3706 }
3707 }
3708
3709 /*
3710 * Copy address/port into event msg.
3711 * For non-connected sockets the addresses need to be copied from the
3712 * passed parameters
3713 */
3714 cfil_fill_event_msg_addresses(cfil_info->cfi_hash_entry, inp,
3715 &data_req->cfc_src, &data_req->cfc_dst,
3716 !IS_INP_V6(inp), outgoing);
3717
3718 if (cfil_info->cfi_debug) {
3719 cfil_info_log(LOG_ERR, cfil_info, "CFIL: SENDING DATA UP");
3720 }
3721
3722 if (cfil_info->cfi_isSignatureLatest == false) {
3723 cfil_dispatch_data_event_sign(entry->cfe_filter->cf_crypto_state, so, cfil_info, data_req);
3724 }
3725
3726 microuptime(&tv);
3727 CFI_ADD_TIME_LOG(cfil_info, &tv, &cfil_info->cfi_first_event, data_req->cfd_msghdr.cfm_op);
3728
3729 /* Pass the message to the content filter */
3730 error = ctl_enqueuembuf(entry->cfe_filter->cf_kcref,
3731 entry->cfe_filter->cf_kcunit,
3732 msg, CTL_DATA_EOR);
3733 if (error != 0) {
3734 CFIL_LOG(LOG_ERR, "ctl_enqueuembuf() failed: %d", error);
3735 mbuf_freem(msg);
3736 goto done;
3737 }
3738 entry->cfe_flags &= ~CFEF_FLOW_CONTROLLED;
3739 OSIncrementAtomic(&cfil_stats.cfs_data_event_ok);
3740
3741 #if VERDICT_DEBUG
3742 CFIL_LOG(LOG_ERR, "CFIL: VERDICT ACTION: so %llx sockID %llu outgoing %d: mbuf %llx copyoffset %u copylen %u",
3743 (uint64_t)VM_KERNEL_ADDRPERM(so), cfil_info->cfi_sock_id, outgoing, (uint64_t)VM_KERNEL_ADDRPERM(data), copyoffset, copylen);
3744 #endif
3745
3746 if (cfil_info->cfi_debug) {
3747 CFIL_LOG(LOG_ERR, "CFIL: VERDICT ACTION: so %llx sockID %llu outgoing %d: mbuf %llx copyoffset %u copylen %u (%s)",
3748 (uint64_t)VM_KERNEL_ADDRPERM(so), cfil_info->cfi_sock_id, outgoing, (uint64_t)VM_KERNEL_ADDRPERM(data), copyoffset, copylen,
3749 data_req->cfd_flags & CFD_DATA_FLAG_IP_HEADER ? "IP HDR" : "NO IP HDR");
3750 }
3751
3752 done:
3753 if (error == ENOBUFS) {
3754 entry->cfe_flags |= CFEF_FLOW_CONTROLLED;
3755 OSIncrementAtomic(
3756 &cfil_stats.cfs_data_event_flow_control);
3757
3758 if (!cfil_rw_lock_shared_to_exclusive(&cfil_lck_rw)) {
3759 cfil_rw_lock_exclusive(&cfil_lck_rw);
3760 }
3761
3762 cfc->cf_flags |= CFF_FLOW_CONTROLLED;
3763
3764 cfil_rw_unlock_exclusive(&cfil_lck_rw);
3765 } else {
3766 if (error != 0) {
3767 OSIncrementAtomic(&cfil_stats.cfs_data_event_fail);
3768 }
3769
3770 cfil_rw_unlock_shared(&cfil_lck_rw);
3771 }
3772 return error;
3773 }
3774
3775 /*
3776 * Process the queue of data waiting to be delivered to content filter
3777 */
3778 static int
3779 cfil_data_service_ctl_q(struct socket *so, struct cfil_info *cfil_info, uint32_t kcunit, int outgoing)
3780 {
3781 errno_t error = 0;
3782 struct mbuf *data, *tmp = NULL;
3783 unsigned int datalen = 0, copylen = 0, copyoffset = 0;
3784 struct cfil_entry *entry;
3785 struct cfe_buf *entrybuf;
3786 uint64_t currentoffset = 0;
3787
3788 if (cfil_info == NULL) {
3789 return 0;
3790 }
3791
3792 CFIL_LOG(LOG_INFO, "so %llx kcunit %u outgoing %d",
3793 (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit, outgoing);
3794
3795 socket_lock_assert_owned(so);
3796
3797 entry = &cfil_info->cfi_entries[kcunit - 1];
3798 if (outgoing) {
3799 entrybuf = &entry->cfe_snd;
3800 } else {
3801 entrybuf = &entry->cfe_rcv;
3802 }
3803
3804 /* Send attached message if not yet done */
3805 if ((entry->cfe_flags & CFEF_SENT_SOCK_ATTACHED) == 0) {
3806 error = cfil_dispatch_attach_event(so, cfil_info, CFI_ENTRY_KCUNIT(cfil_info, entry),
3807 outgoing ? CFS_CONNECTION_DIR_OUT : CFS_CONNECTION_DIR_IN);
3808 if (error != 0) {
3809 /* We can recover from flow control */
3810 if (error == ENOBUFS || error == ENOMEM) {
3811 error = 0;
3812 }
3813 goto done;
3814 }
3815 } else if ((entry->cfe_flags & CFEF_DATA_START) == 0) {
3816 OSIncrementAtomic(&cfil_stats.cfs_ctl_q_not_started);
3817 goto done;
3818 }
3819
3820 #if DATA_DEBUG
3821 CFIL_LOG(LOG_DEBUG, "CFIL: SERVICE CTL-Q: pass_offset %llu peeked %llu peek_offset %llu",
3822 entrybuf->cfe_pass_offset,
3823 entrybuf->cfe_peeked,
3824 entrybuf->cfe_peek_offset);
3825 #endif
3826
3827 /* Move all data that can pass */
3828 while ((data = cfil_queue_first(&entrybuf->cfe_ctl_q)) != NULL &&
3829 entrybuf->cfe_ctl_q.q_start < entrybuf->cfe_pass_offset) {
3830 datalen = cfil_data_length(data, NULL, NULL);
3831 tmp = data;
3832
3833 if (entrybuf->cfe_ctl_q.q_start + datalen <=
3834 entrybuf->cfe_pass_offset) {
3835 /*
3836 * The first mbuf can fully pass
3837 */
3838 copylen = datalen;
3839 } else {
3840 /*
3841 * The first mbuf can partially pass
3842 */
3843 copylen = (unsigned int)(entrybuf->cfe_pass_offset - entrybuf->cfe_ctl_q.q_start);
3844 }
3845 VERIFY(copylen <= datalen);
3846
3847 #if DATA_DEBUG
3848 CFIL_LOG(LOG_DEBUG,
3849 "CFIL: SERVICE CTL-Q PASSING: %llx first %llu peeked %llu pass %llu peek %llu"
3850 "datalen %u copylen %u",
3851 (uint64_t)VM_KERNEL_ADDRPERM(tmp),
3852 entrybuf->cfe_ctl_q.q_start,
3853 entrybuf->cfe_peeked,
3854 entrybuf->cfe_pass_offset,
3855 entrybuf->cfe_peek_offset,
3856 datalen, copylen);
3857 #endif
3858
3859 /*
3860 * Data that passes has been peeked at explicitly or
3861 * implicitly
3862 */
3863 if (entrybuf->cfe_ctl_q.q_start + copylen >
3864 entrybuf->cfe_peeked) {
3865 entrybuf->cfe_peeked =
3866 entrybuf->cfe_ctl_q.q_start + copylen;
3867 }
3868 /*
3869 * Stop on partial pass
3870 */
3871 if (copylen < datalen) {
3872 break;
3873 }
3874
3875 /* All good, move full data from ctl queue to pending queue */
3876 cfil_queue_remove(&entrybuf->cfe_ctl_q, data, datalen);
3877
3878 cfil_queue_enqueue(&entrybuf->cfe_pending_q, data, datalen);
3879 if (outgoing) {
3880 OSAddAtomic64(datalen,
3881 &cfil_stats.cfs_pending_q_out_enqueued);
3882 } else {
3883 OSAddAtomic64(datalen,
3884 &cfil_stats.cfs_pending_q_in_enqueued);
3885 }
3886 }
3887 CFIL_INFO_VERIFY(cfil_info);
3888 if (tmp != NULL) {
3889 CFIL_LOG(LOG_DEBUG,
3890 "%llx first %llu peeked %llu pass %llu peek %llu"
3891 "datalen %u copylen %u",
3892 (uint64_t)VM_KERNEL_ADDRPERM(tmp),
3893 entrybuf->cfe_ctl_q.q_start,
3894 entrybuf->cfe_peeked,
3895 entrybuf->cfe_pass_offset,
3896 entrybuf->cfe_peek_offset,
3897 datalen, copylen);
3898 }
3899 tmp = NULL;
3900
3901 /* Now deal with remaining data the filter wants to peek at */
3902 for (data = cfil_queue_first(&entrybuf->cfe_ctl_q),
3903 currentoffset = entrybuf->cfe_ctl_q.q_start;
3904 data != NULL && currentoffset < entrybuf->cfe_peek_offset;
3905 data = cfil_queue_next(&entrybuf->cfe_ctl_q, data),
3906 currentoffset += datalen) {
3907 datalen = cfil_data_length(data, NULL, NULL);
3908 tmp = data;
3909
3910 /* We've already peeked at this mbuf */
3911 if (currentoffset + datalen <= entrybuf->cfe_peeked) {
3912 continue;
3913 }
3914 /*
3915 * The data in the first mbuf may have been
3916 * partially peeked at
3917 */
3918 copyoffset = (unsigned int)(entrybuf->cfe_peeked - currentoffset);
3919 VERIFY(copyoffset < datalen);
3920 copylen = datalen - copyoffset;
3921 VERIFY(copylen <= datalen);
3922 /*
3923 * Do not copy more than needed
3924 */
3925 if (currentoffset + copyoffset + copylen >
3926 entrybuf->cfe_peek_offset) {
3927 copylen = (unsigned int)(entrybuf->cfe_peek_offset -
3928 (currentoffset + copyoffset));
3929 }
3930
3931 #if DATA_DEBUG
3932 CFIL_LOG(LOG_DEBUG,
3933 "CFIL: SERVICE CTL-Q PEEKING: %llx current %llu peeked %llu pass %llu peek %llu "
3934 "datalen %u copylen %u copyoffset %u",
3935 (uint64_t)VM_KERNEL_ADDRPERM(tmp),
3936 currentoffset,
3937 entrybuf->cfe_peeked,
3938 entrybuf->cfe_pass_offset,
3939 entrybuf->cfe_peek_offset,
3940 datalen, copylen, copyoffset);
3941 #endif
3942
3943 /*
3944 * Stop if there is nothing more to peek at
3945 */
3946 if (copylen == 0) {
3947 break;
3948 }
3949 /*
3950 * Let the filter get a peek at this span of data
3951 */
3952 error = cfil_dispatch_data_event(so, cfil_info, kcunit,
3953 outgoing, data, copyoffset, copylen);
3954 if (error != 0) {
3955 /* On error, leave data in ctl_q */
3956 break;
3957 }
3958 entrybuf->cfe_peeked += copylen;
3959 if (outgoing) {
3960 OSAddAtomic64(copylen,
3961 &cfil_stats.cfs_ctl_q_out_peeked);
3962 } else {
3963 OSAddAtomic64(copylen,
3964 &cfil_stats.cfs_ctl_q_in_peeked);
3965 }
3966
3967 /* Stop when data could not be fully peeked at */
3968 if (copylen + copyoffset < datalen) {
3969 break;
3970 }
3971 }
3972 CFIL_INFO_VERIFY(cfil_info);
3973 if (tmp != NULL) {
3974 CFIL_LOG(LOG_DEBUG,
3975 "%llx first %llu peeked %llu pass %llu peek %llu"
3976 "datalen %u copylen %u copyoffset %u",
3977 (uint64_t)VM_KERNEL_ADDRPERM(tmp),
3978 currentoffset,
3979 entrybuf->cfe_peeked,
3980 entrybuf->cfe_pass_offset,
3981 entrybuf->cfe_peek_offset,
3982 datalen, copylen, copyoffset);
3983 }
3984
3985 /*
3986 * Process data that has passed the filter
3987 */
3988 error = cfil_service_pending_queue(so, cfil_info, kcunit, outgoing);
3989 if (error != 0) {
3990 CFIL_LOG(LOG_ERR, "cfil_service_pending_queue() error %d",
3991 error);
3992 goto done;
3993 }
3994
3995 /*
3996 * Dispatch disconnect events that could not be sent
3997 */
3998 if (cfil_info == NULL) {
3999 goto done;
4000 } else if (outgoing) {
4001 if ((cfil_info->cfi_flags & CFIF_SHUT_WR) &&
4002 !(entry->cfe_flags & CFEF_SENT_DISCONNECT_OUT)) {
4003 cfil_dispatch_disconnect_event(so, cfil_info, kcunit, 1);
4004 }
4005 } else {
4006 if ((cfil_info->cfi_flags & CFIF_SHUT_RD) &&
4007 !(entry->cfe_flags & CFEF_SENT_DISCONNECT_IN)) {
4008 cfil_dispatch_disconnect_event(so, cfil_info, kcunit, 0);
4009 }
4010 }
4011
4012 done:
4013 CFIL_LOG(LOG_DEBUG,
4014 "first %llu peeked %llu pass %llu peek %llu",
4015 entrybuf->cfe_ctl_q.q_start,
4016 entrybuf->cfe_peeked,
4017 entrybuf->cfe_pass_offset,
4018 entrybuf->cfe_peek_offset);
4019
4020 CFIL_INFO_VERIFY(cfil_info);
4021 return error;
4022 }
4023
4024 /*
4025 * cfil_data_filter()
4026 *
4027 * Process data for a content filter installed on a socket
4028 */
4029 int
4030 cfil_data_filter(struct socket *so, struct cfil_info *cfil_info, uint32_t kcunit, int outgoing,
4031 struct mbuf *data, uint32_t datalen)
4032 {
4033 errno_t error = 0;
4034 struct cfil_entry *entry;
4035 struct cfe_buf *entrybuf;
4036
4037 CFIL_LOG(LOG_INFO, "so %llx kcunit %u outgoing %d",
4038 (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit, outgoing);
4039
4040 socket_lock_assert_owned(so);
4041
4042 entry = &cfil_info->cfi_entries[kcunit - 1];
4043 if (outgoing) {
4044 entrybuf = &entry->cfe_snd;
4045 } else {
4046 entrybuf = &entry->cfe_rcv;
4047 }
4048
4049 /* Are we attached to the filter? */
4050 if (entry->cfe_filter == NULL) {
4051 error = 0;
4052 goto done;
4053 }
4054
4055 /* Dispatch to filters */
4056 cfil_queue_enqueue(&entrybuf->cfe_ctl_q, data, datalen);
4057 if (outgoing) {
4058 OSAddAtomic64(datalen,
4059 &cfil_stats.cfs_ctl_q_out_enqueued);
4060 } else {
4061 OSAddAtomic64(datalen,
4062 &cfil_stats.cfs_ctl_q_in_enqueued);
4063 }
4064
4065 error = cfil_data_service_ctl_q(so, cfil_info, kcunit, outgoing);
4066 if (error != 0) {
4067 CFIL_LOG(LOG_ERR, "cfil_data_service_ctl_q() error %d",
4068 error);
4069 }
4070 /*
4071 * We have to return EJUSTRETURN in all cases to avoid double free
4072 * by socket layer
4073 */
4074 error = EJUSTRETURN;
4075 done:
4076 CFIL_INFO_VERIFY(cfil_info);
4077
4078 CFIL_LOG(LOG_INFO, "return %d", error);
4079 return error;
4080 }
4081
4082 /*
4083 * cfil_service_inject_queue() re-inject data that passed the
4084 * content filters
4085 */
4086 static int
4087 cfil_service_inject_queue(struct socket *so, struct cfil_info *cfil_info, int outgoing)
4088 {
4089 mbuf_t data;
4090 unsigned int datalen;
4091 int mbcnt = 0;
4092 int mbnum = 0;
4093 errno_t error = 0;
4094 struct cfi_buf *cfi_buf;
4095 struct cfil_queue *inject_q;
4096 int need_rwakeup = 0;
4097 int count = 0;
4098 struct inpcb *inp = NULL;
4099 struct ip *ip = NULL;
4100 unsigned int hlen;
4101
4102 if (cfil_info == NULL) {
4103 return 0;
4104 }
4105
4106 socket_lock_assert_owned(so);
4107
4108 if (outgoing) {
4109 cfi_buf = &cfil_info->cfi_snd;
4110 cfil_info->cfi_flags &= ~CFIF_RETRY_INJECT_OUT;
4111 } else {
4112 cfi_buf = &cfil_info->cfi_rcv;
4113 cfil_info->cfi_flags &= ~CFIF_RETRY_INJECT_IN;
4114 }
4115 inject_q = &cfi_buf->cfi_inject_q;
4116
4117 if (cfil_queue_empty(inject_q)) {
4118 return 0;
4119 }
4120
4121 #if DATA_DEBUG | VERDICT_DEBUG
4122 CFIL_LOG(LOG_ERR, "CFIL: SERVICE INJECT-Q: <so %llx> outgoing %d queue len %llu",
4123 (uint64_t)VM_KERNEL_ADDRPERM(so), outgoing, cfil_queue_len(inject_q));
4124 #endif
4125
4126 while ((data = cfil_queue_first(inject_q)) != NULL) {
4127 datalen = cfil_data_length(data, &mbcnt, &mbnum);
4128
4129 #if DATA_DEBUG
4130 CFIL_LOG(LOG_ERR, "CFIL: SERVICE INJECT-Q: <so %llx> data %llx datalen %u (mbcnt %u)",
4131 (uint64_t)VM_KERNEL_ADDRPERM(so), (uint64_t)VM_KERNEL_ADDRPERM(data), datalen, mbcnt);
4132 #endif
4133 if (cfil_info->cfi_debug) {
4134 CFIL_LOG(LOG_ERR, "CFIL: SERVICE INJECT-Q: <so %llx> data %llx datalen %u (mbcnt %u)",
4135 (uint64_t)VM_KERNEL_ADDRPERM(so), (uint64_t)VM_KERNEL_ADDRPERM(data), datalen, mbcnt);
4136 }
4137
4138 /* Remove data from queue and adjust stats */
4139 cfil_queue_remove(inject_q, data, datalen);
4140 cfi_buf->cfi_pending_first += datalen;
4141 cfi_buf->cfi_pending_mbcnt -= mbcnt;
4142 cfi_buf->cfi_pending_mbnum -= mbnum;
4143 cfil_info_buf_verify(cfi_buf);
4144
4145 if (outgoing) {
4146 error = sosend_reinject(so, NULL, data, NULL, 0);
4147 if (error != 0) {
4148 #if DATA_DEBUG
4149 cfil_info_log(LOG_ERR, cfil_info, "CFIL: Error: sosend_reinject() failed");
4150 CFIL_LOG(LOG_ERR, "### sosend() failed %d", error);
4151 #endif
4152 break;
4153 }
4154 // At least one injection succeeded, need to wake up pending threads.
4155 need_rwakeup = 1;
4156 } else {
4157 data->m_flags |= M_SKIPCFIL;
4158
4159 /*
4160 * NOTE: We currently only support TCP, UDP, ICMP,
4161 * ICMPv6 and RAWIP. For MPTCP and message TCP we'll
4162 * need to call the appropriate sbappendxxx()
4163 * or fix sock_inject_data_in()
4164 */
4165 if (IS_IP_DGRAM(so)) {
4166 if (OPTIONAL_IP_HEADER(so)) {
4167 inp = sotoinpcb(so);
4168 if (inp && (inp->inp_flags & INP_STRIPHDR)) {
4169 mbuf_t data_start = cfil_data_start(data);
4170 if (data_start != NULL && (data_start->m_flags & M_PKTHDR)) {
4171 ip = mtod(data_start, struct ip *);
4172 hlen = IP_VHL_HL(ip->ip_vhl) << 2;
4173 data_start->m_len -= hlen;
4174 data_start->m_pkthdr.len -= hlen;
4175 data_start->m_data += hlen;
4176 }
4177 }
4178 }
4179
4180 if (sbappendchain(&so->so_rcv, data, 0)) {
4181 need_rwakeup = 1;
4182 }
4183 } else {
4184 if (sbappendstream(&so->so_rcv, data)) {
4185 need_rwakeup = 1;
4186 }
4187 }
4188 }
4189
4190 if (outgoing) {
4191 OSAddAtomic64(datalen,
4192 &cfil_stats.cfs_inject_q_out_passed);
4193 } else {
4194 OSAddAtomic64(datalen,
4195 &cfil_stats.cfs_inject_q_in_passed);
4196 }
4197
4198 count++;
4199 }
4200
4201 #if DATA_DEBUG | VERDICT_DEBUG
4202 CFIL_LOG(LOG_ERR, "CFIL: SERVICE INJECT-Q: <so %llx> injected %d",
4203 (uint64_t)VM_KERNEL_ADDRPERM(so), count);
4204 #endif
4205 if (cfil_info->cfi_debug) {
4206 CFIL_LOG(LOG_ERR, "CFIL: SERVICE INJECT-Q: <so %llx> injected %d",
4207 (uint64_t)VM_KERNEL_ADDRPERM(so), count);
4208 }
4209
4210 /* A single wakeup for several packets is more efficient */
4211 if (need_rwakeup) {
4212 if (outgoing == TRUE) {
4213 sowwakeup(so);
4214 } else {
4215 sorwakeup(so);
4216 }
4217 }
4218
4219 if (error != 0 && cfil_info) {
4220 if (error == ENOBUFS) {
4221 OSIncrementAtomic(&cfil_stats.cfs_inject_q_nobufs);
4222 }
4223 if (error == ENOMEM) {
4224 OSIncrementAtomic(&cfil_stats.cfs_inject_q_nomem);
4225 }
4226
4227 if (outgoing) {
4228 cfil_info->cfi_flags |= CFIF_RETRY_INJECT_OUT;
4229 OSIncrementAtomic(&cfil_stats.cfs_inject_q_out_fail);
4230 } else {
4231 cfil_info->cfi_flags |= CFIF_RETRY_INJECT_IN;
4232 OSIncrementAtomic(&cfil_stats.cfs_inject_q_in_fail);
4233 }
4234 }
4235
4236 /*
4237 * Notify
4238 */
4239 if (cfil_info && (cfil_info->cfi_flags & CFIF_SHUT_WR)) {
4240 cfil_sock_notify_shutdown(so, SHUT_WR);
4241 if (cfil_sock_data_pending(&so->so_snd) == 0) {
4242 soshutdownlock_final(so, SHUT_WR);
4243 }
4244 }
4245 if (cfil_info && (cfil_info->cfi_flags & CFIF_CLOSE_WAIT)) {
4246 if (cfil_filters_attached(so) == 0) {
4247 CFIL_LOG(LOG_INFO, "so %llx waking",
4248 (uint64_t)VM_KERNEL_ADDRPERM(so));
4249 wakeup((caddr_t)cfil_info);
4250 }
4251 }
4252
4253 CFIL_INFO_VERIFY(cfil_info);
4254
4255 return error;
4256 }
4257
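/*
 * cfil_service_pending_queue()
 *
 * Move mbufs from the pending queue that fall entirely below the filter's
 * pass offset, run them through the remaining filters in the ordered list,
 * and enqueue them on the inject queue once every filter has passed them.
 */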
4258 static int
4259 cfil_service_pending_queue(struct socket *so, struct cfil_info *cfil_info, uint32_t kcunit, int outgoing)
4260 {
4261 uint64_t passlen, curlen;
4262 mbuf_t data;
4263 unsigned int datalen;
4264 errno_t error = 0;
4265 struct cfil_entry *entry;
4266 struct cfe_buf *entrybuf;
4267 struct cfil_queue *pending_q;
4268
4269 CFIL_LOG(LOG_INFO, "so %llx kcunit %u outgoing %d",
4270 (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit, outgoing);
4271
4272 socket_lock_assert_owned(so);
4273
4274 entry = &cfil_info->cfi_entries[kcunit - 1];
4275 if (outgoing) {
4276 entrybuf = &entry->cfe_snd;
4277 } else {
4278 entrybuf = &entry->cfe_rcv;
4279 }
4280
4281 pending_q = &entrybuf->cfe_pending_q;
4282
4283 passlen = entrybuf->cfe_pass_offset - pending_q->q_start;
4284
4285 /*
4286 * Locate the chunks of data that we can pass to the next filter
4287 * A data chunk must be on mbuf boundaries
4288 */
4289 curlen = 0;
4290 while ((data = cfil_queue_first(pending_q)) != NULL) {
4291 struct cfil_entry *iter_entry;
4292 datalen = cfil_data_length(data, NULL, NULL);
4293
4294 #if DATA_DEBUG
4295 CFIL_LOG(LOG_DEBUG,
4296 "CFIL: SERVICE PENDING-Q: data %llx datalen %u passlen %llu curlen %llu",
4297 (uint64_t)VM_KERNEL_ADDRPERM(data), datalen,
4298 passlen, curlen);
4299 #endif
4300
4301 if (curlen + datalen > passlen) {
4302 break;
4303 }
4304
4305 cfil_queue_remove(pending_q, data, datalen);
4306
4307 curlen += datalen;
4308
4309 for (iter_entry = SLIST_NEXT(entry, cfe_order_link);
4310 iter_entry != NULL;
4311 iter_entry = SLIST_NEXT(iter_entry, cfe_order_link)) {
4312 error = cfil_data_filter(so, cfil_info, CFI_ENTRY_KCUNIT(cfil_info, iter_entry), outgoing,
4313 data, datalen);
4314 /* 0 means passed so we can continue */
4315 if (error != 0) {
4316 break;
4317 }
4318 }
4319 /* When data has passed all filters, re-inject */
4320 if (error == 0) {
4321 if (outgoing) {
4322 cfil_queue_enqueue(
4323 &cfil_info->cfi_snd.cfi_inject_q,
4324 data, datalen);
4325 OSAddAtomic64(datalen,
4326 &cfil_stats.cfs_inject_q_out_enqueued);
4327 } else {
4328 cfil_queue_enqueue(
4329 &cfil_info->cfi_rcv.cfi_inject_q,
4330 data, datalen);
4331 OSAddAtomic64(datalen,
4332 &cfil_stats.cfs_inject_q_in_enqueued);
4333 }
4334 }
4335 }
4336
4337 CFIL_INFO_VERIFY(cfil_info);
4338
4339 return error;
4340 }
4341
4342 int
4343 cfil_update_data_offsets(struct socket *so, struct cfil_info *cfil_info, uint32_t kcunit, int outgoing,
4344 uint64_t pass_offset, uint64_t peek_offset)
4345 {
4346 errno_t error = 0;
4347 struct cfil_entry *entry = NULL;
4348 struct cfe_buf *entrybuf;
4349 int updated = 0;
4350
4351 CFIL_LOG(LOG_INFO, "pass %llu peek %llu", pass_offset, peek_offset);
4352
4353 socket_lock_assert_owned(so);
4354
4355 if (cfil_info == NULL) {
4356 CFIL_LOG(LOG_ERR, "so %llx cfil detached",
4357 (uint64_t)VM_KERNEL_ADDRPERM(so));
4358 error = 0;
4359 goto done;
4360 } else if (cfil_info->cfi_flags & CFIF_DROP) {
4361 CFIL_LOG(LOG_ERR, "so %llx drop set",
4362 (uint64_t)VM_KERNEL_ADDRPERM(so));
4363 error = EPIPE;
4364 goto done;
4365 }
4366
4367 entry = &cfil_info->cfi_entries[kcunit - 1];
4368 if (outgoing) {
4369 entrybuf = &entry->cfe_snd;
4370 } else {
4371 entrybuf = &entry->cfe_rcv;
4372 }
4373
4374 /* Record updated offsets for this content filter */
4375 if (pass_offset > entrybuf->cfe_pass_offset) {
4376 entrybuf->cfe_pass_offset = pass_offset;
4377
4378 if (entrybuf->cfe_peek_offset < entrybuf->cfe_pass_offset) {
4379 entrybuf->cfe_peek_offset = entrybuf->cfe_pass_offset;
4380 }
4381 updated = 1;
4382 } else {
4383 CFIL_LOG(LOG_INFO, "pass_offset %llu <= cfe_pass_offset %llu",
4384 pass_offset, entrybuf->cfe_pass_offset);
4385 }
4386 /* Filter does not want or need to see data that's allowed to pass */
4387 if (peek_offset > entrybuf->cfe_pass_offset &&
4388 peek_offset > entrybuf->cfe_peek_offset) {
4389 entrybuf->cfe_peek_offset = peek_offset;
4390 updated = 1;
4391 }
4392 /* Nothing to do */
4393 if (updated == 0) {
4394 goto done;
4395 }
4396
4397 /* Move data held in control queue to pending queue if needed */
4398 error = cfil_data_service_ctl_q(so, cfil_info, kcunit, outgoing);
4399 if (error != 0) {
4400 CFIL_LOG(LOG_ERR, "cfil_data_service_ctl_q() error %d",
4401 error);
4402 goto done;
4403 }
4404 error = EJUSTRETURN;
4405
4406 done:
4407 /*
4408 * The filter is effectively detached when it has passed everything from both sides
4409 * or when the socket is closed and no more data is waiting
4410 * to be delivered to the filter
4411 */
4412 if (entry != NULL &&
4413 ((entry->cfe_snd.cfe_pass_offset == CFM_MAX_OFFSET &&
4414 entry->cfe_rcv.cfe_pass_offset == CFM_MAX_OFFSET) ||
4415 ((cfil_info->cfi_flags & CFIF_CLOSE_WAIT) &&
4416 cfil_queue_empty(&entry->cfe_snd.cfe_ctl_q) &&
4417 cfil_queue_empty(&entry->cfe_rcv.cfe_ctl_q)))) {
4418 entry->cfe_flags |= CFEF_CFIL_DETACHED;
4419 #if LIFECYCLE_DEBUG
4420 cfil_info_log(LOG_ERR, cfil_info, outgoing ?
4421 "CFIL: LIFECYCLE: OUT - PASSED ALL - DETACH":
4422 "CFIL: LIFECYCLE: IN - PASSED ALL - DETACH");
4423 #endif
4424 CFIL_LOG(LOG_INFO, "so %llx detached %u",
4425 (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit);
4426 if ((cfil_info->cfi_flags & CFIF_CLOSE_WAIT) &&
4427 cfil_filters_attached(so) == 0) {
4428 #if LIFECYCLE_DEBUG
4429 cfil_info_log(LOG_ERR, cfil_info, "CFIL: LIFECYCLE: WAKING");
4430 #endif
4431 CFIL_LOG(LOG_INFO, "so %llx waking",
4432 (uint64_t)VM_KERNEL_ADDRPERM(so));
4433 wakeup((caddr_t)cfil_info);
4434 }
4435 }
4436 CFIL_INFO_VERIFY(cfil_info);
4437 CFIL_LOG(LOG_INFO, "return %d", error);
4438 return error;
4439 }
4440
4441 /*
4442 * Update pass offset for socket when no data is pending
4443 */
4444 static int
4445 cfil_set_socket_pass_offset(struct socket *so, struct cfil_info *cfil_info, int outgoing)
4446 {
4447 struct cfi_buf *cfi_buf;
4448 struct cfil_entry *entry;
4449 struct cfe_buf *entrybuf;
4450 uint32_t kcunit;
4451 uint64_t pass_offset = 0;
4452 boolean_t first = true;
4453
4454 if (cfil_info == NULL) {
4455 return 0;
4456 }
4457
4458 CFIL_LOG(LOG_INFO, "so %llx outgoing %d",
4459 (uint64_t)VM_KERNEL_ADDRPERM(so), outgoing);
4460
4461 socket_lock_assert_owned(so);
4462
4463 if (outgoing) {
4464 cfi_buf = &cfil_info->cfi_snd;
4465 } else {
4466 cfi_buf = &cfil_info->cfi_rcv;
4467 }
4468
4469 CFIL_LOG(LOG_DEBUG, "CFIL: <so %llx, sockID %llu> outgoing %d cfi_pending_first %llu cfi_pending_last %llu",
4470 (uint64_t)VM_KERNEL_ADDRPERM(so), cfil_info->cfi_sock_id, outgoing,
4471 cfi_buf->cfi_pending_first, cfi_buf->cfi_pending_last);
4472
4473 if (cfi_buf->cfi_pending_last - cfi_buf->cfi_pending_first == 0) {
4474 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
4475 entry = &cfil_info->cfi_entries[kcunit - 1];
4476
4477 /* Are we attached to a filter? */
4478 if (entry->cfe_filter == NULL) {
4479 continue;
4480 }
4481
4482 if (outgoing) {
4483 entrybuf = &entry->cfe_snd;
4484 } else {
4485 entrybuf = &entry->cfe_rcv;
4486 }
4487
4488 // Keep track of the smallest pass_offset among filters.
4489 if (first == true ||
4490 entrybuf->cfe_pass_offset < pass_offset) {
4491 pass_offset = entrybuf->cfe_pass_offset;
4492 first = false;
4493 }
4494 }
4495 cfi_buf->cfi_pass_offset = pass_offset;
4496 }
4497
4498 CFIL_LOG(LOG_DEBUG, "CFIL: <so %llx, sockID %llu>, cfi_pass_offset %llu",
4499 (uint64_t)VM_KERNEL_ADDRPERM(so), cfil_info->cfi_sock_id, cfi_buf->cfi_pass_offset);
4500
4501 return 0;
4502 }
4503
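/*
 * cfil_action_data_pass()
 *
 * Apply a pass verdict from a filter: update the entry's pass/peek offsets,
 * re-inject any data released by the new offsets, then recompute the
 * socket-level pass offset.
 */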
4504 int
4505 cfil_action_data_pass(struct socket *so, struct cfil_info *cfil_info, uint32_t kcunit, int outgoing,
4506 uint64_t pass_offset, uint64_t peek_offset)
4507 {
4508 errno_t error = 0;
4509
4510 CFIL_LOG(LOG_INFO, "");
4511
4512 socket_lock_assert_owned(so);
4513
4514 error = cfil_acquire_sockbuf(so, cfil_info, outgoing);
4515 if (error != 0) {
4516 CFIL_LOG(LOG_INFO, "so %llx %s dropped",
4517 (uint64_t)VM_KERNEL_ADDRPERM(so),
4518 outgoing ? "out" : "in");
4519 goto release;
4520 }
4521
4522 error = cfil_update_data_offsets(so, cfil_info, kcunit, outgoing,
4523 pass_offset, peek_offset);
4524
4525 cfil_service_inject_queue(so, cfil_info, outgoing);
4526
4527 cfil_set_socket_pass_offset(so, cfil_info, outgoing);
4528 release:
4529 CFIL_INFO_VERIFY(cfil_info);
4530 cfil_release_sockbuf(so, outgoing);
4531
4532 return error;
4533 }
4534
4535
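/*
 * cfil_flush_queues()
 *
 * Drain the control, pending and inject queues in both directions. Called
 * when a drop verdict is received or when the socket is closed and the data
 * can no longer be delivered.
 */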
4536 static void
4537 cfil_flush_queues(struct socket *so, struct cfil_info *cfil_info)
4538 {
4539 struct cfil_entry *entry;
4540 int kcunit;
4541 uint64_t drained;
4542
4543 if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || cfil_info == NULL) {
4544 goto done;
4545 }
4546
4547 socket_lock_assert_owned(so);
4548
4549 /*
4550 * Flush the output queues and ignore errors as long as
4551 * we are attached
4552 */
4553 (void) cfil_acquire_sockbuf(so, cfil_info, 1);
4554 if (cfil_info != NULL) {
4555 drained = 0;
4556 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
4557 entry = &cfil_info->cfi_entries[kcunit - 1];
4558
4559 drained += cfil_queue_drain(&entry->cfe_snd.cfe_ctl_q);
4560 drained += cfil_queue_drain(&entry->cfe_snd.cfe_pending_q);
4561 }
4562 drained += cfil_queue_drain(&cfil_info->cfi_snd.cfi_inject_q);
4563
4564 if (drained) {
4565 if (cfil_info->cfi_flags & CFIF_DROP) {
4566 OSIncrementAtomic(
4567 &cfil_stats.cfs_flush_out_drop);
4568 } else {
4569 OSIncrementAtomic(
4570 &cfil_stats.cfs_flush_out_close);
4571 }
4572 }
4573 }
4574 cfil_release_sockbuf(so, 1);
4575
4576 /*
4577 * Flush the input queues
4578 */
4579 (void) cfil_acquire_sockbuf(so, cfil_info, 0);
4580 if (cfil_info != NULL) {
4581 drained = 0;
4582 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
4583 entry = &cfil_info->cfi_entries[kcunit - 1];
4584
4585 drained += cfil_queue_drain(
4586 &entry->cfe_rcv.cfe_ctl_q);
4587 drained += cfil_queue_drain(
4588 &entry->cfe_rcv.cfe_pending_q);
4589 }
4590 drained += cfil_queue_drain(&cfil_info->cfi_rcv.cfi_inject_q);
4591
4592 if (drained) {
4593 if (cfil_info->cfi_flags & CFIF_DROP) {
4594 OSIncrementAtomic(
4595 &cfil_stats.cfs_flush_in_drop);
4596 } else {
4597 OSIncrementAtomic(
4598 &cfil_stats.cfs_flush_in_close);
4599 }
4600 }
4601 }
4602 cfil_release_sockbuf(so, 0);
4603 done:
4604 CFIL_INFO_VERIFY(cfil_info);
4605 }
4606
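/*
 * cfil_action_drop()
 *
 * Handle a drop verdict: mark the flow with CFIF_DROP, defunct and disconnect
 * the socket (when there is no datagram flow database), mark the entry as
 * detached, flush all pending data and wake up any thread in close-wait.
 */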
4607 int
4608 cfil_action_drop(struct socket *so, struct cfil_info *cfil_info, uint32_t kcunit)
4609 {
4610 errno_t error = 0;
4611 struct cfil_entry *entry;
4612 struct proc *p;
4613
4614 if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || cfil_info == NULL) {
4615 goto done;
4616 }
4617
4618 socket_lock_assert_owned(so);
4619
4620 entry = &cfil_info->cfi_entries[kcunit - 1];
4621
4622 /* Are we attached to the filter? */
4623 if (entry->cfe_filter == NULL) {
4624 goto done;
4625 }
4626
4627 cfil_info->cfi_flags |= CFIF_DROP;
4628
4629 p = current_proc();
4630
4631 /*
4632 * Force the socket to be marked defunct
4633 * (forcing was fixed along with rdar://19391339)
4634 */
4635 if (so->so_cfil_db == NULL) {
4636 error = sosetdefunct(p, so,
4637 SHUTDOWN_SOCKET_LEVEL_CONTENT_FILTER | SHUTDOWN_SOCKET_LEVEL_DISCONNECT_ALL,
4638 FALSE);
4639
4640 /* Flush the socket buffer and disconnect */
4641 if (error == 0) {
4642 error = sodefunct(p, so,
4643 SHUTDOWN_SOCKET_LEVEL_CONTENT_FILTER | SHUTDOWN_SOCKET_LEVEL_DISCONNECT_ALL);
4644 }
4645 }
4646
4647 /* The filter is done, mark as detached */
4648 entry->cfe_flags |= CFEF_CFIL_DETACHED;
4649 #if LIFECYCLE_DEBUG
4650 cfil_info_log(LOG_ERR, cfil_info, "CFIL: LIFECYCLE: DROP - DETACH");
4651 #endif
4652 CFIL_LOG(LOG_INFO, "so %llx detached %u",
4653 (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit);
4654
4655 /* Pending data needs to go */
4656 cfil_flush_queues(so, cfil_info);
4657
4658 if (cfil_info && (cfil_info->cfi_flags & CFIF_CLOSE_WAIT)) {
4659 if (cfil_filters_attached(so) == 0) {
4660 CFIL_LOG(LOG_INFO, "so %llx waking",
4661 (uint64_t)VM_KERNEL_ADDRPERM(so));
4662 wakeup((caddr_t)cfil_info);
4663 }
4664 }
4665 done:
4666 return error;
4667 }
4668
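/*
 * cfil_action_bless_client()
 *
 * Look up the socket from the client UUID and grant it an automatic pass in
 * both directions (CFM_MAX_OFFSET). If no filter is attached, mark the socket
 * to skip content filtering altogether.
 */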
4669 int
4670 cfil_action_bless_client(uint32_t kcunit, struct cfil_msg_hdr *msghdr)
4671 {
4672 errno_t error = 0;
4673 struct cfil_info *cfil_info = NULL;
4674
4675 bool cfil_attached = false;
4676 struct cfil_msg_bless_client *blessmsg = (struct cfil_msg_bless_client *)msghdr;
4677
4678 // Search and lock socket
4679 struct socket *so = cfil_socket_from_client_uuid(blessmsg->cfb_client_uuid, &cfil_attached);
4680 if (so == NULL) {
4681 error = ENOENT;
4682 } else {
4683 // The client gets a pass automatically
4684 cfil_info = (so->so_cfil_db != NULL) ?
4685 cfil_db_get_cfil_info(so->so_cfil_db, msghdr->cfm_sock_id) : so->so_cfil;
4686
4687 if (cfil_attached) {
4688 #if VERDICT_DEBUG
4689 if (cfil_info != NULL) {
4690 CFIL_LOG(LOG_ERR, "CFIL: VERDICT RECEIVED: BLESS %s <so %llx sockID %llu>",
4691 cfil_info->cfi_hash_entry ? "UDP" : "TCP",
4692 (uint64_t)VM_KERNEL_ADDRPERM(so),
4693 cfil_info->cfi_sock_id);
4694 }
4695 #endif
4696 cfil_sock_received_verdict(so);
4697 (void)cfil_action_data_pass(so, cfil_info, kcunit, 1, CFM_MAX_OFFSET, CFM_MAX_OFFSET);
4698 (void)cfil_action_data_pass(so, cfil_info, kcunit, 0, CFM_MAX_OFFSET, CFM_MAX_OFFSET);
4699 } else {
4700 so->so_flags1 |= SOF1_CONTENT_FILTER_SKIP;
4701 }
4702 socket_unlock(so, 1);
4703 }
4704
4705 return error;
4706 }
4707
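/*
 * cfil_action_set_crypto_key()
 *
 * Install the per-filter crypto state used to sign attach, data and closed
 * events, replacing any state previously set for this kernel control unit.
 */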
4708 int
4709 cfil_action_set_crypto_key(uint32_t kcunit, struct cfil_msg_hdr *msghdr)
4710 {
4711 struct content_filter *cfc = NULL;
4712 cfil_crypto_state_t crypto_state = NULL;
4713 struct cfil_msg_set_crypto_key *keymsg = (struct cfil_msg_set_crypto_key *)msghdr;
4714
4715 CFIL_LOG(LOG_NOTICE, "");
4716
4717 if (content_filters == NULL) {
4718 CFIL_LOG(LOG_ERR, "no content filter");
4719 return EINVAL;
4720 }
4721 if (kcunit > MAX_CONTENT_FILTER) {
4722 CFIL_LOG(LOG_ERR, "kcunit %u > MAX_CONTENT_FILTER (%d)",
4723 kcunit, MAX_CONTENT_FILTER);
4724 return EINVAL;
4725 }
4726 crypto_state = cfil_crypto_init_client((uint8_t *)keymsg->crypto_key);
4727 if (crypto_state == NULL) {
4728 CFIL_LOG(LOG_ERR, "failed to initialize crypto state for unit %u",
4729 kcunit);
4730 return EINVAL;
4731 }
4732
4733 cfil_rw_lock_exclusive(&cfil_lck_rw);
4734
4735 cfc = content_filters[kcunit - 1];
4736 if (cfc->cf_kcunit != kcunit) {
4737 CFIL_LOG(LOG_ERR, "bad unit info %u",
4738 kcunit);
4739 cfil_rw_unlock_exclusive(&cfil_lck_rw);
4740 cfil_crypto_cleanup_state(crypto_state);
4741 return EINVAL;
4742 }
4743 if (cfc->cf_crypto_state != NULL) {
4744 cfil_crypto_cleanup_state(cfc->cf_crypto_state);
4745 cfc->cf_crypto_state = NULL;
4746 }
4747 cfc->cf_crypto_state = crypto_state;
4748
4749 cfil_rw_unlock_exclusive(&cfil_lck_rw);
4750 return 0;
4751 }
4752
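/*
 * cfil_update_entry_offsets()
 *
 * Fast-path bookkeeping: advance the queue offsets and the pass/peek/peeked
 * markers of every attached entry by datalen without dispatching any event.
 * Used by cfil_data_common() when the data is already below the pass offset.
 */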
4753 static int
4754 cfil_update_entry_offsets(struct socket *so, struct cfil_info *cfil_info, int outgoing, unsigned int datalen)
4755 {
4756 struct cfil_entry *entry;
4757 struct cfe_buf *entrybuf;
4758 uint32_t kcunit;
4759
4760 CFIL_LOG(LOG_INFO, "so %llx outgoing %d datalen %u",
4761 (uint64_t)VM_KERNEL_ADDRPERM(so), outgoing, datalen);
4762
4763 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
4764 entry = &cfil_info->cfi_entries[kcunit - 1];
4765
4766 /* Are we attached to the filter? */
4767 if (entry->cfe_filter == NULL) {
4768 continue;
4769 }
4770
4771 if (outgoing) {
4772 entrybuf = &entry->cfe_snd;
4773 } else {
4774 entrybuf = &entry->cfe_rcv;
4775 }
4776
4777 entrybuf->cfe_ctl_q.q_start += datalen;
4778 if (entrybuf->cfe_pass_offset < entrybuf->cfe_ctl_q.q_start) {
4779 entrybuf->cfe_pass_offset = entrybuf->cfe_ctl_q.q_start;
4780 }
4781 entrybuf->cfe_peeked = entrybuf->cfe_ctl_q.q_start;
4782 if (entrybuf->cfe_peek_offset < entrybuf->cfe_pass_offset) {
4783 entrybuf->cfe_peek_offset = entrybuf->cfe_pass_offset;
4784 }
4785
4786 entrybuf->cfe_ctl_q.q_end += datalen;
4787
4788 entrybuf->cfe_pending_q.q_start += datalen;
4789 entrybuf->cfe_pending_q.q_end += datalen;
4790 }
4791 CFIL_INFO_VERIFY(cfil_info);
4792 return 0;
4793 }
4794
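/*
 * cfil_data_common()
 *
 * Common entry point for outgoing and incoming data: account for the new
 * data, enforce the datagram queue limits, then either take the fast path
 * (data already below the pass offset) or dispatch the data to each attached
 * filter in order. The cursor only moves forward when no filter held the data.
 */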
4795 int
4796 cfil_data_common(struct socket *so, struct cfil_info *cfil_info, int outgoing, struct sockaddr *to,
4797 struct mbuf *data, struct mbuf *control, uint32_t flags)
4798 {
4799 #pragma unused(to, control, flags)
4800 errno_t error = 0;
4801 unsigned int datalen;
4802 int mbcnt = 0;
4803 int mbnum = 0;
4804 int kcunit;
4805 struct cfi_buf *cfi_buf;
4806 struct mbuf *chain = NULL;
4807
4808 if (cfil_info == NULL) {
4809 CFIL_LOG(LOG_ERR, "so %llx cfil detached",
4810 (uint64_t)VM_KERNEL_ADDRPERM(so));
4811 error = 0;
4812 goto done;
4813 } else if (cfil_info->cfi_flags & CFIF_DROP) {
4814 CFIL_LOG(LOG_ERR, "so %llx drop set",
4815 (uint64_t)VM_KERNEL_ADDRPERM(so));
4816 error = EPIPE;
4817 goto done;
4818 }
4819
4820 datalen = cfil_data_length(data, &mbcnt, &mbnum);
4821
4822 if (datalen == 0) {
4823 error = 0;
4824 goto done;
4825 }
4826
4827 if (outgoing) {
4828 cfi_buf = &cfil_info->cfi_snd;
4829 cfil_info->cfi_byte_outbound_count += datalen;
4830 } else {
4831 cfi_buf = &cfil_info->cfi_rcv;
4832 cfil_info->cfi_byte_inbound_count += datalen;
4833 }
4834
4835 cfi_buf->cfi_pending_last += datalen;
4836 cfi_buf->cfi_pending_mbcnt += mbcnt;
4837 cfi_buf->cfi_pending_mbnum += mbnum;
4838
4839 if (IS_IP_DGRAM(so)) {
4840 if (cfi_buf->cfi_pending_mbnum > cfil_udp_gc_mbuf_num_max ||
4841 cfi_buf->cfi_pending_mbcnt > cfil_udp_gc_mbuf_cnt_max) {
4842 cfi_buf->cfi_tail_drop_cnt++;
4843 cfi_buf->cfi_pending_mbcnt -= mbcnt;
4844 cfi_buf->cfi_pending_mbnum -= mbnum;
4845 return EPIPE;
4846 }
4847 }
4848
4849 cfil_info_buf_verify(cfi_buf);
4850
4851 #if DATA_DEBUG
4852 CFIL_LOG(LOG_DEBUG, "CFIL: QUEUEING DATA: <so %llx> %s: data %llx len %u flags 0x%x nextpkt %llx - cfi_pending_last %llu cfi_pending_mbcnt %u cfi_pass_offset %llu",
4853 (uint64_t)VM_KERNEL_ADDRPERM(so),
4854 outgoing ? "OUT" : "IN",
4855 (uint64_t)VM_KERNEL_ADDRPERM(data), datalen, data->m_flags,
4856 (uint64_t)VM_KERNEL_ADDRPERM(data->m_nextpkt),
4857 cfi_buf->cfi_pending_last,
4858 cfi_buf->cfi_pending_mbcnt,
4859 cfi_buf->cfi_pass_offset);
4860 #endif
4861
4862 /* Fast path when below pass offset */
4863 if (cfi_buf->cfi_pending_last <= cfi_buf->cfi_pass_offset) {
4864 cfil_update_entry_offsets(so, cfil_info, outgoing, datalen);
4865 #if DATA_DEBUG
4866 CFIL_LOG(LOG_DEBUG, "CFIL: QUEUEING DATA: FAST PATH");
4867 #endif
4868 } else {
4869 struct cfil_entry *iter_entry;
4870 SLIST_FOREACH(iter_entry, &cfil_info->cfi_ordered_entries, cfe_order_link) {
4871 // Is cfil attached to this filter?
4872 kcunit = CFI_ENTRY_KCUNIT(cfil_info, iter_entry);
4873 if (IS_ENTRY_ATTACHED(cfil_info, kcunit)) {
4874 if (IS_IP_DGRAM(so) && chain == NULL) {
4875 /* Datagrams only:
4876 * Chain addr (incoming only, TBD), control (optional) and data into one chain.
4877 * This full chain will be reinjected into the socket after receiving the verdict.
4878 */
4879 (void) cfil_dgram_save_socket_state(cfil_info, data);
4880 chain = sbconcat_mbufs(NULL, outgoing ? NULL : to, data, control);
4881 if (chain == NULL) {
4882 return ENOBUFS;
4883 }
4884 data = chain;
4885 }
4886 error = cfil_data_filter(so, cfil_info, kcunit, outgoing, data,
4887 datalen);
4888 }
4889 /* 0 means passed so continue with next filter */
4890 if (error != 0) {
4891 break;
4892 }
4893 }
4894 }
4895
4896 /* Move cursor if no filter claimed the data */
4897 if (error == 0) {
4898 cfi_buf->cfi_pending_first += datalen;
4899 cfi_buf->cfi_pending_mbcnt -= mbcnt;
4900 cfi_buf->cfi_pending_mbnum -= mbnum;
4901 cfil_info_buf_verify(cfi_buf);
4902 }
4903 done:
4904 CFIL_INFO_VERIFY(cfil_info);
4905
4906 return error;
4907 }
4908
4909 /*
4910 * Callback from socket layer sosendxxx()
4911 */
4912 int
4913 cfil_sock_data_out(struct socket *so, struct sockaddr *to,
4914 struct mbuf *data, struct mbuf *control, uint32_t flags)
4915 {
4916 int error = 0;
4917 int new_filter_control_unit = 0;
4918
4919 if (IS_IP_DGRAM(so)) {
4920 return cfil_sock_udp_handle_data(TRUE, so, NULL, to, data, control, flags);
4921 }
4922
4923 if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL) {
4924 /* Drop pre-existing TCP sockets if filter is enabled now */
4925 if (cfil_active_count > 0 && !SKIP_FILTER_FOR_TCP_SOCKET(so)) {
4926 new_filter_control_unit = necp_socket_get_content_filter_control_unit(so);
4927 if (new_filter_control_unit > 0) {
4928 return EPIPE;
4929 }
4930 }
4931 return 0;
4932 }
4933
4934 /* Drop pre-existing TCP sockets when filter state changed */
4935 new_filter_control_unit = necp_socket_get_content_filter_control_unit(so);
4936 if (new_filter_control_unit > 0 && new_filter_control_unit != so->so_cfil->cfi_filter_control_unit && !SKIP_FILTER_FOR_TCP_SOCKET(so)) {
4937 return EPIPE;
4938 }
4939
4940 /*
4941 * Pass initial data for TFO.
4942 */
4943 if (IS_INITIAL_TFO_DATA(so)) {
4944 return 0;
4945 }
4946
4947 socket_lock_assert_owned(so);
4948
4949 if (so->so_cfil->cfi_flags & CFIF_DROP) {
4950 CFIL_LOG(LOG_ERR, "so %llx drop set",
4951 (uint64_t)VM_KERNEL_ADDRPERM(so));
4952 return EPIPE;
4953 }
4954 if (control != NULL) {
4955 CFIL_LOG(LOG_ERR, "so %llx control",
4956 (uint64_t)VM_KERNEL_ADDRPERM(so));
4957 OSIncrementAtomic(&cfil_stats.cfs_data_out_control);
4958 }
4959 if ((flags & MSG_OOB)) {
4960 CFIL_LOG(LOG_ERR, "so %llx MSG_OOB",
4961 (uint64_t)VM_KERNEL_ADDRPERM(so));
4962 OSIncrementAtomic(&cfil_stats.cfs_data_out_oob);
4963 }
4964 if ((so->so_snd.sb_flags & SB_LOCK) == 0) {
4965 panic("so %p SB_LOCK not set", so);
4966 }
4967
4968 if (so->so_snd.sb_cfil_thread != NULL) {
4969 panic("%s sb_cfil_thread %p not NULL", __func__,
4970 so->so_snd.sb_cfil_thread);
4971 }
4972
4973 error = cfil_data_common(so, so->so_cfil, 1, to, data, control, flags);
4974
4975 return error;
4976 }
4977
4978 /*
4979 * Callback from socket layer sbappendxxx()
4980 */
4981 int
4982 cfil_sock_data_in(struct socket *so, struct sockaddr *from,
4983 struct mbuf *data, struct mbuf *control, uint32_t flags)
4984 {
4985 int error = 0;
4986 int new_filter_control_unit = 0;
4987
4988 if (IS_IP_DGRAM(so)) {
4989 return cfil_sock_udp_handle_data(FALSE, so, NULL, from, data, control, flags);
4990 }
4991
4992 if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL) {
4993 /* Drop pre-existing TCP sockets if filter is enabled now */
4994 if (cfil_active_count > 0 && !SKIP_FILTER_FOR_TCP_SOCKET(so)) {
4995 new_filter_control_unit = necp_socket_get_content_filter_control_unit(so);
4996 if (new_filter_control_unit > 0) {
4997 return EPIPE;
4998 }
4999 }
5000 return 0;
5001 }
5002
5003 /* Drop pre-existing TCP sockets when filter state changed */
5004 new_filter_control_unit = necp_socket_get_content_filter_control_unit(so);
5005 if (new_filter_control_unit > 0 && new_filter_control_unit != so->so_cfil->cfi_filter_control_unit && !SKIP_FILTER_FOR_TCP_SOCKET(so)) {
5006 return EPIPE;
5007 }
5008
5009 /*
5010 * Pass initial data for TFO.
5011 */
5012 if (IS_INITIAL_TFO_DATA(so)) {
5013 return 0;
5014 }
5015
5016 socket_lock_assert_owned(so);
5017
5018 if (so->so_cfil->cfi_flags & CFIF_DROP) {
5019 CFIL_LOG(LOG_ERR, "so %llx drop set",
5020 (uint64_t)VM_KERNEL_ADDRPERM(so));
5021 return EPIPE;
5022 }
5023 if (control != NULL) {
5024 CFIL_LOG(LOG_ERR, "so %llx control",
5025 (uint64_t)VM_KERNEL_ADDRPERM(so));
5026 OSIncrementAtomic(&cfil_stats.cfs_data_in_control);
5027 }
5028 if (data->m_type == MT_OOBDATA) {
5029 CFIL_LOG(LOG_ERR, "so %llx MSG_OOB",
5030 (uint64_t)VM_KERNEL_ADDRPERM(so));
5031 OSIncrementAtomic(&cfil_stats.cfs_data_in_oob);
5032 }
5033 error = cfil_data_common(so, so->so_cfil, 0, from, data, control, flags);
5034
5035 return error;
5036 }
5037
5038 /*
5039 * Callback from socket layer soshutdownxxx()
5040 *
5041 * We may delay the shutdown write if there is outgoing data still being processed.
5042 *
5043 * There is no point in delaying the shutdown read because the process
5044 * indicated that it does not want to read any more data.
5045 */
5046 int
5047 cfil_sock_shutdown(struct socket *so, int *how)
5048 {
5049 int error = 0;
5050
5051 if (IS_IP_DGRAM(so)) {
5052 return cfil_sock_udp_shutdown(so, how);
5053 }
5054
5055 if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL) {
5056 goto done;
5057 }
5058
5059 socket_lock_assert_owned(so);
5060
5061 CFIL_LOG(LOG_INFO, "so %llx how %d",
5062 (uint64_t)VM_KERNEL_ADDRPERM(so), *how);
5063
5064 /*
5065 * Check the state of the socket before the content filter
5066 */
5067 if (*how != SHUT_WR && (so->so_state & SS_CANTRCVMORE) != 0) {
5068 /* read already shut down */
5069 error = ENOTCONN;
5070 goto done;
5071 }
5072 if (*how != SHUT_RD && (so->so_state & SS_CANTSENDMORE) != 0) {
5073 /* write already shut down */
5074 error = ENOTCONN;
5075 goto done;
5076 }
5077
5078 if ((so->so_cfil->cfi_flags & CFIF_DROP) != 0) {
5079 CFIL_LOG(LOG_ERR, "so %llx drop set",
5080 (uint64_t)VM_KERNEL_ADDRPERM(so));
5081 goto done;
5082 }
5083
5084 /*
5085 * shutdown read: SHUT_RD or SHUT_RDWR
5086 */
5087 if (*how != SHUT_WR) {
5088 if (so->so_cfil->cfi_flags & CFIF_SHUT_RD) {
5089 error = ENOTCONN;
5090 goto done;
5091 }
5092 so->so_cfil->cfi_flags |= CFIF_SHUT_RD;
5093 cfil_sock_notify_shutdown(so, SHUT_RD);
5094 }
5095 /*
5096 * shutdown write: SHUT_WR or SHUT_RDWR
5097 */
5098 if (*how != SHUT_RD) {
5099 if (so->so_cfil->cfi_flags & CFIF_SHUT_WR) {
5100 error = ENOTCONN;
5101 goto done;
5102 }
5103 so->so_cfil->cfi_flags |= CFIF_SHUT_WR;
5104 cfil_sock_notify_shutdown(so, SHUT_WR);
5105 /*
5106 * When outgoing data is pending, we delay the shutdown at the
5107 * protocol level until the content filters give the final
5108 * verdict on the pending data.
5109 */
5110 if (cfil_sock_data_pending(&so->so_snd) != 0) {
5111 /*
5112 * When shutting down the read and write sides at once
5113 * we can proceed to the final shutdown of the read
5114 * side. Otherwise, we just return.
5115 */
5116 if (*how == SHUT_WR) {
5117 error = EJUSTRETURN;
5118 } else if (*how == SHUT_RDWR) {
5119 *how = SHUT_RD;
5120 }
5121 }
5122 }
5123 done:
5124 return error;
5125 }
5126
5127 /*
5128 * This is called when the socket is closed and there is no more
5129 * opportunity for filtering
5130 */
5131 void
5132 cfil_sock_is_closed(struct socket *so)
5133 {
5134 errno_t error = 0;
5135 int kcunit;
5136
5137 if (IS_IP_DGRAM(so)) {
5138 cfil_sock_udp_is_closed(so);
5139 return;
5140 }
5141
5142 if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL) {
5143 return;
5144 }
5145
5146 CFIL_LOG(LOG_INFO, "so %llx", (uint64_t)VM_KERNEL_ADDRPERM(so));
5147
5148 socket_lock_assert_owned(so);
5149
5150 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
5151 /* Let the filters know of the closing */
5152 error = cfil_dispatch_closed_event(so, so->so_cfil, kcunit);
5153 }
5154
5155 /* Last chance to push passed data out */
5156 error = cfil_acquire_sockbuf(so, so->so_cfil, 1);
5157 if (error == 0) {
5158 cfil_service_inject_queue(so, so->so_cfil, 1);
5159 }
5160 cfil_release_sockbuf(so, 1);
5161
5162 so->so_cfil->cfi_flags |= CFIF_SOCK_CLOSED;
5163
5164 /* Pending data needs to go */
5165 cfil_flush_queues(so, so->so_cfil);
5166
5167 CFIL_INFO_VERIFY(so->so_cfil);
5168 }
5169
5170 /*
5171 * This is called when the socket is disconnected so let the filters
5172 * know about the disconnection and that no more data will come
5173 *
5174 * The how parameter has the same values as soshutdown()
5175 */
5176 void
5177 cfil_sock_notify_shutdown(struct socket *so, int how)
5178 {
5179 errno_t error = 0;
5180 int kcunit;
5181
5182 if (IS_IP_DGRAM(so)) {
5183 cfil_sock_udp_notify_shutdown(so, how, 0, 0);
5184 return;
5185 }
5186
5187 if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL) {
5188 return;
5189 }
5190
5191 CFIL_LOG(LOG_INFO, "so %llx how %d",
5192 (uint64_t)VM_KERNEL_ADDRPERM(so), how);
5193
5194 socket_lock_assert_owned(so);
5195
5196 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
5197 /* Disconnect incoming side */
5198 if (how != SHUT_WR) {
5199 error = cfil_dispatch_disconnect_event(so, so->so_cfil, kcunit, 0);
5200 }
5201 /* Disconnect outgoing side */
5202 if (how != SHUT_RD) {
5203 error = cfil_dispatch_disconnect_event(so, so->so_cfil, kcunit, 1);
5204 }
5205 }
5206 }
5207
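/*
 * cfil_filters_attached()
 *
 * Return 1 when at least one filter entry has received the attach event and
 * has not yet detached, 0 otherwise.
 */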
5208 static int
5209 cfil_filters_attached(struct socket *so)
5210 {
5211 struct cfil_entry *entry;
5212 uint32_t kcunit;
5213 int attached = 0;
5214
5215 if (IS_IP_DGRAM(so)) {
5216 return cfil_filters_udp_attached(so, FALSE);
5217 }
5218
5219 if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL) {
5220 return 0;
5221 }
5222
5223 socket_lock_assert_owned(so);
5224
5225 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
5226 entry = &so->so_cfil->cfi_entries[kcunit - 1];
5227
5228 /* Are we attached to the filter? */
5229 if (entry->cfe_filter == NULL) {
5230 continue;
5231 }
5232 if ((entry->cfe_flags & CFEF_SENT_SOCK_ATTACHED) == 0) {
5233 continue;
5234 }
5235 if ((entry->cfe_flags & CFEF_CFIL_DETACHED) != 0) {
5236 continue;
5237 }
5238 attached = 1;
5239 break;
5240 }
5241
5242 return attached;
5243 }
5244
5245 /*
5246 * This is called when the socket is closed and we are waiting for
5247 * the filters to give the final pass or drop
5248 */
5249 void
5250 cfil_sock_close_wait(struct socket *so)
5251 {
5252 lck_mtx_t *mutex_held;
5253 struct timespec ts;
5254 int error;
5255
5256 if (IS_IP_DGRAM(so)) {
5257 cfil_sock_udp_close_wait(so);
5258 return;
5259 }
5260
5261 if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL) {
5262 return;
5263 }
5264
5265 CFIL_LOG(LOG_INFO, "so %llx", (uint64_t)VM_KERNEL_ADDRPERM(so));
5266
5267 if (so->so_proto->pr_getlock != NULL) {
5268 mutex_held = (*so->so_proto->pr_getlock)(so, PR_F_WILLUNLOCK);
5269 } else {
5270 mutex_held = so->so_proto->pr_domain->dom_mtx;
5271 }
5272 LCK_MTX_ASSERT(mutex_held, LCK_MTX_ASSERT_OWNED);
5273
5274 while (cfil_filters_attached(so)) {
5275 /*
5276 * Notify the filters we are going away so they can detach
5277 */
5278 cfil_sock_notify_shutdown(so, SHUT_RDWR);
5279
5280 /*
5281 * Make sure we still need to wait after the filters are notified
5282 * of the disconnection
5283 */
5284 if (cfil_filters_attached(so) == 0) {
5285 break;
5286 }
5287
5288 CFIL_LOG(LOG_INFO, "so %llx waiting",
5289 (uint64_t)VM_KERNEL_ADDRPERM(so));
5290
5291 ts.tv_sec = cfil_close_wait_timeout / 1000;
5292 ts.tv_nsec = (cfil_close_wait_timeout % 1000) *
5293 NSEC_PER_USEC * 1000;
5294
5295 OSIncrementAtomic(&cfil_stats.cfs_close_wait);
5296 so->so_cfil->cfi_flags |= CFIF_CLOSE_WAIT;
5297 error = msleep((caddr_t)so->so_cfil, mutex_held,
5298 PSOCK | PCATCH, "cfil_sock_close_wait", &ts);
5299 so->so_cfil->cfi_flags &= ~CFIF_CLOSE_WAIT;
5300
5301 CFIL_LOG(LOG_NOTICE, "so %llx timed out %d",
5302 (uint64_t)VM_KERNEL_ADDRPERM(so), (error != 0));
5303
5304 /*
5305 * Force close in case of timeout
5306 */
5307 if (error != 0) {
5308 OSIncrementAtomic(&cfil_stats.cfs_close_wait_timeout);
5309 break;
5310 }
5311 }
5312 }
5313
5314 /*
5315 * Returns the number of bytes held back by the content filter on this socket buffer
5316 */
5317 int32_t
5318 cfil_sock_data_pending(struct sockbuf *sb)
5319 {
5320 struct socket *so = sb->sb_so;
5321 uint64_t pending = 0;
5322
5323 if (IS_IP_DGRAM(so)) {
5324 return cfil_sock_udp_data_pending(sb, FALSE);
5325 }
5326
5327 if ((so->so_flags & SOF_CONTENT_FILTER) != 0 && so->so_cfil != NULL) {
5328 struct cfi_buf *cfi_buf;
5329
5330 socket_lock_assert_owned(so);
5331
5332 if ((sb->sb_flags & SB_RECV) == 0) {
5333 cfi_buf = &so->so_cfil->cfi_snd;
5334 } else {
5335 cfi_buf = &so->so_cfil->cfi_rcv;
5336 }
5337
5338 pending = cfi_buf->cfi_pending_last -
5339 cfi_buf->cfi_pending_first;
5340
5341 /*
5342 * If we are limited by the number of "chars of mbufs used",
5343 * adjust roughly so we won't overcommit
5344 */
5345 if (pending > (uint64_t)cfi_buf->cfi_pending_mbcnt) {
5346 pending = cfi_buf->cfi_pending_mbcnt;
5347 }
5348 }
5349
5350 VERIFY(pending < INT32_MAX);
5351
5352 return (int32_t)(pending);
5353 }
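/*
 * Worked example of the computation above (hypothetical values, for
 * illustration only): if the filters have been shown bytes up to
 * offset cfi_pending_last = 6000 while everything below
 * cfi_pending_first = 4000 has already been released, 2000 bytes are
 * still held.  If only cfi_pending_mbcnt = 1500 characters of mbuf
 * storage are accounted for, the smaller value is reported so the
 * caller does not overcommit:
 *
 *	pending = 6000 - 4000;			// 2000 bytes held back
 *	if (pending > cfi_pending_mbcnt)	// mbcnt == 1500
 *		pending = cfi_pending_mbcnt;	// report 1500
 */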
5354
5355 /*
5356 * Return the socket buffer space used by data being held by content filters
5357 * so processes won't clog the socket buffer
5358 */
5359 int32_t
5360 cfil_sock_data_space(struct sockbuf *sb)
5361 {
5362 struct socket *so = sb->sb_so;
5363 uint64_t pending = 0;
5364
5365 if (IS_IP_DGRAM(so)) {
5366 return cfil_sock_udp_data_pending(sb, TRUE);
5367 }
5368
5369 if ((so->so_flags & SOF_CONTENT_FILTER) != 0 && so->so_cfil != NULL &&
5370 so->so_snd.sb_cfil_thread != current_thread()) {
5371 struct cfi_buf *cfi_buf;
5372
5373 socket_lock_assert_owned(so);
5374
5375 if ((sb->sb_flags & SB_RECV) == 0) {
5376 cfi_buf = &so->so_cfil->cfi_snd;
5377 } else {
5378 cfi_buf = &so->so_cfil->cfi_rcv;
5379 }
5380
5381 pending = cfi_buf->cfi_pending_last -
5382 cfi_buf->cfi_pending_first;
5383
5384 /*
5385 * If we are limited by the number of "chars of mbufs used",
5386 * adjust roughly so we won't overcommit
5387 */
5388 if ((uint64_t)cfi_buf->cfi_pending_mbcnt > pending) {
5389 pending = cfi_buf->cfi_pending_mbcnt;
5390 }
5391 }
5392
5393 VERIFY(pending < INT32_MAX);
5394
5395 return (int32_t)(pending);
5396 }
5397
5398 /*
5399 * A callback from the socket and protocol layer when data becomes
5400 * available in the socket buffer to give a chance for the content filter
5401 * to re-inject data that was held back
5402 */
5403 void
5404 cfil_sock_buf_update(struct sockbuf *sb)
5405 {
5406 int outgoing;
5407 int error;
5408 struct socket *so = sb->sb_so;
5409
5410 if (IS_IP_DGRAM(so)) {
5411 cfil_sock_udp_buf_update(sb);
5412 return;
5413 }
5414
5415 if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL) {
5416 return;
5417 }
5418
5419 if (!cfil_sbtrim) {
5420 return;
5421 }
5422
5423 socket_lock_assert_owned(so);
5424
5425 if ((sb->sb_flags & SB_RECV) == 0) {
5426 if ((so->so_cfil->cfi_flags & CFIF_RETRY_INJECT_OUT) == 0) {
5427 return;
5428 }
5429 outgoing = 1;
5430 OSIncrementAtomic(&cfil_stats.cfs_inject_q_out_retry);
5431 } else {
5432 if ((so->so_cfil->cfi_flags & CFIF_RETRY_INJECT_IN) == 0) {
5433 return;
5434 }
5435 outgoing = 0;
5436 OSIncrementAtomic(&cfil_stats.cfs_inject_q_in_retry);
5437 }
5438
5439 CFIL_LOG(LOG_NOTICE, "so %llx outgoing %d",
5440 (uint64_t)VM_KERNEL_ADDRPERM(so), outgoing);
5441
5442 error = cfil_acquire_sockbuf(so, so->so_cfil, outgoing);
5443 if (error == 0) {
5444 cfil_service_inject_queue(so, so->so_cfil, outgoing);
5445 }
5446 cfil_release_sockbuf(so, outgoing);
5447 }
5448
5449 int
5450 sysctl_cfil_filter_list(struct sysctl_oid *oidp, void *arg1, int arg2,
5451 struct sysctl_req *req)
5452 {
5453 #pragma unused(oidp, arg1, arg2)
5454 int error = 0;
5455 size_t len = 0;
5456 u_int32_t i;
5457
5458 /* Read only */
5459 if (req->newptr != USER_ADDR_NULL) {
5460 return EPERM;
5461 }
5462
5463 cfil_rw_lock_shared(&cfil_lck_rw);
5464
5465 for (i = 0; content_filters != NULL && i < MAX_CONTENT_FILTER; i++) {
5466 struct cfil_filter_stat filter_stat;
5467 struct content_filter *cfc = content_filters[i];
5468
5469 if (cfc == NULL) {
5470 continue;
5471 }
5472
5473 /* If just asking for the size */
5474 if (req->oldptr == USER_ADDR_NULL) {
5475 len += sizeof(struct cfil_filter_stat);
5476 continue;
5477 }
5478
5479 bzero(&filter_stat, sizeof(struct cfil_filter_stat));
5480 filter_stat.cfs_len = sizeof(struct cfil_filter_stat);
5481 filter_stat.cfs_filter_id = cfc->cf_kcunit;
5482 filter_stat.cfs_flags = cfc->cf_flags;
5483 filter_stat.cfs_sock_count = cfc->cf_sock_count;
5484 filter_stat.cfs_necp_control_unit = cfc->cf_necp_control_unit;
5485
5486 error = SYSCTL_OUT(req, &filter_stat,
5487 sizeof(struct cfil_filter_stat));
5488 if (error != 0) {
5489 break;
5490 }
5491 }
5492 /* If just asking for the size */
5493 if (req->oldptr == USER_ADDR_NULL) {
5494 req->oldidx = len;
5495 }
5496
5497 cfil_rw_unlock_shared(&cfil_lck_rw);
5498
5499 #if SHOW_DEBUG
5500 if (req->oldptr != USER_ADDR_NULL) {
5501 for (i = 1; content_filters != NULL && i <= MAX_CONTENT_FILTER; i++) {
5502 cfil_filter_show(i);
5503 }
5504 }
5505 #endif
5506
5507 return error;
5508 }
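/*
 * Minimal user space sketch of driving the handler above with the
 * usual two-call sysctl pattern (size probe, then data).  The MIB name
 * "net.cfil.filter_list" and user space visibility of
 * struct cfil_filter_stat are assumptions made for illustration only:
 *
 *	size_t len = 0;
 *	if (sysctlbyname("net.cfil.filter_list", NULL, &len, NULL, 0) == 0 &&
 *	    len >= sizeof(struct cfil_filter_stat)) {
 *		struct cfil_filter_stat *stats = malloc(len);
 *		if (stats != NULL &&
 *		    sysctlbyname("net.cfil.filter_list", stats, &len, NULL, 0) == 0) {
 *			// one cfil_filter_stat record per active filter
 *			printf("filter %u flags 0x%x socket count %u\n",
 *			    stats[0].cfs_filter_id, stats[0].cfs_flags,
 *			    stats[0].cfs_sock_count);
 *		}
 *		free(stats);
 *	}
 */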
5509
5510 static int
5511 sysctl_cfil_sock_list(struct sysctl_oid *oidp, void *arg1, int arg2,
5512 struct sysctl_req *req)
5513 {
5514 #pragma unused(oidp, arg1, arg2)
5515 int error = 0;
5516 u_int32_t i;
5517 struct cfil_info *cfi;
5518
5519 /* Read only */
5520 if (req->newptr != USER_ADDR_NULL) {
5521 return EPERM;
5522 }
5523
5524 cfil_rw_lock_shared(&cfil_lck_rw);
5525
5526 /*
5527 * If just asking for the size
5528 */
5529 if (req->oldptr == USER_ADDR_NULL) {
5530 req->oldidx = cfil_sock_attached_count *
5531 sizeof(struct cfil_sock_stat);
5532 /* Bump the length in case new sockets get attached */
5533 req->oldidx += req->oldidx >> 3;
5534 goto done;
5535 }
5536
5537 TAILQ_FOREACH(cfi, &cfil_sock_head, cfi_link) {
5538 struct cfil_entry *entry;
5539 struct cfil_sock_stat stat;
5540 struct socket *so = cfi->cfi_so;
5541
5542 bzero(&stat, sizeof(struct cfil_sock_stat));
5543 stat.cfs_len = sizeof(struct cfil_sock_stat);
5544 stat.cfs_sock_id = cfi->cfi_sock_id;
5545 stat.cfs_flags = cfi->cfi_flags;
5546
5547 if (so != NULL) {
5548 stat.cfs_pid = so->last_pid;
5549 memcpy(stat.cfs_uuid, so->last_uuid,
5550 sizeof(uuid_t));
5551 if (so->so_flags & SOF_DELEGATED) {
5552 stat.cfs_e_pid = so->e_pid;
5553 memcpy(stat.cfs_e_uuid, so->e_uuid,
5554 sizeof(uuid_t));
5555 } else {
5556 stat.cfs_e_pid = so->last_pid;
5557 memcpy(stat.cfs_e_uuid, so->last_uuid,
5558 sizeof(uuid_t));
5559 }
5560
5561 stat.cfs_sock_family = so->so_proto->pr_domain->dom_family;
5562 stat.cfs_sock_type = so->so_proto->pr_type;
5563 stat.cfs_sock_protocol = so->so_proto->pr_protocol;
5564 }
5565
5566 stat.cfs_snd.cbs_pending_first =
5567 cfi->cfi_snd.cfi_pending_first;
5568 stat.cfs_snd.cbs_pending_last =
5569 cfi->cfi_snd.cfi_pending_last;
5570 stat.cfs_snd.cbs_inject_q_len =
5571 cfil_queue_len(&cfi->cfi_snd.cfi_inject_q);
5572 stat.cfs_snd.cbs_pass_offset =
5573 cfi->cfi_snd.cfi_pass_offset;
5574
5575 stat.cfs_rcv.cbs_pending_first =
5576 cfi->cfi_rcv.cfi_pending_first;
5577 stat.cfs_rcv.cbs_pending_last =
5578 cfi->cfi_rcv.cfi_pending_last;
5579 stat.cfs_rcv.cbs_inject_q_len =
5580 cfil_queue_len(&cfi->cfi_rcv.cfi_inject_q);
5581 stat.cfs_rcv.cbs_pass_offset =
5582 cfi->cfi_rcv.cfi_pass_offset;
5583
5584 for (i = 0; i < MAX_CONTENT_FILTER; i++) {
5585 struct cfil_entry_stat *estat;
5586 struct cfe_buf *ebuf;
5587 struct cfe_buf_stat *sbuf;
5588
5589 entry = &cfi->cfi_entries[i];
5590
5591 estat = &stat.ces_entries[i];
5592
5593 estat->ces_len = sizeof(struct cfil_entry_stat);
5594 estat->ces_filter_id = entry->cfe_filter ?
5595 entry->cfe_filter->cf_kcunit : 0;
5596 estat->ces_flags = entry->cfe_flags;
5597 estat->ces_necp_control_unit =
5598 entry->cfe_necp_control_unit;
5599
5600 estat->ces_last_event.tv_sec =
5601 (int64_t)entry->cfe_last_event.tv_sec;
5602 estat->ces_last_event.tv_usec =
5603 (int64_t)entry->cfe_last_event.tv_usec;
5604
5605 estat->ces_last_action.tv_sec =
5606 (int64_t)entry->cfe_last_action.tv_sec;
5607 estat->ces_last_action.tv_usec =
5608 (int64_t)entry->cfe_last_action.tv_usec;
5609
5610 ebuf = &entry->cfe_snd;
5611 sbuf = &estat->ces_snd;
5612 sbuf->cbs_pending_first =
5613 cfil_queue_offset_first(&ebuf->cfe_pending_q);
5614 sbuf->cbs_pending_last =
5615 cfil_queue_offset_last(&ebuf->cfe_pending_q);
5616 sbuf->cbs_ctl_first =
5617 cfil_queue_offset_first(&ebuf->cfe_ctl_q);
5618 sbuf->cbs_ctl_last =
5619 cfil_queue_offset_last(&ebuf->cfe_ctl_q);
5620 sbuf->cbs_pass_offset = ebuf->cfe_pass_offset;
5621 sbuf->cbs_peek_offset = ebuf->cfe_peek_offset;
5622 sbuf->cbs_peeked = ebuf->cfe_peeked;
5623
5624 ebuf = &entry->cfe_rcv;
5625 sbuf = &estat->ces_rcv;
5626 sbuf->cbs_pending_first =
5627 cfil_queue_offset_first(&ebuf->cfe_pending_q);
5628 sbuf->cbs_pending_last =
5629 cfil_queue_offset_last(&ebuf->cfe_pending_q);
5630 sbuf->cbs_ctl_first =
5631 cfil_queue_offset_first(&ebuf->cfe_ctl_q);
5632 sbuf->cbs_ctl_last =
5633 cfil_queue_offset_last(&ebuf->cfe_ctl_q);
5634 sbuf->cbs_pass_offset = ebuf->cfe_pass_offset;
5635 sbuf->cbs_peek_offset = ebuf->cfe_peek_offset;
5636 sbuf->cbs_peeked = ebuf->cfe_peeked;
5637 }
5638 error = SYSCTL_OUT(req, &stat,
5639 sizeof(struct cfil_sock_stat));
5640 if (error != 0) {
5641 break;
5642 }
5643 }
5644 done:
5645 cfil_rw_unlock_shared(&cfil_lck_rw);
5646
5647 #if SHOW_DEBUG
5648 if (req->oldptr != USER_ADDR_NULL) {
5649 cfil_info_show();
5650 }
5651 #endif
5652
5653 return error;
5654 }
5655
5656 /*
5657 * UDP Socket Support
5658 */
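/*
 * Unlike TCP, where a socket carries a single so_cfil, a datagram
 * socket may exchange data with many peers, so it gets a per-socket
 * cfil_db instead: a hash table of CFILHASHSIZE buckets holding one
 * cfil_hash_entry per address tuple ("flow"), each entry pointing at
 * its own cfil_info.  A minimal sketch of the lookup pattern used
 * throughout this section (error handling omitted):
 *
 *	struct cfil_hash_entry *hash_entry;
 *	struct cfil_info *cfil_info = NULL;
 *
 *	hash_entry = cfil_db_lookup_entry(so->so_cfil_db, local, remote, false);
 *	if (hash_entry == NULL) {
 *		// no match on both addresses, retry matching the remote only
 *		hash_entry = cfil_db_lookup_entry(so->so_cfil_db, local, remote, true);
 *	}
 *	if (hash_entry != NULL) {
 *		cfil_info = hash_entry->cfentry_cfil;	// per-flow filter state
 *	}
 */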
5659 static void
5660 cfil_hash_entry_log(int level, struct socket *so, struct cfil_hash_entry *entry, uint64_t sockId, const char* msg)
5661 {
5662 char local[MAX_IPv6_STR_LEN + 6];
5663 char remote[MAX_IPv6_STR_LEN + 6];
5664 const void *addr;
5665
5666 // No socket or no entry, no-op
5667 if (so == NULL || entry == NULL) {
5668 return;
5669 }
5670
5671 local[0] = remote[0] = 0x0;
5672
5673 switch (entry->cfentry_family) {
5674 case AF_INET6:
5675 addr = &entry->cfentry_laddr.addr6;
5676 inet_ntop(AF_INET6, addr, local, sizeof(local));
5677 addr = &entry->cfentry_faddr.addr6;
5678 inet_ntop(AF_INET6, addr, remote, sizeof(local));
5679 break;
5680 case AF_INET:
5681 addr = &entry->cfentry_laddr.addr46.ia46_addr4.s_addr;
5682 inet_ntop(AF_INET, addr, local, sizeof(local));
5683 addr = &entry->cfentry_faddr.addr46.ia46_addr4.s_addr;
5684 inet_ntop(AF_INET, addr, remote, sizeof(local));
5685 break;
5686 default:
5687 return;
5688 }
5689
5690 CFIL_LOG(level, "<%s>: <%s(%d) so %llx, entry %p, sockID %llu> lport %d fport %d laddr %s faddr %s hash %X",
5691 msg,
5692 IS_UDP(so) ? "UDP" : "proto", GET_SO_PROTO(so),
5693 (uint64_t)VM_KERNEL_ADDRPERM(so), entry, sockId,
5694 ntohs(entry->cfentry_lport), ntohs(entry->cfentry_fport), local, remote,
5695 entry->cfentry_flowhash);
5696 }
5697
5698 static void
5699 cfil_inp_log(int level, struct socket *so, const char* msg)
5700 {
5701 struct inpcb *inp = NULL;
5702 char local[MAX_IPv6_STR_LEN + 6];
5703 char remote[MAX_IPv6_STR_LEN + 6];
5704 const void *addr;
5705
5706 if (so == NULL) {
5707 return;
5708 }
5709
5710 inp = sotoinpcb(so);
5711 if (inp == NULL) {
5712 return;
5713 }
5714
5715 local[0] = remote[0] = 0x0;
5716
5717 if (inp->inp_vflag & INP_IPV6) {
5718 addr = &inp->in6p_laddr.s6_addr32;
5719 inet_ntop(AF_INET6, addr, local, sizeof(local));
5720 addr = &inp->in6p_faddr.s6_addr32;
5721 inet_ntop(AF_INET6, addr, remote, sizeof(local));
5722 } else {
5723 addr = &inp->inp_laddr.s_addr;
5724 inet_ntop(AF_INET, addr, local, sizeof(local));
5725 addr = &inp->inp_faddr.s_addr;
5726 inet_ntop(AF_INET, addr, remote, sizeof(local));
5727 }
5728
5729 if (so->so_cfil != NULL) {
5730 CFIL_LOG(level, "<%s>: <%s so %llx - flags 0x%x 0x%x, sockID %llu> lport %d fport %d laddr %s faddr %s",
5731 msg, IS_UDP(so) ? "UDP" : "TCP",
5732 (uint64_t)VM_KERNEL_ADDRPERM(so), inp->inp_flags, inp->inp_socket->so_flags, so->so_cfil->cfi_sock_id,
5733 ntohs(inp->inp_lport), ntohs(inp->inp_fport), local, remote);
5734 } else {
5735 CFIL_LOG(level, "<%s>: <%s so %llx - flags 0x%x 0x%x> lport %d fport %d laddr %s faddr %s",
5736 msg, IS_UDP(so) ? "UDP" : "TCP",
5737 (uint64_t)VM_KERNEL_ADDRPERM(so), inp->inp_flags, inp->inp_socket->so_flags,
5738 ntohs(inp->inp_lport), ntohs(inp->inp_fport), local, remote);
5739 }
5740 }
5741
5742 static void
5743 cfil_info_log(int level, struct cfil_info *cfil_info, const char* msg)
5744 {
5745 if (cfil_info == NULL) {
5746 return;
5747 }
5748
5749 if (cfil_info->cfi_hash_entry != NULL) {
5750 cfil_hash_entry_log(level, cfil_info->cfi_so, cfil_info->cfi_hash_entry, cfil_info->cfi_sock_id, msg);
5751 } else {
5752 cfil_inp_log(level, cfil_info->cfi_so, msg);
5753 }
5754 }
5755
5756 errno_t
5757 cfil_db_init(struct socket *so)
5758 {
5759 errno_t error = 0;
5760 struct cfil_db *db = NULL;
5761
5762 CFIL_LOG(LOG_INFO, "");
5763
5764 db = zalloc(cfil_db_zone);
5765 if (db == NULL) {
5766 error = ENOMEM;
5767 goto done;
5768 }
5769 bzero(db, sizeof(struct cfil_db));
5770 db->cfdb_so = so;
5771 db->cfdb_hashbase = hashinit(CFILHASHSIZE, M_CFIL, &db->cfdb_hashmask);
5772 if (db->cfdb_hashbase == NULL) {
5773 zfree(cfil_db_zone, db);
5774 db = NULL;
5775 error = ENOMEM;
5776 goto done;
5777 }
5778
5779 so->so_cfil_db = db;
5780
5781 done:
5782 return error;
5783 }
5784
5785 void
5786 cfil_db_free(struct socket *so)
5787 {
5788 struct cfil_hash_entry *entry = NULL;
5789 struct cfil_hash_entry *temp_entry = NULL;
5790 struct cfilhashhead *cfilhash = NULL;
5791 struct cfil_db *db = NULL;
5792
5793 CFIL_LOG(LOG_INFO, "");
5794
5795 if (so == NULL || so->so_cfil_db == NULL) {
5796 return;
5797 }
5798 db = so->so_cfil_db;
5799
5800 #if LIFECYCLE_DEBUG
5801 CFIL_LOG(LOG_ERR, "CFIL: LIFECYCLE: <so %llx, db %p> freeing db (count == %d)",
5802 (uint64_t)VM_KERNEL_ADDRPERM(so), db, db->cfdb_count);
5803 #endif
5804
5805 for (int i = 0; i < CFILHASHSIZE; i++) {
5806 cfilhash = &db->cfdb_hashbase[i];
5807 LIST_FOREACH_SAFE(entry, cfilhash, cfentry_link, temp_entry) {
5808 if (entry->cfentry_cfil != NULL) {
5809 #if LIFECYCLE_DEBUG
5810 cfil_info_log(LOG_ERR, entry->cfentry_cfil, "CFIL: LIFECYCLE: DB FREE CLEAN UP");
5811 #endif
5812 CFIL_INFO_FREE(entry->cfentry_cfil);
5813 OSIncrementAtomic(&cfil_stats.cfs_sock_detached);
5814 entry->cfentry_cfil = NULL;
5815 }
5816
5817 cfil_db_delete_entry(db, entry);
5818 if (so->so_flags & SOF_CONTENT_FILTER) {
5819 if (db->cfdb_count == 0) {
5820 so->so_flags &= ~SOF_CONTENT_FILTER;
5821 }
5822 VERIFY(so->so_usecount > 0);
5823 so->so_usecount--;
5824 }
5825 }
5826 }
5827
5828 // Make sure all entries are cleaned up!
5829 VERIFY(db->cfdb_count == 0);
5830 #if LIFECYCLE_DEBUG
5831 CFIL_LOG(LOG_ERR, "CFIL: LIFECYCLE: so usecount %d", so->so_usecount);
5832 #endif
5833
5834 hashdestroy(db->cfdb_hashbase, M_CFIL, db->cfdb_hashmask);
5835 zfree(cfil_db_zone, db);
5836 so->so_cfil_db = NULL;
5837 }
5838
5839 static bool
5840 fill_cfil_hash_entry_from_address(struct cfil_hash_entry *entry, bool isLocal, struct sockaddr *addr, bool islocalUpdate)
5841 {
5842 struct sockaddr_in *sin = NULL;
5843 struct sockaddr_in6 *sin6 = NULL;
5844
5845 if (entry == NULL || addr == NULL) {
5846 return FALSE;
5847 }
5848
5849 switch (addr->sa_family) {
5850 case AF_INET:
5851 sin = satosin(addr);
5852 if (sin->sin_len != sizeof(*sin)) {
5853 return FALSE;
5854 }
5855 if (isLocal == TRUE) {
5856 if (sin->sin_port) {
5857 entry->cfentry_lport = sin->sin_port;
5858 if (islocalUpdate) {
5859 entry->cfentry_lport_updated = TRUE;
5860 }
5861 }
5862 if (sin->sin_addr.s_addr) {
5863 entry->cfentry_laddr.addr46.ia46_addr4.s_addr = sin->sin_addr.s_addr;
5864 if (islocalUpdate) {
5865 entry->cfentry_laddr_updated = TRUE;
5866 }
5867 }
5868 } else {
5869 if (sin->sin_port) {
5870 entry->cfentry_fport = sin->sin_port;
5871 }
5872 if (sin->sin_addr.s_addr) {
5873 entry->cfentry_faddr.addr46.ia46_addr4.s_addr = sin->sin_addr.s_addr;
5874 }
5875 }
5876 entry->cfentry_family = AF_INET;
5877 return TRUE;
5878 case AF_INET6:
5879 sin6 = satosin6(addr);
5880 if (sin6->sin6_len != sizeof(*sin6)) {
5881 return FALSE;
5882 }
5883 if (isLocal == TRUE) {
5884 if (sin6->sin6_port) {
5885 entry->cfentry_lport = sin6->sin6_port;
5886 if (islocalUpdate) {
5887 entry->cfentry_lport_updated = TRUE;
5888 }
5889 }
5890 if (!IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
5891 entry->cfentry_laddr.addr6 = sin6->sin6_addr;
5892 if (islocalUpdate) {
5893 entry->cfentry_laddr_updated = TRUE;
5894 }
5895 }
5896 } else {
5897 if (sin6->sin6_port) {
5898 entry->cfentry_fport = sin6->sin6_port;
5899 }
5900 if (!IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
5901 entry->cfentry_faddr.addr6 = sin6->sin6_addr;
5902 }
5903 }
5904 entry->cfentry_family = AF_INET6;
5905 return TRUE;
5906 default:
5907 return FALSE;
5908 }
5909 }
5910
5911 static bool
5912 fill_cfil_hash_entry_from_inp(struct cfil_hash_entry *entry, bool isLocal, struct inpcb *inp, bool islocalUpdate)
5913 {
5914 if (entry == NULL || inp == NULL) {
5915 return FALSE;
5916 }
5917
5918 if (inp->inp_vflag & INP_IPV6) {
5919 if (isLocal == TRUE) {
5920 if (inp->inp_lport) {
5921 entry->cfentry_lport = inp->inp_lport;
5922 if (islocalUpdate) {
5923 entry->cfentry_lport_updated = TRUE;
5924 }
5925 }
5926 if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) {
5927 entry->cfentry_laddr.addr6 = inp->in6p_laddr;
5928 if (islocalUpdate) {
5929 entry->cfentry_laddr_updated = TRUE;
5930 }
5931 }
5932 } else {
5933 if (inp->inp_fport) {
5934 entry->cfentry_fport = inp->inp_fport;
5935 }
5936 if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) {
5937 entry->cfentry_faddr.addr6 = inp->in6p_faddr;
5938 }
5939 }
5940 entry->cfentry_family = AF_INET6;
5941 return TRUE;
5942 } else if (inp->inp_vflag & INP_IPV4) {
5943 if (isLocal == TRUE) {
5944 if (inp->inp_lport) {
5945 entry->cfentry_lport = inp->inp_lport;
5946 if (islocalUpdate) {
5947 entry->cfentry_lport_updated = TRUE;
5948 }
5949 }
5950 if (inp->inp_laddr.s_addr) {
5951 entry->cfentry_laddr.addr46.ia46_addr4.s_addr = inp->inp_laddr.s_addr;
5952 if (islocalUpdate) {
5953 entry->cfentry_laddr_updated = TRUE;
5954 }
5955 }
5956 } else {
5957 if (inp->inp_fport) {
5958 entry->cfentry_fport = inp->inp_fport;
5959 }
5960 if (inp->inp_faddr.s_addr) {
5961 entry->cfentry_faddr.addr46.ia46_addr4.s_addr = inp->inp_faddr.s_addr;
5962 }
5963 }
5964 entry->cfentry_family = AF_INET;
5965 return TRUE;
5966 }
5967 return FALSE;
5968 }
5969
5970 bool
5971 check_port(struct sockaddr *addr, u_short port)
5972 {
5973 struct sockaddr_in *sin = NULL;
5974 struct sockaddr_in6 *sin6 = NULL;
5975
5976 if (addr == NULL || port == 0) {
5977 return FALSE;
5978 }
5979
5980 switch (addr->sa_family) {
5981 case AF_INET:
5982 sin = satosin(addr);
5983 if (sin->sin_len != sizeof(*sin)) {
5984 return FALSE;
5985 }
5986 if (port == ntohs(sin->sin_port)) {
5987 return TRUE;
5988 }
5989 break;
5990 case AF_INET6:
5991 sin6 = satosin6(addr);
5992 if (sin6->sin6_len != sizeof(*sin6)) {
5993 return FALSE;
5994 }
5995 if (port == ntohs(sin6->sin6_port)) {
5996 return TRUE;
5997 }
5998 break;
5999 default:
6000 break;
6001 }
6002 return FALSE;
6003 }
6004
6005 struct cfil_hash_entry *
6006 cfil_db_lookup_entry_with_sockid(struct cfil_db *db, u_int64_t sock_id)
6007 {
6008 struct cfilhashhead *cfilhash = NULL;
6009 u_int32_t flowhash = (u_int32_t)(sock_id & 0x0ffffffff);
6010 struct cfil_hash_entry *nextentry;
6011
6012 if (db == NULL || db->cfdb_hashbase == NULL || sock_id == 0) {
6013 return NULL;
6014 }
6015
6016 flowhash &= db->cfdb_hashmask;
6017 cfilhash = &db->cfdb_hashbase[flowhash];
6018
6019 LIST_FOREACH(nextentry, cfilhash, cfentry_link) {
6020 if (nextentry->cfentry_cfil != NULL &&
6021 nextentry->cfentry_cfil->cfi_sock_id == sock_id) {
6022 CFIL_LOG(LOG_DEBUG, "CFIL: UDP <so %llx> matched <id %llu, hash %u>",
6023 (uint64_t)VM_KERNEL_ADDRPERM(db->cfdb_so), nextentry->cfentry_cfil->cfi_sock_id, flowhash);
6024 cfil_hash_entry_log(LOG_DEBUG, db->cfdb_so, nextentry, 0, "CFIL: UDP found entry");
6025 return nextentry;
6026 }
6027 }
6028
6029 CFIL_LOG(LOG_DEBUG, "CFIL: UDP <so %llx> NOT matched <id %llu, hash %u>",
6030 (uint64_t)VM_KERNEL_ADDRPERM(db->cfdb_so), sock_id, flowhash);
6031 return NULL;
6032 }
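/*
 * Note on the lookup above: the low 32 bits of a datagram sock id are
 * expected to carry the entry's cfentry_flowhash (computed in
 * cfil_db_add_entry() below), which is why masking the id with
 * 0x0ffffffff and then with cfdb_hashmask lands on the right bucket
 * before the list is walked for an exact cfi_sock_id match.
 * Illustrative arithmetic with a hypothetical id:
 *
 *	u_int64_t sock_id  = 0x0000004500a1b2c3ULL;
 *	u_int32_t flowhash = (u_int32_t)(sock_id & 0x0ffffffff);  // 0x00a1b2c3
 *	u_int32_t bucket   = flowhash & db->cfdb_hashmask;	   // bucket index
 */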
6033
6034 struct cfil_hash_entry *
6035 cfil_db_lookup_entry_internal(struct cfil_db *db, struct sockaddr *local, struct sockaddr *remote, boolean_t remoteOnly, boolean_t withLocalPort)
6036 {
6037 struct cfil_hash_entry matchentry = { };
6038 struct cfil_hash_entry *nextentry = NULL;
6039 struct inpcb *inp = sotoinpcb(db->cfdb_so);
6040 u_int32_t hashkey_faddr = 0, hashkey_laddr = 0;
6041 u_int16_t hashkey_fport = 0, hashkey_lport = 0;
6042 int inp_hash_element = 0;
6043 struct cfilhashhead *cfilhash = NULL;
6044
6045 CFIL_LOG(LOG_INFO, "");
6046
6047 if (inp == NULL) {
6048 goto done;
6049 }
6050
6051 if (local != NULL) {
6052 fill_cfil_hash_entry_from_address(&matchentry, TRUE, local, FALSE);
6053 } else {
6054 fill_cfil_hash_entry_from_inp(&matchentry, TRUE, inp, FALSE);
6055 }
6056 if (remote != NULL) {
6057 fill_cfil_hash_entry_from_address(&matchentry, FALSE, remote, FALSE);
6058 } else {
6059 fill_cfil_hash_entry_from_inp(&matchentry, FALSE, inp, FALSE);
6060 }
6061
6062 if (inp->inp_vflag & INP_IPV6) {
6063 hashkey_faddr = matchentry.cfentry_faddr.addr6.s6_addr32[3];
6064 hashkey_laddr = (remoteOnly == false) ? matchentry.cfentry_laddr.addr6.s6_addr32[3] : 0;
6065 } else {
6066 hashkey_faddr = matchentry.cfentry_faddr.addr46.ia46_addr4.s_addr;
6067 hashkey_laddr = (remoteOnly == false) ? matchentry.cfentry_laddr.addr46.ia46_addr4.s_addr : 0;
6068 }
6069
6070 hashkey_fport = matchentry.cfentry_fport;
6071 hashkey_lport = (remoteOnly == false || withLocalPort == true) ? matchentry.cfentry_lport : 0;
6072
6073 inp_hash_element = CFIL_HASH(hashkey_laddr, hashkey_faddr, hashkey_lport, hashkey_fport);
6074 inp_hash_element &= db->cfdb_hashmask;
6075 cfilhash = &db->cfdb_hashbase[inp_hash_element];
6076
6077 LIST_FOREACH(nextentry, cfilhash, cfentry_link) {
6078 if ((inp->inp_vflag & INP_IPV6) &&
6079 (remoteOnly || nextentry->cfentry_lport_updated || nextentry->cfentry_lport == matchentry.cfentry_lport) &&
6080 nextentry->cfentry_fport == matchentry.cfentry_fport &&
6081 (remoteOnly || nextentry->cfentry_laddr_updated || IN6_ARE_ADDR_EQUAL(&nextentry->cfentry_laddr.addr6, &matchentry.cfentry_laddr.addr6)) &&
6082 IN6_ARE_ADDR_EQUAL(&nextentry->cfentry_faddr.addr6, &matchentry.cfentry_faddr.addr6)) {
6083 #if DATA_DEBUG
6084 cfil_hash_entry_log(LOG_DEBUG, db->cfdb_so, &matchentry, 0, "CFIL LOOKUP ENTRY: UDP V6 found entry");
6085 #endif
6086 return nextentry;
6087 } else if ((remoteOnly || nextentry->cfentry_lport_updated || nextentry->cfentry_lport == matchentry.cfentry_lport) &&
6088 nextentry->cfentry_fport == matchentry.cfentry_fport &&
6089 (remoteOnly || nextentry->cfentry_laddr_updated || nextentry->cfentry_laddr.addr46.ia46_addr4.s_addr == matchentry.cfentry_laddr.addr46.ia46_addr4.s_addr) &&
6090 nextentry->cfentry_faddr.addr46.ia46_addr4.s_addr == matchentry.cfentry_faddr.addr46.ia46_addr4.s_addr) {
6091 #if DATA_DEBUG
6092 cfil_hash_entry_log(LOG_DEBUG, db->cfdb_so, &matchentry, 0, "CFIL LOOKUP ENTRY: UDP V4 found entry");
6093 #endif
6094 return nextentry;
6095 }
6096 }
6097
6098 done:
6099 #if DATA_DEBUG
6100 cfil_hash_entry_log(LOG_DEBUG, db->cfdb_so, &matchentry, 0, "CFIL LOOKUP ENTRY: UDP no entry found");
6101 #endif
6102 return NULL;
6103 }
6104
6105 struct cfil_hash_entry *
6106 cfil_db_lookup_entry(struct cfil_db *db, struct sockaddr *local, struct sockaddr *remote, boolean_t remoteOnly)
6107 {
6108 struct cfil_hash_entry *entry = cfil_db_lookup_entry_internal(db, local, remote, remoteOnly, false);
6109 if (entry == NULL && remoteOnly == true) {
6110 entry = cfil_db_lookup_entry_internal(db, local, remote, remoteOnly, true);
6111 }
6112 return entry;
6113 }
6114
6115 cfil_sock_id_t
6116 cfil_sock_id_from_datagram_socket(struct socket *so, struct sockaddr *local, struct sockaddr *remote)
6117 {
6118 struct cfil_hash_entry *hash_entry = NULL;
6119
6120 socket_lock_assert_owned(so);
6121
6122 if (so->so_cfil_db == NULL) {
6123 return CFIL_SOCK_ID_NONE;
6124 }
6125
6126 hash_entry = cfil_db_lookup_entry(so->so_cfil_db, local, remote, false);
6127 if (hash_entry == NULL) {
6128 // No match with both local and remote, try match with remote only
6129 hash_entry = cfil_db_lookup_entry(so->so_cfil_db, local, remote, true);
6130 }
6131 if (hash_entry == NULL || hash_entry->cfentry_cfil == NULL) {
6132 return CFIL_SOCK_ID_NONE;
6133 }
6134
6135 return hash_entry->cfentry_cfil->cfi_sock_id;
6136 }
6137
6138 void
6139 cfil_db_delete_entry(struct cfil_db *db, struct cfil_hash_entry *hash_entry)
6140 {
6141 if (hash_entry == NULL) {
6142 return;
6143 }
6144 if (db == NULL || db->cfdb_count == 0) {
6145 return;
6146 }
6147 db->cfdb_count--;
6148 if (db->cfdb_only_entry == hash_entry) {
6149 db->cfdb_only_entry = NULL;
6150 }
6151 LIST_REMOVE(hash_entry, cfentry_link);
6152 zfree(cfil_hash_entry_zone, hash_entry);
6153 }
6154
6155 struct cfil_hash_entry *
6156 cfil_db_add_entry(struct cfil_db *db, struct sockaddr *local, struct sockaddr *remote)
6157 {
6158 struct cfil_hash_entry *entry = NULL;
6159 struct inpcb *inp = sotoinpcb(db->cfdb_so);
6160 u_int32_t hashkey_faddr = 0, hashkey_laddr = 0;
6161 int inp_hash_element = 0;
6162 struct cfilhashhead *cfilhash = NULL;
6163
6164 CFIL_LOG(LOG_INFO, "");
6165
6166 if (inp == NULL) {
6167 goto done;
6168 }
6169
6170 entry = zalloc(cfil_hash_entry_zone);
6171 if (entry == NULL) {
6172 goto done;
6173 }
6174 bzero(entry, sizeof(struct cfil_hash_entry));
6175
6176 if (local != NULL) {
6177 fill_cfil_hash_entry_from_address(entry, TRUE, local, FALSE);
6178 } else {
6179 fill_cfil_hash_entry_from_inp(entry, TRUE, inp, FALSE);
6180 }
6181 if (remote != NULL) {
6182 fill_cfil_hash_entry_from_address(entry, FALSE, remote, FALSE);
6183 } else {
6184 fill_cfil_hash_entry_from_inp(entry, FALSE, inp, FALSE);
6185 }
6186 entry->cfentry_lastused = net_uptime();
6187
6188 if (inp->inp_vflag & INP_IPV6) {
6189 hashkey_faddr = entry->cfentry_faddr.addr6.s6_addr32[3];
6190 hashkey_laddr = entry->cfentry_laddr.addr6.s6_addr32[3];
6191 } else {
6192 hashkey_faddr = entry->cfentry_faddr.addr46.ia46_addr4.s_addr;
6193 hashkey_laddr = entry->cfentry_laddr.addr46.ia46_addr4.s_addr;
6194 }
6195 entry->cfentry_flowhash = CFIL_HASH(hashkey_laddr, hashkey_faddr,
6196 entry->cfentry_lport, entry->cfentry_fport);
6197 inp_hash_element = entry->cfentry_flowhash & db->cfdb_hashmask;
6198
6199 cfilhash = &db->cfdb_hashbase[inp_hash_element];
6200
6201 LIST_INSERT_HEAD(cfilhash, entry, cfentry_link);
6202 db->cfdb_count++;
6203 db->cfdb_only_entry = entry;
6204 cfil_hash_entry_log(LOG_DEBUG, db->cfdb_so, entry, 0, "CFIL: cfil_db_add_entry: ADDED");
6205
6206 done:
6207 CFIL_LOG(LOG_DEBUG, "CFIL: UDP <so %llx> total count %d", (uint64_t)VM_KERNEL_ADDRPERM(db->cfdb_so), db->cfdb_count);
6208 return entry;
6209 }
6210
6211 void
6212 cfil_db_update_entry_local(struct cfil_db *db, struct cfil_hash_entry *entry, struct sockaddr *local, struct mbuf *control)
6213 {
6214 struct inpcb *inp = sotoinpcb(db->cfdb_so);
6215 union sockaddr_in_4_6 address_buf = { };
6216
6217 CFIL_LOG(LOG_INFO, "");
6218
6219 if (inp == NULL || entry == NULL) {
6220 return;
6221 }
6222
6223 if (LOCAL_ADDRESS_NEEDS_UPDATE(entry)) {
6224 // Flow does not have a local address yet. Retrieve local address
6225 // from control mbufs if present.
6226 if (local == NULL && control != NULL) {
6227 uint8_t *addr_ptr = NULL;
6228 int size = cfil_sock_udp_get_address_from_control(entry->cfentry_family, control, &addr_ptr);
6229
6230 if (size && addr_ptr) {
6231 switch (entry->cfentry_family) {
6232 case AF_INET:
6233 if (size == sizeof(struct in_addr)) {
6234 address_buf.sin.sin_port = 0;
6235 address_buf.sin.sin_family = AF_INET;
6236 address_buf.sin.sin_len = sizeof(struct sockaddr_in);
6237 (void) memcpy(&address_buf.sin.sin_addr, addr_ptr, sizeof(struct in_addr));
6238 local = sintosa(&address_buf.sin);
6239 }
6240 break;
6241 case AF_INET6:
6242 if (size == sizeof(struct in6_addr)) {
6243 address_buf.sin6.sin6_port = 0;
6244 address_buf.sin6.sin6_family = AF_INET6;
6245 address_buf.sin6.sin6_len = sizeof(struct sockaddr_in6);
6246 (void) memcpy(&address_buf.sin6.sin6_addr, addr_ptr, sizeof(struct in6_addr));
6247 local = sin6tosa(&address_buf.sin6);
6248 }
6249 break;
6250 default:
6251 break;
6252 }
6253 }
6254 }
6255 if (local != NULL) {
6256 fill_cfil_hash_entry_from_address(entry, TRUE, local, TRUE);
6257 } else {
6258 fill_cfil_hash_entry_from_inp(entry, TRUE, inp, TRUE);
6259 }
6260 }
6261
6262 if (LOCAL_PORT_NEEDS_UPDATE(entry, db->cfdb_so)) {
6263 fill_cfil_hash_entry_from_inp(entry, TRUE, inp, TRUE);
6264 }
6265
6266 return;
6267 }
6268
6269 struct cfil_info *
6270 cfil_db_get_cfil_info(struct cfil_db *db, cfil_sock_id_t id)
6271 {
6272 struct cfil_hash_entry *hash_entry = NULL;
6273
6274 CFIL_LOG(LOG_INFO, "");
6275
6276 if (db == NULL || id == 0) {
6277 CFIL_LOG(LOG_DEBUG, "CFIL: UDP <so %llx> NULL DB <id %llu>",
6278 db ? (uint64_t)VM_KERNEL_ADDRPERM(db->cfdb_so) : 0, id);
6279 return NULL;
6280 }
6281
6282 // This is an optimization for a connected UDP socket, which only has one flow.
6283 // No need to do the hash lookup.
6284 if (db->cfdb_count == 1) {
6285 if (db->cfdb_only_entry && db->cfdb_only_entry->cfentry_cfil &&
6286 db->cfdb_only_entry->cfentry_cfil->cfi_sock_id == id) {
6287 return db->cfdb_only_entry->cfentry_cfil;
6288 }
6289 }
6290
6291 hash_entry = cfil_db_lookup_entry_with_sockid(db, id);
6292 return hash_entry != NULL ? hash_entry->cfentry_cfil : NULL;
6293 }
6294
6295 struct cfil_hash_entry *
6296 cfil_sock_udp_get_flow(struct socket *so, uint32_t filter_control_unit, bool outgoing, struct sockaddr *local, struct sockaddr *remote, struct mbuf *control, int debug)
6297 {
6298 struct cfil_hash_entry *hash_entry = NULL;
6299 int new_filter_control_unit = 0;
6300
6301 errno_t error = 0;
6302 socket_lock_assert_owned(so);
6303
6304 // If new socket, allocate cfil db
6305 if (so->so_cfil_db == NULL) {
6306 if (cfil_db_init(so) != 0) {
6307 return NULL;
6308 }
6309 }
6310
6311 // See if flow already exists.
6312 hash_entry = cfil_db_lookup_entry(so->so_cfil_db, local, remote, false);
6313 if (hash_entry == NULL) {
6314 // No match with both local and remote, try match with remote only
6315 hash_entry = cfil_db_lookup_entry(so->so_cfil_db, local, remote, true);
6316 }
6317 if (hash_entry != NULL) {
6318 /* Drop pre-existing UDP flow if filter state changed */
6319 new_filter_control_unit = necp_socket_get_content_filter_control_unit(so);
6320 if (new_filter_control_unit > 0 &&
6321 new_filter_control_unit != hash_entry->cfentry_cfil->cfi_filter_control_unit) {
6322 return NULL;
6323 }
6324
6325 // Try to update flow info from socket and/or control mbufs if necessary
6326 if (LOCAL_ADDRESS_NEEDS_UPDATE(hash_entry) || LOCAL_PORT_NEEDS_UPDATE(hash_entry, so)) {
6327 cfil_db_update_entry_local(so->so_cfil_db, hash_entry, local, control);
6328 }
6329 return hash_entry;
6330 }
6331
6332 hash_entry = cfil_db_add_entry(so->so_cfil_db, local, remote);
6333 if (hash_entry == NULL) {
6334 OSIncrementAtomic(&cfil_stats.cfs_sock_attach_no_mem);
6335 CFIL_LOG(LOG_ERR, "CFIL: UDP failed to add entry");
6336 return NULL;
6337 }
6338
6339 if (cfil_info_alloc(so, hash_entry) == NULL ||
6340 hash_entry->cfentry_cfil == NULL) {
6341 cfil_db_delete_entry(so->so_cfil_db, hash_entry);
6342 CFIL_LOG(LOG_ERR, "CFIL: UDP failed to alloc cfil_info");
6343 OSIncrementAtomic(&cfil_stats.cfs_sock_attach_no_mem);
6344 return NULL;
6345 }
6346 hash_entry->cfentry_cfil->cfi_filter_control_unit = filter_control_unit;
6347 hash_entry->cfentry_cfil->cfi_dir = outgoing ? CFS_CONNECTION_DIR_OUT : CFS_CONNECTION_DIR_IN;
6348 hash_entry->cfentry_cfil->cfi_debug = debug;
6349
6350 #if LIFECYCLE_DEBUG
6351 cfil_info_log(LOG_ERR, hash_entry->cfentry_cfil, "CFIL: LIFECYCLE: ADDED");
6352 #endif
6353
6354 // Check if we can update the new flow's local address from control mbufs
6355 if (control != NULL) {
6356 cfil_db_update_entry_local(so->so_cfil_db, hash_entry, local, control);
6357 }
6358
6359 if (cfil_info_attach_unit(so, filter_control_unit, hash_entry->cfentry_cfil) == 0) {
6360 CFIL_INFO_FREE(hash_entry->cfentry_cfil);
6361 cfil_db_delete_entry(so->so_cfil_db, hash_entry);
6362 CFIL_LOG(LOG_ERR, "CFIL: UDP cfil_info_attach_unit(%u) failed",
6363 filter_control_unit);
6364 OSIncrementAtomic(&cfil_stats.cfs_sock_attach_failed);
6365 return NULL;
6366 }
6367 CFIL_LOG(LOG_DEBUG, "CFIL: UDP <so %llx> filter_control_unit %u sockID %llu attached",
6368 (uint64_t)VM_KERNEL_ADDRPERM(so),
6369 filter_control_unit, hash_entry->cfentry_cfil->cfi_sock_id);
6370
6371 so->so_flags |= SOF_CONTENT_FILTER;
6372 OSIncrementAtomic(&cfil_stats.cfs_sock_attached);
6373
6374 /* Hold a reference on the socket for each flow */
6375 so->so_usecount++;
6376
6377 if (debug) {
6378 cfil_info_log(LOG_ERR, hash_entry->cfentry_cfil, "CFIL: LIFECYCLE: ADDED");
6379 }
6380
6381 error = cfil_dispatch_attach_event(so, hash_entry->cfentry_cfil, 0,
6382 outgoing ? CFS_CONNECTION_DIR_OUT : CFS_CONNECTION_DIR_IN);
6383 /* We can recover from flow control or out of memory errors */
6384 if (error != 0 && error != ENOBUFS && error != ENOMEM) {
6385 return NULL;
6386 }
6387
6388 CFIL_INFO_VERIFY(hash_entry->cfentry_cfil);
6389 return hash_entry;
6390 }
6391
6392 int
6393 cfil_sock_udp_get_address_from_control(sa_family_t family, struct mbuf *control, uint8_t **address_ptr)
6394 {
6395 struct cmsghdr *cm;
6396 struct in6_pktinfo *pi6;
6397
6398 if (control == NULL || address_ptr == NULL) {
6399 return 0;
6400 }
6401
6402 while (control) {
6403 if (control->m_type != MT_CONTROL) {
6404 control = control->m_next;
6405 continue;
6406 }
6407
6408 for (cm = M_FIRST_CMSGHDR(control);
6409 is_cmsg_valid(control, cm);
6410 cm = M_NXT_CMSGHDR(control, cm)) {
6411 switch (cm->cmsg_type) {
6412 case IP_RECVDSTADDR:
6413 if (family == AF_INET &&
6414 cm->cmsg_level == IPPROTO_IP &&
6415 cm->cmsg_len == CMSG_LEN(sizeof(struct in_addr))) {
6416 *address_ptr = CMSG_DATA(cm);
6417 return sizeof(struct in_addr);
6418 }
6419 break;
6420 case IPV6_PKTINFO:
6421 case IPV6_2292PKTINFO:
6422 if (family == AF_INET6 &&
6423 cm->cmsg_level == IPPROTO_IPV6 &&
6424 cm->cmsg_len == CMSG_LEN(sizeof(struct in6_pktinfo))) {
6425 pi6 = (struct in6_pktinfo *)(void *)CMSG_DATA(cm);
6426 *address_ptr = (uint8_t *)&pi6->ipi6_addr;
6427 return sizeof(struct in6_addr);
6428 }
6429 break;
6430 default:
6431 break;
6432 }
6433 }
6434
6435 control = control->m_next;
6436 }
6437 return 0;
6438 }
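/*
 * The control mbufs parsed above can originate from a sendmsg(2) that
 * pins the source address of a not-yet-bound datagram socket.  A
 * minimal user space sketch producing the IPV6_PKTINFO cmsg handled in
 * the IPv6 case (address and remaining msghdr fields are illustrative):
 *
 *	struct in6_pktinfo pi = { 0 };
 *	char cbuf[CMSG_SPACE(sizeof(pi))];
 *	struct msghdr msg = { 0 };
 *	struct cmsghdr *cm;
 *
 *	inet_pton(AF_INET6, "2001:db8::1", &pi.ipi6_addr);  // desired local address
 *	msg.msg_control = cbuf;
 *	msg.msg_controllen = sizeof(cbuf);
 *	cm = CMSG_FIRSTHDR(&msg);
 *	cm->cmsg_level = IPPROTO_IPV6;
 *	cm->cmsg_type = IPV6_PKTINFO;
 *	cm->cmsg_len = CMSG_LEN(sizeof(pi));
 *	memcpy(CMSG_DATA(cm), &pi, sizeof(pi));
 *	// ...set msg_name/msg_iov and call sendmsg(fd, &msg, 0)
 */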
6439
6440 errno_t
6441 cfil_sock_udp_handle_data(bool outgoing, struct socket *so,
6442 struct sockaddr *local, struct sockaddr *remote,
6443 struct mbuf *data, struct mbuf *control, uint32_t flags)
6444 {
6445 #pragma unused(outgoing, so, local, remote, data, control, flags)
6446 errno_t error = 0;
6447 uint32_t filter_control_unit;
6448 struct cfil_hash_entry *hash_entry = NULL;
6449 struct cfil_info *cfil_info = NULL;
6450 int debug = 0;
6451
6452 socket_lock_assert_owned(so);
6453
6454 if (cfil_active_count == 0) {
6455 CFIL_LOG(LOG_DEBUG, "CFIL: UDP no active filter");
6456 OSIncrementAtomic(&cfil_stats.cfs_sock_attach_in_vain);
6457 return error;
6458 }
6459
6460 // Socket has been blessed
6461 if ((so->so_flags1 & SOF1_CONTENT_FILTER_SKIP) != 0) {
6462 return error;
6463 }
6464
6465 filter_control_unit = necp_socket_get_content_filter_control_unit(so);
6466 if (filter_control_unit == 0) {
6467 CFIL_LOG(LOG_DEBUG, "CFIL: UDP failed to get control unit");
6468 return error;
6469 }
6470
6471 if (filter_control_unit == NECP_FILTER_UNIT_NO_FILTER) {
6472 return error;
6473 }
6474
6475 if ((filter_control_unit & NECP_MASK_USERSPACE_ONLY) != 0) {
6476 CFIL_LOG(LOG_DEBUG, "CFIL: UDP user space only");
6477 OSIncrementAtomic(&cfil_stats.cfs_sock_userspace_only);
6478 return error;
6479 }
6480
6481 hash_entry = cfil_sock_udp_get_flow(so, filter_control_unit, outgoing, local, remote, control, debug);
6482 if (hash_entry == NULL || hash_entry->cfentry_cfil == NULL) {
6483 CFIL_LOG(LOG_ERR, "CFIL: Failed to create UDP flow");
6484 return EPIPE;
6485 }
6486 // Update the last-used timestamp; this drives the flow idle timeout
6487 hash_entry->cfentry_lastused = net_uptime();
6488 cfil_info = hash_entry->cfentry_cfil;
6489
6490 if (cfil_info->cfi_flags & CFIF_DROP) {
6491 #if DATA_DEBUG
6492 cfil_hash_entry_log(LOG_DEBUG, so, hash_entry, 0, "CFIL: UDP DROP");
6493 #endif
6494 return EPIPE;
6495 }
6496 if (control != NULL) {
6497 OSIncrementAtomic(&cfil_stats.cfs_data_in_control);
6498 }
6499 if (data->m_type == MT_OOBDATA) {
6500 CFIL_LOG(LOG_ERR, "so %llx MSG_OOB",
6501 (uint64_t)VM_KERNEL_ADDRPERM(so));
6502 OSIncrementAtomic(&cfil_stats.cfs_data_in_oob);
6503 }
6504
6505 error = cfil_data_common(so, cfil_info, outgoing, remote, data, control, flags);
6506
6507 return error;
6508 }
6509
6510 /*
6511 * Go through all UDP flows for the specified socket and return TRUE if
6512 * any flow is still attached. If need_wait is TRUE, wait on the first
6513 * attached flow.
6514 */
6515 static int
6516 cfil_filters_udp_attached(struct socket *so, bool need_wait)
6517 {
6518 struct timespec ts;
6519 lck_mtx_t *mutex_held;
6520 struct cfilhashhead *cfilhash = NULL;
6521 struct cfil_db *db = NULL;
6522 struct cfil_hash_entry *hash_entry = NULL;
6523 struct cfil_hash_entry *temp_hash_entry = NULL;
6524 struct cfil_info *cfil_info = NULL;
6525 struct cfil_entry *entry = NULL;
6526 errno_t error = 0;
6527 int kcunit;
6528 int attached = 0;
6529 uint64_t sock_flow_id = 0;
6530
6531 socket_lock_assert_owned(so);
6532
6533 if ((so->so_flags & SOF_CONTENT_FILTER) != 0 && so->so_cfil_db != NULL) {
6534 if (so->so_proto->pr_getlock != NULL) {
6535 mutex_held = (*so->so_proto->pr_getlock)(so, PR_F_WILLUNLOCK);
6536 } else {
6537 mutex_held = so->so_proto->pr_domain->dom_mtx;
6538 }
6539 LCK_MTX_ASSERT(mutex_held, LCK_MTX_ASSERT_OWNED);
6540
6541 db = so->so_cfil_db;
6542
6543 for (int i = 0; i < CFILHASHSIZE; i++) {
6544 cfilhash = &db->cfdb_hashbase[i];
6545
6546 LIST_FOREACH_SAFE(hash_entry, cfilhash, cfentry_link, temp_hash_entry) {
6547 if (hash_entry->cfentry_cfil != NULL) {
6548 cfil_info = hash_entry->cfentry_cfil;
6549 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
6550 entry = &cfil_info->cfi_entries[kcunit - 1];
6551
6552 /* Are we attached to the filter? */
6553 if (entry->cfe_filter == NULL) {
6554 continue;
6555 }
6556
6557 if ((entry->cfe_flags & CFEF_SENT_SOCK_ATTACHED) == 0) {
6558 continue;
6559 }
6560 if ((entry->cfe_flags & CFEF_CFIL_DETACHED) != 0) {
6561 continue;
6562 }
6563
6564 attached = 1;
6565
6566 if (need_wait == TRUE) {
6567 #if LIFECYCLE_DEBUG
6568 cfil_info_log(LOG_ERR, cfil_info, "CFIL: LIFECYCLE: WAIT FOR FLOW TO FINISH");
6569 #endif
6570
6571 ts.tv_sec = cfil_close_wait_timeout / 1000;
6572 ts.tv_nsec = (cfil_close_wait_timeout % 1000) *
6573 NSEC_PER_USEC * 1000;
6574
6575 OSIncrementAtomic(&cfil_stats.cfs_close_wait);
6576 cfil_info->cfi_flags |= CFIF_CLOSE_WAIT;
6577 sock_flow_id = cfil_info->cfi_sock_id;
6578
6579 error = msleep((caddr_t)cfil_info, mutex_held,
6580 PSOCK | PCATCH, "cfil_filters_udp_attached", &ts);
6581
6582 // Woke up from sleep, validate if cfil_info is still valid
6583 if (so->so_cfil_db == NULL ||
6584 (cfil_info != cfil_db_get_cfil_info(so->so_cfil_db, sock_flow_id))) {
6585 // cfil_info is not valid, do not continue
6586 goto done;
6587 }
6588
6589 cfil_info->cfi_flags &= ~CFIF_CLOSE_WAIT;
6590
6591 #if LIFECYCLE_DEBUG
6592 cfil_info_log(LOG_ERR, cfil_info, "CFIL: LIFECYCLE: WAIT FOR FLOW DONE");
6593 #endif
6594
6595 /*
6596 * Force close in case of timeout
6597 */
6598 if (error != 0) {
6599 OSIncrementAtomic(&cfil_stats.cfs_close_wait_timeout);
6600 #if LIFECYCLE_DEBUG
6601 cfil_info_log(LOG_ERR, cfil_info, "CFIL: LIFECYCLE: WAIT FOR FLOW TIMED OUT, FORCE DETACH");
6602 #endif
6603 entry->cfe_flags |= CFEF_CFIL_DETACHED;
6604 }
6605 }
6606 goto done;
6607 }
6608 }
6609 }
6610 }
6611 }
6612
6613 done:
6614 return attached;
6615 }
6616
6617 int32_t
6618 cfil_sock_udp_data_pending(struct sockbuf *sb, bool check_thread)
6619 {
6620 struct socket *so = sb->sb_so;
6621 struct cfi_buf *cfi_buf;
6622 uint64_t pending = 0;
6623 uint64_t total_pending = 0;
6624 struct cfilhashhead *cfilhash = NULL;
6625 struct cfil_db *db = NULL;
6626 struct cfil_hash_entry *hash_entry = NULL;
6627 struct cfil_hash_entry *temp_hash_entry = NULL;
6628
6629 socket_lock_assert_owned(so);
6630
6631 if ((so->so_flags & SOF_CONTENT_FILTER) != 0 && so->so_cfil_db != NULL &&
6632 (check_thread == FALSE || so->so_snd.sb_cfil_thread != current_thread())) {
6633 db = so->so_cfil_db;
6634
6635 for (int i = 0; i < CFILHASHSIZE; i++) {
6636 cfilhash = &db->cfdb_hashbase[i];
6637
6638 LIST_FOREACH_SAFE(hash_entry, cfilhash, cfentry_link, temp_hash_entry) {
6639 if (hash_entry->cfentry_cfil != NULL) {
6640 if ((sb->sb_flags & SB_RECV) == 0) {
6641 cfi_buf = &hash_entry->cfentry_cfil->cfi_snd;
6642 } else {
6643 cfi_buf = &hash_entry->cfentry_cfil->cfi_rcv;
6644 }
6645
6646 pending = cfi_buf->cfi_pending_last - cfi_buf->cfi_pending_first;
6647 /*
6648 * If we are limited by the number of "chars of mbufs used",
6649 * adjust roughly so we won't overcommit
6650 */
6651 if ((uint64_t)cfi_buf->cfi_pending_mbcnt > pending) {
6652 pending = cfi_buf->cfi_pending_mbcnt;
6653 }
6654
6655 total_pending += pending;
6656 }
6657 }
6658 }
6659
6660 VERIFY(total_pending < INT32_MAX);
6661 #if DATA_DEBUG
6662 CFIL_LOG(LOG_DEBUG, "CFIL: <so %llx> total pending %llu <check_thread %d>",
6663 (uint64_t)VM_KERNEL_ADDRPERM(so),
6664 total_pending, check_thread);
6665 #endif
6666 }
6667
6668 return (int32_t)(total_pending);
6669 }
6670
6671 int
6672 cfil_sock_udp_notify_shutdown(struct socket *so, int how, int drop_flag, int shut_flag)
6673 {
6674 struct cfil_info *cfil_info = NULL;
6675 struct cfilhashhead *cfilhash = NULL;
6676 struct cfil_db *db = NULL;
6677 struct cfil_hash_entry *hash_entry = NULL;
6678 struct cfil_hash_entry *temp_hash_entry = NULL;
6679 errno_t error = 0;
6680 int done_count = 0;
6681 int kcunit;
6682
6683 socket_lock_assert_owned(so);
6684
6685 if ((so->so_flags & SOF_CONTENT_FILTER) != 0 && so->so_cfil_db != NULL) {
6686 db = so->so_cfil_db;
6687
6688 for (int i = 0; i < CFILHASHSIZE; i++) {
6689 cfilhash = &db->cfdb_hashbase[i];
6690
6691 LIST_FOREACH_SAFE(hash_entry, cfilhash, cfentry_link, temp_hash_entry) {
6692 if (hash_entry->cfentry_cfil != NULL) {
6693 cfil_info = hash_entry->cfentry_cfil;
6694
6695 // This flow is marked as DROP
6696 if (cfil_info->cfi_flags & drop_flag) {
6697 done_count++;
6698 continue;
6699 }
6700
6701 // This flow has been shut already, skip
6702 if (cfil_info->cfi_flags & shut_flag) {
6703 continue;
6704 }
6705 // Mark flow as shut
6706 cfil_info->cfi_flags |= shut_flag;
6707 done_count++;
6708
6709 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
6710 /* Disconnect incoming side */
6711 if (how != SHUT_WR) {
6712 error = cfil_dispatch_disconnect_event(so, cfil_info, kcunit, 0);
6713 }
6714 /* Disconnect outgoing side */
6715 if (how != SHUT_RD) {
6716 error = cfil_dispatch_disconnect_event(so, cfil_info, kcunit, 1);
6717 }
6718 }
6719 }
6720 }
6721 }
6722 }
6723
6724 if (done_count == 0) {
6725 error = ENOTCONN;
6726 }
6727 return error;
6728 }
6729
6730 int
6731 cfil_sock_udp_shutdown(struct socket *so, int *how)
6732 {
6733 int error = 0;
6734
6735 if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || (so->so_cfil_db == NULL)) {
6736 goto done;
6737 }
6738
6739 socket_lock_assert_owned(so);
6740
6741 CFIL_LOG(LOG_INFO, "so %llx how %d",
6742 (uint64_t)VM_KERNEL_ADDRPERM(so), *how);
6743
6744 /*
6745 * Check the state of the socket before the content filter
6746 */
6747 if (*how != SHUT_WR && (so->so_state & SS_CANTRCVMORE) != 0) {
6748 /* read already shut down */
6749 error = ENOTCONN;
6750 goto done;
6751 }
6752 if (*how != SHUT_RD && (so->so_state & SS_CANTSENDMORE) != 0) {
6753 /* write already shut down */
6754 error = ENOTCONN;
6755 goto done;
6756 }
6757
6758 /*
6759 * shutdown read: SHUT_RD or SHUT_RDWR
6760 */
6761 if (*how != SHUT_WR) {
6762 error = cfil_sock_udp_notify_shutdown(so, SHUT_RD, CFIF_DROP, CFIF_SHUT_RD);
6763 if (error != 0) {
6764 goto done;
6765 }
6766 }
6767 /*
6768 * shutdown write: SHUT_WR or SHUT_RDWR
6769 */
6770 if (*how != SHUT_RD) {
6771 error = cfil_sock_udp_notify_shutdown(so, SHUT_WR, CFIF_DROP, CFIF_SHUT_WR);
6772 if (error != 0) {
6773 goto done;
6774 }
6775
6776 /*
6777 * When outgoing data is pending, we delay the shutdown at the
6778 * protocol level until the content filters give the final
6779 * verdict on the pending data.
6780 */
6781 if (cfil_sock_data_pending(&so->so_snd) != 0) {
6782 /*
6783 * When shutting down the read and write sides at once
6784 * we can proceed to the final shutdown of the read
6785 * side. Otherwise, we just return.
6786 */
6787 if (*how == SHUT_WR) {
6788 error = EJUSTRETURN;
6789 } else if (*how == SHUT_RDWR) {
6790 *how = SHUT_RD;
6791 }
6792 }
6793 }
6794 done:
6795 return error;
6796 }
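/*
 * Sketch of the calling convention suggested by the return value and
 * the in/out how parameter above (illustrative only; the actual caller
 * sits in the socket shutdown path elsewhere):
 *
 *	int how = SHUT_RDWR;
 *	error = cfil_sock_udp_shutdown(so, &how);
 *	if (error == EJUSTRETURN)
 *		return 0;	// defer: filters still hold outgoing data
 *	if (error != 0)
 *		return error;
 *	// otherwise shut down at the protocol level using the possibly
 *	// downgraded how (SHUT_RDWR may have been reduced to SHUT_RD)
 */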
6797
6798 void
6799 cfil_sock_udp_close_wait(struct socket *so)
6800 {
6801 socket_lock_assert_owned(so);
6802
6803 while (cfil_filters_udp_attached(so, FALSE)) {
6804 /*
6805 * Notify the filters we are going away so they can detach
6806 */
6807 cfil_sock_udp_notify_shutdown(so, SHUT_RDWR, 0, 0);
6808
6809 /*
6810 * Make sure we still need to wait after the filters are notified
6811 * of the disconnection
6812 */
6813 if (cfil_filters_udp_attached(so, TRUE) == 0) {
6814 break;
6815 }
6816 }
6817 }
6818
6819 void
6820 cfil_sock_udp_is_closed(struct socket *so)
6821 {
6822 struct cfil_info *cfil_info = NULL;
6823 struct cfilhashhead *cfilhash = NULL;
6824 struct cfil_db *db = NULL;
6825 struct cfil_hash_entry *hash_entry = NULL;
6826 struct cfil_hash_entry *temp_hash_entry = NULL;
6827 errno_t error = 0;
6828 int kcunit;
6829
6830 socket_lock_assert_owned(so);
6831
6832 if ((so->so_flags & SOF_CONTENT_FILTER) != 0 && so->so_cfil_db != NULL) {
6833 db = so->so_cfil_db;
6834
6835 for (int i = 0; i < CFILHASHSIZE; i++) {
6836 cfilhash = &db->cfdb_hashbase[i];
6837
6838 LIST_FOREACH_SAFE(hash_entry, cfilhash, cfentry_link, temp_hash_entry) {
6839 if (hash_entry->cfentry_cfil != NULL) {
6840 cfil_info = hash_entry->cfentry_cfil;
6841
6842 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
6843 /* Let the filters know of the closing */
6844 error = cfil_dispatch_closed_event(so, cfil_info, kcunit);
6845 }
6846
6847 /* Last chance to push passed data out */
6848 error = cfil_acquire_sockbuf(so, cfil_info, 1);
6849 if (error == 0) {
6850 cfil_service_inject_queue(so, cfil_info, 1);
6851 }
6852 cfil_release_sockbuf(so, 1);
6853
6854 cfil_info->cfi_flags |= CFIF_SOCK_CLOSED;
6855
6856 /* Pending data needs to go */
6857 cfil_flush_queues(so, cfil_info);
6858
6859 CFIL_INFO_VERIFY(cfil_info);
6860 }
6861 }
6862 }
6863 }
6864 }
6865
6866 void
6867 cfil_sock_udp_buf_update(struct sockbuf *sb)
6868 {
6869 struct cfil_info *cfil_info = NULL;
6870 struct cfilhashhead *cfilhash = NULL;
6871 struct cfil_db *db = NULL;
6872 struct cfil_hash_entry *hash_entry = NULL;
6873 struct cfil_hash_entry *temp_hash_entry = NULL;
6874 errno_t error = 0;
6875 int outgoing;
6876 struct socket *so = sb->sb_so;
6877
6878 socket_lock_assert_owned(so);
6879
6880 if ((so->so_flags & SOF_CONTENT_FILTER) != 0 && so->so_cfil_db != NULL) {
6881 if (!cfil_sbtrim) {
6882 return;
6883 }
6884
6885 db = so->so_cfil_db;
6886
6887 for (int i = 0; i < CFILHASHSIZE; i++) {
6888 cfilhash = &db->cfdb_hashbase[i];
6889
6890 LIST_FOREACH_SAFE(hash_entry, cfilhash, cfentry_link, temp_hash_entry) {
6891 if (hash_entry->cfentry_cfil != NULL) {
6892 cfil_info = hash_entry->cfentry_cfil;
6893
6894 if ((sb->sb_flags & SB_RECV) == 0) {
6895 if ((cfil_info->cfi_flags & CFIF_RETRY_INJECT_OUT) == 0) {
6896 return;
6897 }
6898 outgoing = 1;
6899 OSIncrementAtomic(&cfil_stats.cfs_inject_q_out_retry);
6900 } else {
6901 if ((cfil_info->cfi_flags & CFIF_RETRY_INJECT_IN) == 0) {
6902 return;
6903 }
6904 outgoing = 0;
6905 OSIncrementAtomic(&cfil_stats.cfs_inject_q_in_retry);
6906 }
6907
6908 CFIL_LOG(LOG_NOTICE, "so %llx outgoing %d",
6909 (uint64_t)VM_KERNEL_ADDRPERM(so), outgoing);
6910
6911 error = cfil_acquire_sockbuf(so, cfil_info, outgoing);
6912 if (error == 0) {
6913 cfil_service_inject_queue(so, cfil_info, outgoing);
6914 }
6915 cfil_release_sockbuf(so, outgoing);
6916 }
6917 }
6918 }
6919 }
6920 }
6921
6922 void
6923 cfil_filter_show(u_int32_t kcunit)
6924 {
6925 struct content_filter *cfc = NULL;
6926 struct cfil_entry *entry;
6927 int count = 0;
6928
6929 if (content_filters == NULL) {
6930 return;
6931 }
6932 if (kcunit > MAX_CONTENT_FILTER) {
6933 return;
6934 }
6935
6936 cfil_rw_lock_shared(&cfil_lck_rw);
6937
6938 if (content_filters[kcunit - 1] == NULL) {
6939 cfil_rw_unlock_shared(&cfil_lck_rw);
6940 return;
6941 }
6942 cfc = content_filters[kcunit - 1];
6943
6944 CFIL_LOG(LOG_ERR, "CFIL: FILTER SHOW: Filter <unit %d, entry count %d> flags <%lx>:",
6945 kcunit, cfc->cf_sock_count, (unsigned long)cfc->cf_flags);
6946 if (cfc->cf_flags & CFF_DETACHING) {
6947 CFIL_LOG(LOG_ERR, "CFIL: FILTER SHOW: - DETACHING");
6948 }
6949 if (cfc->cf_flags & CFF_ACTIVE) {
6950 CFIL_LOG(LOG_ERR, "CFIL: FILTER SHOW: - ACTIVE");
6951 }
6952 if (cfc->cf_flags & CFF_FLOW_CONTROLLED) {
6953 CFIL_LOG(LOG_ERR, "CFIL: FILTER SHOW: - FLOW CONTROLLED");
6954 }
6955
6956 TAILQ_FOREACH(entry, &cfc->cf_sock_entries, cfe_link) {
6957 if (entry->cfe_cfil_info && entry->cfe_cfil_info->cfi_so) {
6958 struct cfil_info *cfil_info = entry->cfe_cfil_info;
6959
6960 count++;
6961
6962 if (entry->cfe_flags & CFEF_CFIL_DETACHED) {
6963 cfil_info_log(LOG_ERR, cfil_info, "CFIL: FILTER SHOW: - DETACHED");
6964 } else {
6965 cfil_info_log(LOG_ERR, cfil_info, "CFIL: FILTER SHOW: - ATTACHED");
6966 }
6967 }
6968 }
6969
6970 CFIL_LOG(LOG_ERR, "CFIL: FILTER SHOW: Filter - total entries shown: %d", count);
6971
6972 cfil_rw_unlock_shared(&cfil_lck_rw);
6973 }
6974
6975 void
6976 cfil_info_show(void)
6977 {
6978 struct cfil_info *cfil_info;
6979 int count = 0;
6980
6981 cfil_rw_lock_shared(&cfil_lck_rw);
6982
6983 CFIL_LOG(LOG_ERR, "CFIL: INFO SHOW: count %d", cfil_sock_attached_count);
6984
6985 TAILQ_FOREACH(cfil_info, &cfil_sock_head, cfi_link) {
6986 count++;
6987
6988 cfil_info_log(LOG_ERR, cfil_info, "CFIL: INFO SHOW");
6989
6990 if (cfil_info->cfi_flags & CFIF_DROP) {
6991 CFIL_LOG(LOG_ERR, "CFIL: INFO FLAG - DROP");
6992 }
6993 if (cfil_info->cfi_flags & CFIF_CLOSE_WAIT) {
6994 CFIL_LOG(LOG_ERR, "CFIL: INFO FLAG - CLOSE_WAIT");
6995 }
6996 if (cfil_info->cfi_flags & CFIF_SOCK_CLOSED) {
6997 CFIL_LOG(LOG_ERR, "CFIL: INFO FLAG - SOCK_CLOSED");
6998 }
6999 if (cfil_info->cfi_flags & CFIF_RETRY_INJECT_IN) {
7000 CFIL_LOG(LOG_ERR, "CFIL: INFO FLAG - RETRY_INJECT_IN");
7001 }
7002 if (cfil_info->cfi_flags & CFIF_RETRY_INJECT_OUT) {
7003 CFIL_LOG(LOG_ERR, "CFIL: INFO FLAG - RETRY_INJECT_OUT");
7004 }
7005 if (cfil_info->cfi_flags & CFIF_SHUT_WR) {
7006 CFIL_LOG(LOG_ERR, "CFIL: INFO FLAG - SHUT_WR");
7007 }
7008 if (cfil_info->cfi_flags & CFIF_SHUT_RD) {
7009 CFIL_LOG(LOG_ERR, "CFIL: INFO FLAG - SHUT_RD");
7010 }
7011 }
7012
7013 CFIL_LOG(LOG_ERR, "CFIL: INFO SHOW: total cfil_info shown: %d", count);
7014
7015 cfil_rw_unlock_shared(&cfil_lck_rw);
7016 }
7017
7018 bool
7019 cfil_info_idle_timed_out(struct cfil_info *cfil_info, int timeout, u_int64_t current_time)
7020 {
7021 if (cfil_info && cfil_info->cfi_hash_entry &&
7022 (current_time - cfil_info->cfi_hash_entry->cfentry_lastused >= (u_int64_t)timeout)) {
7023 #if GC_DEBUG
7024 cfil_info_log(LOG_ERR, cfil_info, "CFIL: flow IDLE timeout expired");
7025 #endif
7026 return true;
7027 }
7028 return false;
7029 }
7030
7031 bool
7032 cfil_info_action_timed_out(struct cfil_info *cfil_info, int timeout)
7033 {
7034 struct cfil_entry *entry;
7035 struct timeval current_tv;
7036 struct timeval diff_time;
7037
7038 if (cfil_info == NULL) {
7039 return false;
7040 }
7041
7042 /*
7043 * If we have queued up more data than passed offset and we haven't received
7044 * an action from user space for a while (the user space filter might have crashed),
7045 * return action timed out.
7046 */
7047 if (cfil_info->cfi_snd.cfi_pending_last > cfil_info->cfi_snd.cfi_pass_offset ||
7048 cfil_info->cfi_rcv.cfi_pending_last > cfil_info->cfi_rcv.cfi_pass_offset) {
7049 microuptime(&current_tv);
7050
7051 for (int kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
7052 entry = &cfil_info->cfi_entries[kcunit - 1];
7053
7054 if (entry->cfe_filter == NULL) {
7055 continue;
7056 }
7057
7058 if (cfil_info->cfi_snd.cfi_pending_last > entry->cfe_snd.cfe_pass_offset ||
7059 cfil_info->cfi_rcv.cfi_pending_last > entry->cfe_rcv.cfe_pass_offset) {
7060 // haven't gotten an action from this filter, check timeout
7061 timersub(&current_tv, &entry->cfe_last_action, &diff_time);
7062 if (diff_time.tv_sec >= timeout) {
7063 #if GC_DEBUG
7064 cfil_info_log(LOG_ERR, cfil_info, "CFIL: flow ACTION timeout expired");
7065 #endif
7066 return true;
7067 }
7068 }
7069 }
7070 }
7071 return false;
7072 }
7073
7074 bool
7075 cfil_info_buffer_threshold_exceeded(struct cfil_info *cfil_info)
7076 {
7077 if (cfil_info == NULL) {
7078 return false;
7079 }
7080
7081 /*
7082 * Clean up flow if it exceeded queue thresholds
7083 */
7084 if (cfil_info->cfi_snd.cfi_tail_drop_cnt ||
7085 cfil_info->cfi_rcv.cfi_tail_drop_cnt) {
7086 #if GC_DEBUG
7087 CFIL_LOG(LOG_ERR, "CFIL: queue threshold exceeded: mbuf max <count: %d bytes: %d> tail drop count <OUT: %d IN: %d>",
7088 cfil_udp_gc_mbuf_num_max,
7089 cfil_udp_gc_mbuf_cnt_max,
7090 cfil_info->cfi_snd.cfi_tail_drop_cnt,
7091 cfil_info->cfi_rcv.cfi_tail_drop_cnt);
7092 cfil_info_log(LOG_ERR, cfil_info, "CFIL: queue threshold exceeded");
7093 #endif
7094 return true;
7095 }
7096
7097 return false;
7098 }
7099
7100 static void
7101 cfil_udp_gc_thread_sleep(bool forever)
7102 {
7103 if (forever) {
7104 (void) assert_wait((event_t) &cfil_sock_udp_attached_count,
7105 THREAD_INTERRUPTIBLE);
7106 } else {
7107 uint64_t deadline = 0;
7108 nanoseconds_to_absolutetime(UDP_FLOW_GC_RUN_INTERVAL_NSEC, &deadline);
7109 clock_absolutetime_interval_to_deadline(deadline, &deadline);
7110
7111 (void) assert_wait_deadline(&cfil_sock_udp_attached_count,
7112 THREAD_INTERRUPTIBLE, deadline);
7113 }
7114 }
7115
7116 static void
7117 cfil_udp_gc_thread_func(void *v, wait_result_t w)
7118 {
7119 #pragma unused(v, w)
7120
7121 ASSERT(cfil_udp_gc_thread == current_thread());
7122 thread_set_thread_name(current_thread(), "CFIL_UPD_GC");
7123
7124 // Kick off gc shortly
7125 cfil_udp_gc_thread_sleep(false);
7126 thread_block_parameter((thread_continue_t) cfil_info_udp_expire, NULL);
7127 /* NOTREACHED */
7128 }
7129
7130 static void
7131 cfil_info_udp_expire(void *v, wait_result_t w)
7132 {
7133 #pragma unused(v, w)
7134
7135 static uint64_t expired_array[UDP_FLOW_GC_MAX_COUNT];
7136 static uint32_t expired_count = 0;
7137
7138 struct cfil_info *cfil_info;
7139 struct cfil_hash_entry *hash_entry;
7140 struct cfil_db *db;
7141 struct socket *so;
7142 u_int64_t current_time = 0;
7143
7144 current_time = net_uptime();
7145
7146 // Get all expired UDP flow ids
7147 cfil_rw_lock_shared(&cfil_lck_rw);
7148
7149 if (cfil_sock_udp_attached_count == 0) {
7150 cfil_rw_unlock_shared(&cfil_lck_rw);
7151 goto go_sleep;
7152 }
7153
7154 TAILQ_FOREACH(cfil_info, &cfil_sock_head, cfi_link) {
7155 if (expired_count >= UDP_FLOW_GC_MAX_COUNT) {
7156 break;
7157 }
7158
7159 if (IS_IP_DGRAM(cfil_info->cfi_so)) {
7160 if (cfil_info_idle_timed_out(cfil_info, UDP_FLOW_GC_IDLE_TO, current_time) ||
7161 cfil_info_action_timed_out(cfil_info, UDP_FLOW_GC_ACTION_TO) ||
7162 cfil_info_buffer_threshold_exceeded(cfil_info)) {
7163 expired_array[expired_count] = cfil_info->cfi_sock_id;
7164 expired_count++;
7165 }
7166 }
7167 }
7168 cfil_rw_unlock_shared(&cfil_lck_rw);
7169
7170 if (expired_count == 0) {
7171 goto go_sleep;
7172 }
7173
7174 for (uint32_t i = 0; i < expired_count; i++) {
7175 // Look up the socket by sock id (UDP only) and lock it
7176 so = cfil_socket_from_sock_id(expired_array[i], true);
7177 if (so == NULL) {
7178 continue;
7179 }
7180
7181 cfil_info = cfil_db_get_cfil_info(so->so_cfil_db, expired_array[i]);
7182 if (cfil_info == NULL) {
7183 goto unlock;
7184 }
7185
7186 db = so->so_cfil_db;
7187 hash_entry = cfil_info->cfi_hash_entry;
7188
7189 if (db == NULL || hash_entry == NULL) {
7190 goto unlock;
7191 }
7192
7193 #if GC_DEBUG || LIFECYCLE_DEBUG
7194 cfil_info_log(LOG_ERR, cfil_info, "CFIL: LIFECYCLE: GC CLEAN UP");
7195 #endif
7196
7197 cfil_db_delete_entry(db, hash_entry);
7198 CFIL_INFO_FREE(cfil_info);
7199 OSIncrementAtomic(&cfil_stats.cfs_sock_detached);
7200
7201 if (so->so_flags & SOF_CONTENT_FILTER) {
7202 if (db->cfdb_count == 0) {
7203 so->so_flags &= ~SOF_CONTENT_FILTER;
7204 }
7205 VERIFY(so->so_usecount > 0);
7206 so->so_usecount--;
7207 }
7208 unlock:
7209 socket_unlock(so, 1);
7210 }
7211
7212 #if GC_DEBUG
7213 CFIL_LOG(LOG_ERR, "CFIL: UDP flow idle timeout check: expired %d idle flows", expired_count);
7214 #endif
7215 expired_count = 0;
7216
7217 go_sleep:
7218
7219 // Sleep forever (until woken up) if there are no more UDP flows to clean
7220 cfil_rw_lock_shared(&cfil_lck_rw);
7221 cfil_udp_gc_thread_sleep(cfil_sock_udp_attached_count == 0 ? true : false);
7222 cfil_rw_unlock_shared(&cfil_lck_rw);
7223 thread_block_parameter((thread_continue_t)cfil_info_udp_expire, NULL);
7224 /* NOTREACHED */
7225 }
7226
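/*
 * cfil_dgram_save_socket_state() snapshots the socket state a datagram needs
 * to carry with it (state change count, socket options, inpcb flags and the
 * hash entry's foreign address) into an mbuf tag of type
 * KERNEL_TAG_TYPE_CFIL_UDP. m_tag_create() allocates the tag header and
 * payload together, which is why the cfil_tag payload is addressed as
 * (struct cfil_tag *)(tag + 1). cfil_dgram_get_socket_state() below is the
 * consuming side.
 */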
7227 struct m_tag *
7228 cfil_dgram_save_socket_state(struct cfil_info *cfil_info, struct mbuf *m)
7229 {
7230 struct m_tag *tag = NULL;
7231 struct cfil_tag *ctag = NULL;
7232 struct cfil_hash_entry *hash_entry = NULL;
7233 struct inpcb *inp = NULL;
7234
7235 if (cfil_info == NULL || cfil_info->cfi_so == NULL ||
7236 cfil_info->cfi_hash_entry == NULL || m == NULL || !(m->m_flags & M_PKTHDR)) {
7237 return NULL;
7238 }
7239
7240 inp = sotoinpcb(cfil_info->cfi_so);
7241
7242 /* Allocate a tag */
7243 tag = m_tag_create(KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_CFIL_UDP,
7244 sizeof(struct cfil_tag), M_DONTWAIT, m);
7245
7246 if (tag) {
7247 ctag = (struct cfil_tag*)(tag + 1);
7248 ctag->cfil_so_state_change_cnt = cfil_info->cfi_so->so_state_change_cnt;
7249 ctag->cfil_so_options = cfil_info->cfi_so->so_options;
7250 ctag->cfil_inp_flags = inp ? inp->inp_flags : 0;
7251
7252 hash_entry = cfil_info->cfi_hash_entry;
7253 if (hash_entry->cfentry_family == AF_INET6) {
7254 fill_ip6_sockaddr_4_6(&ctag->cfil_faddr,
7255 &hash_entry->cfentry_faddr.addr6,
7256 hash_entry->cfentry_fport);
7257 } else if (hash_entry->cfentry_family == AF_INET) {
7258 fill_ip_sockaddr_4_6(&ctag->cfil_faddr,
7259 hash_entry->cfentry_faddr.addr46.ia46_addr4,
7260 hash_entry->cfentry_fport);
7261 }
7262 m_tag_prepend(m, tag);
7263 return tag;
7264 }
7265 return NULL;
7266 }
7267
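/*
 * cfil_dgram_get_socket_state() unlinks the CFIL tag from the mbuf and hands
 * it to the caller, which then owns it. Note that *faddr points into the tag
 * payload, so it is only valid until the tag is freed. A minimal usage
 * sketch (the mbuf 'm' and what is done with the outputs are illustrative):
 *
 *     struct sockaddr *faddr = NULL;
 *     int inp_flags = 0;
 *     struct m_tag *tag;
 *
 *     tag = cfil_dgram_get_socket_state(m, NULL, NULL, &faddr, &inp_flags);
 *     if (tag != NULL) {
 *             // consume faddr / inp_flags while the tag is still alive
 *             m_tag_free(tag);
 *     }
 */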
7268 struct m_tag *
7269 cfil_dgram_get_socket_state(struct mbuf *m, uint32_t *state_change_cnt, uint32_t *options,
7270 struct sockaddr **faddr, int *inp_flags)
7271 {
7272 struct m_tag *tag = NULL;
7273 struct cfil_tag *ctag = NULL;
7274
7275 tag = m_tag_locate(m, KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_CFIL_UDP, NULL);
7276 if (tag) {
7277 ctag = (struct cfil_tag *)(tag + 1);
7278 if (state_change_cnt) {
7279 *state_change_cnt = ctag->cfil_so_state_change_cnt;
7280 }
7281 if (options) {
7282 *options = ctag->cfil_so_options;
7283 }
7284 if (faddr) {
7285 *faddr = (struct sockaddr *) &ctag->cfil_faddr;
7286 }
7287 if (inp_flags) {
7288 *inp_flags = ctag->cfil_inp_flags;
7289 }
7290
7291 /*
7292 * Unlink tag and hand it over to the caller.
7293 * Note that the caller is responsible for freeing it.
7294 */
7295 m_tag_unlink(m, tag);
7296 return tag;
7297 }
7298 return NULL;
7299 }
7300
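/*
 * cfil_dgram_peek_socket_state() is the non-destructive variant: it reads the
 * saved inp_flags but leaves the tag attached to the mbuf, so a later
 * cfil_dgram_get_socket_state() still finds it.
 */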
7301 boolean_t
7302 cfil_dgram_peek_socket_state(struct mbuf *m, int *inp_flags)
7303 {
7304 struct m_tag *tag = NULL;
7305 struct cfil_tag *ctag = NULL;
7306
7307 tag = m_tag_locate(m, KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_CFIL_UDP, NULL);
7308 if (tag) {
7309 ctag = (struct cfil_tag *)(tag + 1);
7310 if (inp_flags) {
7311 *inp_flags = ctag->cfil_inp_flags;
7312 }
7313 return true;
7314 }
7315 return false;
7316 }
7317
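/*
 * cfil_dispatch_stats_event_locked() pushes one CFM_OP_STATS event to the
 * filter agent attached at kcunit: a cfil_msg_stats_report header followed by
 * 'count' cfil_msg_sock_stats entries, with cfm_len covering the whole
 * message. A rough sketch of how an agent might walk such a message after
 * reading it from its kernel control socket, assuming cfil_msg_stats_report
 * exposes the same count/stats layout as the kernel-side
 * cfil_stats_report_buffer used here (the msg pointer is illustrative):
 *
 *     struct cfil_msg_stats_report *report = (struct cfil_msg_stats_report *)msg;
 *     struct cfil_msg_sock_stats *flows = (struct cfil_msg_sock_stats *)&report->stats;
 *
 *     for (uint32_t i = 0; i < report->count; i++) {
 *             // flows[i].cfs_sock_id, flows[i].cfs_byte_inbound_count,
 *             // flows[i].cfs_byte_outbound_count, flows[i].cfs_laddr ...
 *     }
 */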
7318 static int
7319 cfil_dispatch_stats_event_locked(int kcunit, struct cfil_stats_report_buffer *buffer, uint32_t stats_count)
7320 {
7321 struct content_filter *cfc = NULL;
7322 errno_t error = 0;
7323 size_t msgsize = 0;
7324
7325 if (buffer == NULL || stats_count == 0) {
7326 return error;
7327 }
7328
7329 if (content_filters == NULL || kcunit > MAX_CONTENT_FILTER) {
7330 return error;
7331 }
7332
7333 cfc = content_filters[kcunit - 1];
7334 if (cfc == NULL) {
7335 return error;
7336 }
7337
7338 /* Would be wasteful to try */
7339 if (cfc->cf_flags & CFF_FLOW_CONTROLLED) {
7340 error = ENOBUFS;
7341 goto done;
7342 }
7343
7344 msgsize = sizeof(struct cfil_msg_stats_report) + (sizeof(struct cfil_msg_sock_stats) * stats_count);
7345 buffer->msghdr.cfm_len = (uint32_t)msgsize;
7346 buffer->msghdr.cfm_version = 1;
7347 buffer->msghdr.cfm_type = CFM_TYPE_EVENT;
7348 buffer->msghdr.cfm_op = CFM_OP_STATS;
7349 buffer->msghdr.cfm_sock_id = 0;
7350 buffer->count = stats_count;
7351
7352 #if STATS_DEBUG
7353 CFIL_LOG(LOG_ERR, "STATS (kcunit %d): msg size %lu - %lu %lu %lu",
7354 kcunit,
7355 (unsigned long)msgsize,
7356 (unsigned long)sizeof(struct cfil_msg_stats_report),
7357 (unsigned long)sizeof(struct cfil_msg_sock_stats),
7358 (unsigned long)stats_count);
7359 #endif
7360
7361 error = ctl_enqueuedata(cfc->cf_kcref, cfc->cf_kcunit,
7362 buffer,
7363 msgsize,
7364 CTL_DATA_EOR);
7365 if (error != 0) {
7366 CFIL_LOG(LOG_ERR, "ctl_enqueuedata() failed: %d", error);
7367 goto done;
7368 }
7369 OSIncrementAtomic(&cfil_stats.cfs_stats_event_ok);
7370
7371 #if STATS_DEBUG
7372 CFIL_LOG(LOG_ERR, "CFIL: STATS REPORT: send msg to %d", kcunit);
7373 #endif
7374
7375 done:
7376
7377 if (error == ENOBUFS) {
7378 OSIncrementAtomic(
7379 &cfil_stats.cfs_stats_event_flow_control);
7380
7381 if (!cfil_rw_lock_shared_to_exclusive(&cfil_lck_rw)) {
7382 cfil_rw_lock_exclusive(&cfil_lck_rw);
7383 }
7384
7385 cfc->cf_flags |= CFF_FLOW_CONTROLLED;
7386
7387 cfil_rw_unlock_exclusive(&cfil_lck_rw);
7388 } else if (error != 0) {
7389 OSIncrementAtomic(&cfil_stats.cfs_stats_event_fail);
7390 }
7391
7392 return error;
7393 }
7394
7395 static void
7396 cfil_stats_report_thread_sleep(bool forever)
7397 {
7398 #if STATS_DEBUG
7399 CFIL_LOG(LOG_ERR, "CFIL: STATS COLLECTION SLEEP");
7400 #endif
7401
7402 if (forever) {
7403 (void) assert_wait((event_t) &cfil_sock_attached_stats_count,
7404 THREAD_INTERRUPTIBLE);
7405 } else {
7406 uint64_t deadline = 0;
7407 nanoseconds_to_absolutetime(CFIL_STATS_REPORT_RUN_INTERVAL_NSEC, &deadline);
7408 clock_absolutetime_interval_to_deadline(deadline, &deadline);
7409
7410 (void) assert_wait_deadline(&cfil_sock_attached_stats_count,
7411 THREAD_INTERRUPTIBLE, deadline);
7412 }
7413 }
7414
7415 static void
7416 cfil_stats_report_thread_func(void *v, wait_result_t w)
7417 {
7418 #pragma unused(v, w)
7419
7420 ASSERT(cfil_stats_report_thread == current_thread());
7421 thread_set_thread_name(current_thread(), "CFIL_STATS_REPORT");
7422
7423 // Kick off stats reporting shortly
7424 cfil_stats_report_thread_sleep(false);
7425 thread_block_parameter((thread_continue_t) cfil_stats_report, NULL);
7426 /* NOTREACHED */
7427 }
7428
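/*
 * cfil_stats_collect_flow_stats_for_filter() appends one cfil_msg_sock_stats
 * entry for this flow to the per-kcunit report buffer when (a) the filter
 * asked for periodic stats (cfe_stats_report_frequency != 0), (b) the report
 * period has elapsed, and (c) the byte counters moved since the last report.
 * cfe_stats_report_frequency is expressed in milliseconds, so the elapsed
 * time comparison converts it to microseconds:
 *
 *     (freq_ms * NSEC_PER_MSEC) / NSEC_PER_USEC  ==  freq_ms * 1000 usecs
 *
 * The flow's local address is attached at most once per filter entry
 * (cfe_laddr_sent), being cached into cfi_so_attach_laddr on first use if the
 * inpcb is still around.
 */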
7429 static bool
7430 cfil_stats_collect_flow_stats_for_filter(int kcunit,
7431 struct cfil_info *cfil_info,
7432 struct cfil_entry *entry,
7433 struct timeval current_tv)
7434 {
7435 struct cfil_stats_report_buffer *buffer = NULL;
7436 struct cfil_msg_sock_stats *flow_array = NULL;
7437 struct cfil_msg_sock_stats *stats = NULL;
7438 struct inpcb *inp = NULL;
7439 struct timeval diff_time;
7440 uint64_t diff_time_usecs;
7441 int index = 0;
7442
7443 if (entry->cfe_stats_report_frequency == 0) {
7444 return false;
7445 }
7446
7447 buffer = global_cfil_stats_report_buffers[kcunit - 1];
7448 if (buffer == NULL) {
7449 #if STATS_DEBUG
7450 CFIL_LOG(LOG_ERR, "CFIL: STATS: no buffer");
7451 #endif
7452 return false;
7453 }
7454
7455 timersub(&current_tv, &entry->cfe_stats_report_ts, &diff_time);
7456 diff_time_usecs = (diff_time.tv_sec * USEC_PER_SEC) + diff_time.tv_usec;
7457
7458 #if STATS_DEBUG
7459 CFIL_LOG(LOG_ERR, "CFIL: STATS REPORT - elapsed time - ts %llu %llu cur ts %llu %llu diff %llu %llu (usecs %llu) @freq %llu usecs sockID %llu",
7460 (unsigned long long)entry->cfe_stats_report_ts.tv_sec,
7461 (unsigned long long)entry->cfe_stats_report_ts.tv_usec,
7462 (unsigned long long)current_tv.tv_sec,
7463 (unsigned long long)current_tv.tv_usec,
7464 (unsigned long long)diff_time.tv_sec,
7465 (unsigned long long)diff_time.tv_usec,
7466 (unsigned long long)diff_time_usecs,
7467 (unsigned long long)((entry->cfe_stats_report_frequency * NSEC_PER_MSEC) / NSEC_PER_USEC),
7468 cfil_info->cfi_sock_id);
7469 #endif
7470
7471 // Compare elapsed time in usecs
7472 if (diff_time_usecs >= (entry->cfe_stats_report_frequency * NSEC_PER_MSEC) / NSEC_PER_USEC) {
7473 #if STATS_DEBUG
7474 CFIL_LOG(LOG_ERR, "CFIL: STATS REPORT - in %llu reported %llu",
7475 cfil_info->cfi_byte_inbound_count,
7476 entry->cfe_byte_inbound_count_reported);
7477 CFIL_LOG(LOG_ERR, "CFIL: STATS REPORT - out %llu reported %llu",
7478 cfil_info->cfi_byte_outbound_count,
7479 entry->cfe_byte_outbound_count_reported);
7480 #endif
7481 // Check if the flow has new bytes that have not been reported
7482 if (entry->cfe_byte_inbound_count_reported < cfil_info->cfi_byte_inbound_count ||
7483 entry->cfe_byte_outbound_count_reported < cfil_info->cfi_byte_outbound_count) {
7484 flow_array = (struct cfil_msg_sock_stats *)&buffer->stats;
7485 index = global_cfil_stats_counts[kcunit - 1];
7486
7487 stats = &flow_array[index];
7488 stats->cfs_sock_id = cfil_info->cfi_sock_id;
7489 stats->cfs_byte_inbound_count = cfil_info->cfi_byte_inbound_count;
7490 stats->cfs_byte_outbound_count = cfil_info->cfi_byte_outbound_count;
7491
7492 if (entry->cfe_laddr_sent == false) {
7493 /* cache it if necessary */
7494 if (cfil_info->cfi_so_attach_laddr.sa.sa_len == 0) {
7495 inp = cfil_info->cfi_so ? sotoinpcb(cfil_info->cfi_so) : NULL;
7496 if (inp != NULL) {
7497 boolean_t outgoing = (cfil_info->cfi_dir == CFS_CONNECTION_DIR_OUT);
7498 union sockaddr_in_4_6 *src = outgoing ? &cfil_info->cfi_so_attach_laddr : NULL;
7499 union sockaddr_in_4_6 *dst = outgoing ? NULL : &cfil_info->cfi_so_attach_laddr;
7500 cfil_fill_event_msg_addresses(cfil_info->cfi_hash_entry, inp,
7501 src, dst, !IS_INP_V6(inp), outgoing);
7502 }
7503 }
7504
7505 if (cfil_info->cfi_so_attach_laddr.sa.sa_len != 0) {
7506 stats->cfs_laddr.sin6 = cfil_info->cfi_so_attach_laddr.sin6;
7507 entry->cfe_laddr_sent = true;
7508 }
7509 }
7510
7511 global_cfil_stats_counts[kcunit - 1]++;
7512
7513 entry->cfe_stats_report_ts = current_tv;
7514 entry->cfe_byte_inbound_count_reported = cfil_info->cfi_byte_inbound_count;
7515 entry->cfe_byte_outbound_count_reported = cfil_info->cfi_byte_outbound_count;
7516 #if STATS_DEBUG
7517 cfil_info_log(LOG_ERR, cfil_info, "CFIL: LIFECYCLE: STATS COLLECTED");
7518 #endif
7519 CFI_ADD_TIME_LOG(cfil_info, &current_tv, &cfil_info->cfi_first_event, CFM_OP_STATS);
7520 return true;
7521 }
7522 }
7523 return false;
7524 }
7525
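/*
 * cfil_stats_report() is the continuation for the stats reporting thread.
 * Each pass walks cfil_sock_head_stats under the shared lock, collects at
 * most CFIL_STATS_REPORT_MAX_COUNT flows per round into the per-kcunit
 * report buffers, and dispatches them via cfil_dispatch_stats_event_locked().
 * If the list is longer than one round, the sock id of the first unexamined
 * flow is saved in saved_next_sock_id and the walk resumes from it on the
 * next iteration of the do/while loop. When no flows have stats reporting
 * enabled, the thread sleeps until one registers again.
 */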
7526 static void
7527 cfil_stats_report(void *v, wait_result_t w)
7528 {
7529 #pragma unused(v, w)
7530
7531 struct cfil_info *cfil_info = NULL;
7532 struct cfil_entry *entry = NULL;
7533 struct timeval current_tv;
7534 uint32_t flow_count = 0;
7535 uint64_t saved_next_sock_id = 0; // Sock id to start from on the next pass
7536 bool flow_reported = false;
7537
7538 #if STATS_DEBUG
7539 CFIL_LOG(LOG_ERR, "CFIL: STATS COLLECTION RUNNING");
7540 #endif
7541
7542 do {
7543 // Collect all sock ids of flows that have new stats
7544 cfil_rw_lock_shared(&cfil_lck_rw);
7545
7546 if (cfil_sock_attached_stats_count == 0) {
7547 #if STATS_DEBUG
7548 CFIL_LOG(LOG_ERR, "CFIL: STATS: no flow");
7549 #endif
7550 cfil_rw_unlock_shared(&cfil_lck_rw);
7551 goto go_sleep;
7552 }
7553
7554 for (int kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
7555 if (global_cfil_stats_report_buffers[kcunit - 1] != NULL) {
7556 memset(global_cfil_stats_report_buffers[kcunit - 1], 0, sizeof(struct cfil_stats_report_buffer));
7557 }
7558 global_cfil_stats_counts[kcunit - 1] = 0;
7559 }
7560
7561 microuptime(&current_tv);
7562 flow_count = 0;
7563
7564 TAILQ_FOREACH(cfil_info, &cfil_sock_head_stats, cfi_link_stats) {
7565 if (saved_next_sock_id != 0 &&
7566 saved_next_sock_id == cfil_info->cfi_sock_id) {
7567 // This is where we left off previously; start accumulating from here
7568 saved_next_sock_id = 0;
7569 }
7570
7571 if (saved_next_sock_id == 0) {
7572 if (flow_count >= CFIL_STATS_REPORT_MAX_COUNT) {
7573 // Examine a fixed number of flows each round. Remember the current flow
7574 // so we can start from here on the next pass
7575 saved_next_sock_id = cfil_info->cfi_sock_id;
7576 break;
7577 }
7578
7579 flow_reported = false;
7580 for (int kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
7581 entry = &cfil_info->cfi_entries[kcunit - 1];
7582 if (entry->cfe_filter == NULL) {
7583 #if STATS_DEBUG
7584 CFIL_LOG(LOG_NOTICE, "CFIL: STATS REPORT - so %llx no filter",
7585 cfil_info->cfi_so ? (uint64_t)VM_KERNEL_ADDRPERM(cfil_info->cfi_so) : 0);
7586 #endif
7587 continue;
7588 }
7589
7590 if ((entry->cfe_stats_report_frequency > 0) &&
7591 cfil_stats_collect_flow_stats_for_filter(kcunit, cfil_info, entry, current_tv) == true) {
7592 flow_reported = true;
7593 }
7594 }
7595 if (flow_reported == true) {
7596 flow_count++;
7597 }
7598 }
7599 }
7600
7601 if (flow_count > 0) {
7602 #if STATS_DEBUG
7603 CFIL_LOG(LOG_ERR, "CFIL: STATS reporting for %d flows", flow_count);
7604 #endif
7605 for (int kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
7606 if (global_cfil_stats_report_buffers[kcunit - 1] != NULL &&
7607 global_cfil_stats_counts[kcunit - 1] > 0) {
7608 cfil_dispatch_stats_event_locked(kcunit,
7609 global_cfil_stats_report_buffers[kcunit - 1],
7610 global_cfil_stats_counts[kcunit - 1]);
7611 }
7612 }
7613 } else {
7614 cfil_rw_unlock_shared(&cfil_lck_rw);
7615 goto go_sleep;
7616 }
7617
7618 cfil_rw_unlock_shared(&cfil_lck_rw);
7619
7620 // Loop again if we haven't finished the whole cfil_info list
7621 } while (saved_next_sock_id != 0);
7622
7623 go_sleep:
7624
7625 // Sleep forever (until woken up) if there are no more flows to report
7626 cfil_rw_lock_shared(&cfil_lck_rw);
7627 cfil_stats_report_thread_sleep(cfil_sock_attached_stats_count == 0 ? true : false);
7628 cfil_rw_unlock_shared(&cfil_lck_rw);
7629 thread_block_parameter((thread_continue_t) cfil_stats_report, NULL);
7630 /* NOTREACHED */
7631 }