1 /*
2 * Copyright (c) 2013-2020 Apple Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. Please obtain a copy of the License at
10 * http://www.opensource.apple.com/apsl/ and read it before using this
11 * file.
12 *
13 * The Original Code and all software distributed under the License are
14 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
18 * Please see the License for the specific language governing rights and
19 * limitations under the License.
20 *
21 * @APPLE_LICENSE_HEADER_END@
22 */
23
24 /*
25 * THEORY OF OPERATION
26 *
27 * The socket content filter subsystem provides a way for user space agents to
28 * make filtering decisions based on the content of the data being sent and
29 * received by INET/INET6 sockets.
30 *
31 * A content filter user space agent gets a copy of the data, and the data is
32 * also kept in a kernel buffer until the user space agent makes a pass or drop
33 * decision. This unidirectional flow of content avoids unnecessary data copies
34 * back to the kernel.
35 *
36 * A user space filter agent opens a kernel control socket with the name
37 * CONTENT_FILTER_CONTROL_NAME to attach to the socket content filter subsystem.
38 * When connected, a "struct content_filter" is created and set as the
39 * "unitinfo" of the corresponding kernel control socket instance.
40 *
41 * The socket content filter subsystem exchanges messages with the user space
42 * filter agent until an ultimate pass or drop decision is made by the
43 * user space filter agent.
44 *
45 * It should be noted that messages about many INET/INET6 sockets can be multiplexed
46 * over a single kernel control socket.
47 *
48 * Notes:
49 * - The current implementation supports all INET/INET6 sockets (e.g. TCP,
50 * UDP, ICMP, etc.).
51 * - The current implementation supports up to two simultaneous content filters
52 * for iOS devices and eight simultaneous content filters for OSX.
53 *
54 *
55 * NECP FILTER CONTROL UNIT
56 *
57 * A user space filter agent uses the Network Extension Control Policy (NECP)
58 * database to specify which INET/INET6 sockets need to be filtered. The NECP
59 * criteria may be based on a variety of properties like user ID or proc UUID.
60 *
61 * The NECP "filter control unit" is used by the socket content filter subsystem
62 * to deliver the relevant INET/INET6 content information to the appropriate
63 * user space filter agent via its kernel control socket instance.
64 * This works as follows:
65 *
66 * 1) The user space filter agent specifies an NECP filter control unit when
67 * it adds its filtering rules to the NECP database.
68 *
69 * 2) The user space filter agent also sets its NECP filter control unit on the
70 * content filter kernel control socket via the socket option
71 * CFIL_OPT_NECP_CONTROL_UNIT.
72 *
73 * 3) The NECP database is consulted to find out if a given INET/INET6 socket
74 * needs to be subjected to content filtering and returns the corresponding
75 * NECP filter control unit -- the NECP filter control unit is actually
76 * stored in the INET/INET6 socket structure so the NECP lookup is really simple.
77 *
78 * 4) The NECP filter control unit is then used to find the corresponding
79 * kernel control socket instance.
80 *
81 * Note: NECP currently supports a single filter control unit per INET/INET6 socket
82 * but this restriction may soon be lifted.
83 *
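 * As an illustration of step 2) above (a hedged sketch, not part of this
 * file): once the agent's kernel control socket is connected, the NECP
 * filter control unit can be set with an ordinary setsockopt() at the
 * SYSPROTO_CONTROL level. The option name comes from net/content_filter.h;
 * the unit value below is purely hypothetical and "cfil_fd" refers to the
 * connected control socket shown in the sketch under THE MESSAGING PROTOCOL.
 *
 *	uint32_t control_unit = 12;   // must match the unit used in the NECP rule
 *	if (setsockopt(cfil_fd, SYSPROTO_CONTROL, CFIL_OPT_NECP_CONTROL_UNIT,
 *	    &control_unit, sizeof(control_unit)) == -1) {
 *		// handle the error
 *	}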
84 *
85 * THE MESSAGING PROTOCOL
86 *
87 * The socket content filter subsystem and a user space filter agent
88 * communicate over the kernel control socket via an asynchronous
89 * messaging protocol (this is not a request-response protocol).
90 * The socket content filter subsystem sends event messages to the user
91 * space filter agent about the INET/INET6 sockets it is interested to filter.
92 * The user space filter agent sends action messages to either allow
93 * data to pass or to disallow the data flow (and drop the connection).
94 *
95 * All messages over a content filter kernel control socket share the same
96 * common header of type "struct cfil_msg_hdr". The message type tells if
97 * it's an event message "CFM_TYPE_EVENT" or an action message "CFM_TYPE_ACTION".
98 * The message header field "cfm_sock_id" identifies a given INET/INET6 flow.
99 * For TCP, flows are per-socket. For UDP and other datagram protocols, there
100 * could be multiple flows per socket.
101 *
102 * Note the message header length field may be padded for alignment and can
103 * be larger than the actual content of the message.
104 * The field "cfm_op" describes the kind of event or action.
105 *
106 * Here are the kinds of content filter events:
107 * - CFM_OP_SOCKET_ATTACHED: a new INET/INET6 socket is being filtered
108 * - CFM_OP_SOCKET_CLOSED: An INET/INET6 socket is closed
109 * - CFM_OP_DATA_OUT: A span of data is being sent on an INET/INET6 socket
110 * - CFM_OP_DATA_IN: A span of data is being received on an INET/INET6 socket
111 *
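 * A hedged sketch (not part of this file) of how a user space agent
 * typically reaches this protocol: open a PF_SYSTEM/SYSPROTO_CONTROL
 * socket, resolve CONTENT_FILTER_CONTROL_NAME with CTLIOCGINFO, connect
 * to a free unit, and read event messages. The cfil_msg_hdr layout is
 * assumed to be the one declared in net/content_filter.h (a private
 * header); additional headers and error handling are omitted.
 *
 *	#include <sys/socket.h>
 *	#include <sys/sys_domain.h>
 *	#include <sys/kern_control.h>
 *	#include <net/content_filter.h>
 *
 *	int cfil_fd = socket(PF_SYSTEM, SOCK_DGRAM, SYSPROTO_CONTROL);
 *
 *	struct ctl_info info = { 0 };
 *	strlcpy(info.ctl_name, CONTENT_FILTER_CONTROL_NAME, sizeof(info.ctl_name));
 *	ioctl(cfil_fd, CTLIOCGINFO, &info);
 *
 *	struct sockaddr_ctl sc = { 0 };
 *	sc.sc_len = sizeof(sc);
 *	sc.sc_family = AF_SYSTEM;
 *	sc.ss_sysaddr = AF_SYS_CONTROL;
 *	sc.sc_id = info.ctl_id;
 *	sc.sc_unit = 1;                 // kcunit, 1..MAX_CONTENT_FILTER
 *	connect(cfil_fd, (struct sockaddr *)&sc, sizeof(sc));
 *
 *	uint8_t buf[8192];
 *	ssize_t n = recv(cfil_fd, buf, sizeof(buf), 0);
 *	struct cfil_msg_hdr *hdr = (struct cfil_msg_hdr *)(void *)buf;
 *	if (n >= (ssize_t)sizeof(*hdr) && hdr->cfm_type == CFM_TYPE_EVENT) {
 *		// hdr->cfm_op: CFM_OP_SOCKET_ATTACHED, CFM_OP_DATA_IN, ...
 *		// hdr->cfm_sock_id: the flow to reference in action messages
 *	}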
112 *
113 * EVENT MESSAGES
114 *
115 * The CFM_OP_DATA_OUT and CFM_OP_DATA_IN event messages contain a span of
116 * data that is being sent or received. The position of this span of data
117 * in the data flow is described by a set of start and end offsets. These
118 * are absolute 64-bit offsets. The first byte sent (or received) starts
119 * at offset 0 and ends at offset 1. The length of the content data
120 * is given by the difference between the end offset and the start offset.
121 *
122 * After a CFM_OP_SOCKET_ATTACHED is delivered, CFM_OP_DATA_OUT and
123 * CFM_OP_DATA_IN events are not delivered until a CFM_OP_DATA_UPDATE
124 * action message is sent by the user space filter agent.
125 *
126 * Note: absolute 64-bit offsets should be large enough for the foreseeable
127 * future. A 64-bit counter will wrap after 468 years at 10 Gbit/sec:
128 * 2^64 / ((10E9 / 8) * 60 * 60 * 24 * 365.25) = 467.63
129 *
130 * There are two kinds of primary content filter actions:
131 * - CFM_OP_DATA_UPDATE: to update pass or peek offsets for each direction.
132 * - CFM_OP_DROP: to shut down the socket and disallow further data flow
133 *
134 * There is also an action to mark a given client flow as already filtered
135 * at a higher level, CFM_OP_BLESS_CLIENT.
136 *
137 *
138 * ACTION MESSAGES
139 *
140 * The CFM_OP_DATA_UPDATE action messages let the user space filter
141 * agent allow data to flow up to the specified pass offset -- there
142 * is a pass offset for outgoing data and a pass offset for incoming data.
143 * When a new INET/INET6 socket is attached to the content filter and a flow is
144 * created, each pass offset is initially set to 0 so no data is allowed to pass by
145 * default. When the pass offset is set to CFM_MAX_OFFSET via a CFM_OP_DATA_UPDATE
146 * then the data flow becomes unrestricted.
147 *
148 * Note that pass offsets can only be incremented. A CFM_OP_DATA_UPDATE message
149 * with a pass offset smaller than the pass offset of a previous
150 * CFM_OP_DATA_UPDATE message is silently ignored.
151 *
152 * A user space filter agent also uses CFM_OP_DATA_UPDATE action messages
153 * to tell the kernel how much data it wants to see by using the peek offsets.
154 * Just like pass offsets, there is a peek offset for each direction.
155 * When a new INET/INET6 flow is created, each peek offset is initially set to 0
156 * so no CFM_OP_DATA_OUT and CFM_OP_DATA_IN event messages are dispatched by default
157 * until a CFM_OP_DATA_UPDATE action message with a greater than 0 peek offset is sent
158 * by the user space filter agent. When the peek offset is set to CFM_MAX_OFFSET via
159 * a CFM_OP_DATA_UPDATE then the flow of update data events becomes unrestricted.
160 *
161 * Note that peek offsets cannot be smaller than the corresponding pass offset.
162 * Also a peek offset cannot be smaller than the corresponding end offset
163 * of the last CFM_OP_DATA_OUT/CFM_OP_DATA_IN message dispatched. Trying
164 * to set too small a peek value is silently ignored.
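 *
 * A minimal sketch of such an action message (not part of this file),
 * assuming the cfil_msg_action layout declared in net/content_filter.h;
 * "event", "last_out_end", "last_in_end" and "cfil_fd" are hypothetical
 * names for a previously received event header, the end offsets of the
 * last data events seen, and the connected control socket.
 *
 *	struct cfil_msg_action act = { 0 };
 *	act.cfa_msghdr.cfm_len     = sizeof(act);
 *	act.cfa_msghdr.cfm_version = CFM_VERSION_CURRENT;
 *	act.cfa_msghdr.cfm_type    = CFM_TYPE_ACTION;
 *	act.cfa_msghdr.cfm_op      = CFM_OP_DATA_UPDATE;
 *	act.cfa_msghdr.cfm_sock_id = event->cfm_sock_id;
 *	act.cfa_out_pass_offset    = last_out_end;    // allow what was peeked so far
 *	act.cfa_out_peek_offset    = CFM_MAX_OFFSET;  // keep getting CFM_OP_DATA_OUT
 *	act.cfa_in_pass_offset     = last_in_end;
 *	act.cfa_in_peek_offset     = CFM_MAX_OFFSET;  // keep getting CFM_OP_DATA_IN
 *	send(cfil_fd, &act, sizeof(act), 0);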
165 *
166 *
167 * PER FLOW "struct cfil_info"
168 *
169 * As soon as an INET/INET6 socket gets attached to a content filter, a
170 * "struct cfil_info" is created to hold the content filtering state for this
171 * socket. For UDP and other datagram protocols, as soon as traffic is seen for
172 * each new flow identified by its 4-tuple of source address/port and destination
173 * address/port, a "struct cfil_info" is created. Each datagram socket may
174 * have multiple flows maintained in a hash table of "struct cfil_info" entries.
175 *
176 * The content filtering state is made of the following information
177 * for each direction:
178 * - The current pass offset;
179 * - The first and last offsets of the data pending, waiting for a filtering
180 * decision;
181 * - The inject queue for data that passed the filters and that needs
182 * to be re-injected;
183 * - A content filter specific state in a set of "struct cfil_entry"
184 *
185 *
186 * CONTENT FILTER STATE "struct cfil_entry"
187 *
188 * The "struct cfil_entry" maintains the information most relevant to the
189 * message handling over a kernel control socket with a user space filter agent.
190 *
191 * The "struct cfil_entry" holds the NECP filter control unit that corresponds
192 * to the kernel control socket unit it corresponds to and also has a pointer
193 * to the corresponding "struct content_filter".
194 *
195 * For each direction, "struct cfil_entry" maintains the following information:
196 * - The pass offset
197 * - The peek offset
198 * - The offset of the last data peeked at by the filter
199 * - A queue of data that's waiting to be delivered to the user space filter
200 * agent on the kernel control socket
201 * - A queue of data for which event messages have been sent on the kernel
202 * control socket and are pending for a filtering decision.
203 *
204 *
205 * CONTENT FILTER QUEUES
206 *
207 * Data that is being filtered is steered away from the INET/INET6 socket buffer
208 * and instead will sit in one of three content filter queues until the data
209 * can be re-injected into the INET/INET6 socket buffer.
210 *
211 * A content filter queue is represented by "struct cfil_queue" that contains
212 * a list of mbufs and the start and end offset of the data span of
213 * the list of mbufs.
214 *
215 * The data moves into the three content filter queues according to this
216 * sequence:
217 * a) The "cfe_ctl_q" of "struct cfil_entry"
218 * b) The "cfe_pending_q" of "struct cfil_entry"
219 * c) The "cfi_inject_q" of "struct cfil_info"
220 *
221 * Note: The sequence (a),(b) may be repeated several times if there is more
222 * than one content filter attached to the INET/INET6 socket.
223 *
224 * The "cfe_ctl_q" queue holds data than cannot be delivered to the
225 * kernel conntrol socket for two reasons:
226 * - The peek offset is less that the end offset of the mbuf data
227 * - The kernel control socket is flow controlled
228 *
229 * The "cfe_pending_q" queue holds data for which CFM_OP_DATA_OUT or
230 * CFM_OP_DATA_IN have been successfully dispatched to the kernel control
231 * socket and are waiting for a pass action message from the user space
232 * filter agent. An mbuf length must be fully allowed to pass to be removed
233 * from the cfe_pending_q.
234 *
235 * The "cfi_inject_q" queue holds data that has been fully allowed to pass
236 * by the user space filter agent and that needs to be re-injected into the
237 * INET/INET6 socket.
238 *
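 * A concrete (illustrative) walk-through with a single filter attached,
 * for 100 bytes written by the application:
 *
 *	1. The data is diverted to cfe_ctl_q (q_start == 0, q_end == 100).
 *	2. A CFM_OP_DATA_OUT event covering offsets [0, 100) is sent to the
 *	   agent; the mbufs move from cfe_ctl_q to cfe_pending_q.
 *	3. The agent replies with CFM_OP_DATA_UPDATE and an outgoing pass
 *	   offset of at least 100; the mbufs move to cfi_inject_q and are
 *	   then re-injected into the INET/INET6 socket buffer.
 *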
239 *
240 * IMPACT ON FLOW CONTROL
241 *
242 * An essential aspect of the content filter subsystem is to minimize the
243 * impact on flow control of the INET/INET6 sockets being filtered.
244 *
245 * The processing overhead of the content filtering may have an effect on
246 * flow control by adding noticeable delays and cannot be eliminated --
247 * care must be taken by the user space filter agent to minimize the
248 * processing delays.
249 *
250 * The amount of data being filtered is kept in buffers while waiting for
251 * a decision by the user space filter agent. This amount of data pending
252 * needs to be subtracted from the amount of data available in the
253 * corresponding INET/INET6 socket buffer. This is done by modifying
254 * sbspace() and tcp_sbspace() to account for the amount of data pending
255 * in the content filter.
256 *
257 *
258 * LOCKING STRATEGY
259 *
260 * The global state of content filter subsystem is protected by a single
261 * read-write lock "cfil_lck_rw". The data flow can be done with the
262 * cfil read-write lock held as shared so it can be re-entered from multiple
263 * threads.
264 *
265 * The per INET/INET6 socket content filter state -- "struct cfil_info" -- is
266 * protected by the socket lock.
267 *
268 * An INET/INET6 socket lock cannot be taken while the cfil read-write lock
269 * is held. That's why we have some sequences where we drop the cfil read-write
270 * lock before taking the INET/INET6 socket lock.
271 *
272 * It is also important to lock the INET/INET6 socket buffer while the content
273 * filter is modifying the amount of pending data. Otherwise the calculations
274 * in sbspace() and tcp_sbspace() could be wrong.
275 *
276 * The "cfil_lck_rw" protects "struct content_filter" and also the fields
277 * "cfe_link" and "cfe_filter" of "struct cfil_entry".
278 *
279 * Actually "cfe_link" and "cfe_filter" are protected both by
280 * "cfil_lck_rw" and the socket lock: they may be modified only when
281 * "cfil_lck_rw" is exclusive and the socket is locked.
282 *
283 * To read the other fields of "struct content_filter" we have to take
284 * "cfil_lck_rw" in shared mode.
285 *
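 * For instance, the pattern used in this file (e.g. cfil_ctl_disconnect())
 * when a socket must be locked while the cfil lock is held looks roughly
 * like this (sketch only):
 *
 *	cfil_rw_lock_exclusive(&cfil_lck_rw);
 *	// ... find the entry / filter state ...
 *	cfil_rw_unlock_exclusive(&cfil_lck_rw);   // drop first, then lock the socket
 *	socket_lock(so, 1);
 *	// ... update the per-socket cfil_info ...
 *	cfil_rw_lock_exclusive(&cfil_lck_rw);     // re-take and re-validate the state
 *	// ...
 *	socket_unlock(so, 1);
 *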
286 * DATAGRAM SPECIFICS:
287 *
288 * The socket content filter supports all INET/INET6 protocols. However
289 * the treatments for TCP sockets and for datagram (UDP, ICMP, etc) sockets
290 * are slightly different.
291 *
292 * Each datagram socket may have multiple flows. Each flow is identified
293 * by the flow's source address/port and destination address/port tuple
294 * and is represented as a "struct cfil_info" entry. For each socket,
295 * a hash table is used to maintain the collection of flows under that socket.
296 *
297 * Each datagram flow is uniquely identified by its "struct cfil_info" cfi_sock_id.
298 * The highest 32 bits of the cfi_sock_id contain the socket's so_gencnt. This portion
299 * of the cfi_sock_id is used to locate the socket during socket lookup. The lowest 32 bits
300 * of the cfi_sock_id contain a hash of the flow's 4-tuple. This portion of the cfi_sock_id
301 * is used as the hash value for the flow hash table lookup within the parent socket.
302 *
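 * A minimal sketch of that layout, using the CFI_MASK_* / CFI_SHIFT_*
 * constants defined further down in this file (the helper name is
 * illustrative only):
 *
 *	static cfil_sock_id_t
 *	example_make_dgram_sock_id(u_int64_t so_gencnt, u_int32_t flowhash)
 *	{
 *		return ((so_gencnt << CFI_SHIFT_GENCNT) & CFI_MASK_GENCNT) |
 *		    (((u_int64_t)flowhash << CFI_SHIFT_FLOWHASH) & CFI_MASK_FLOWHASH);
 *	}
 *
 *	// and the reverse, as done by cfil_socket_from_sock_id():
 *	u_int64_t gencnt   = cfil_sock_id >> CFI_SHIFT_GENCNT;
 *	u_int32_t flowhash = (u_int32_t)(cfil_sock_id & CFI_MASK_FLOWHASH);
 *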
303 * Since datagram sockets may not be connected, flow states may not be maintained in the
304 * socket structures and thus have to be saved for each packet. These saved states will be
305 * used for both outgoing and incoming reinjections. For outgoing packets, destination
306 * address/port as well as the current socket states will be saved. During reinjection,
307 * these saved states will be used instead. For incoming packets, control and address
308 * mbufs will be chained to the data. During reinjection, the whole chain will be queued
309 * onto the incoming socket buffer.
310 *
311 * LIMITATIONS
312 *
313 * - Supports all INET/INET6 sockets, such as TCP, UDP, ICMP, etc
314 *
315 * - Does not support TCP unordered messages
316 */
317
318 /*
319 * TO DO LIST
320 *
321 * Deal with OOB
322 *
323 */
324
325 #include <sys/types.h>
326 #include <sys/kern_control.h>
327 #include <sys/queue.h>
328 #include <sys/domain.h>
329 #include <sys/protosw.h>
330 #include <sys/syslog.h>
331 #include <sys/systm.h>
332 #include <sys/param.h>
333 #include <sys/mbuf.h>
334
335 #include <kern/locks.h>
336 #include <kern/zalloc.h>
337 #include <kern/debug.h>
338
339 #include <net/content_filter.h>
340 #include <net/content_filter_crypto.h>
341
342 #define _IP_VHL
343 #include <netinet/ip.h>
344 #include <netinet/in_pcb.h>
345 #include <netinet/tcp.h>
346 #include <netinet/tcp_var.h>
347 #include <netinet/udp.h>
348 #include <netinet/udp_var.h>
349
350 #include <string.h>
351 #include <libkern/libkern.h>
352 #include <kern/sched_prim.h>
353 #include <kern/task.h>
354 #include <mach/task_info.h>
355
356 #if !TARGET_OS_OSX && !defined(XNU_TARGET_OS_OSX)
357 #define MAX_CONTENT_FILTER 2
358 #else
359 #define MAX_CONTENT_FILTER 8
360 #endif
361
362 extern struct inpcbinfo ripcbinfo;
363 struct cfil_entry;
364
365 /*
366 * The structure content_filter represents a user space content filter.
367 * It's created and associated with a kernel control socket instance
368 */
369 struct content_filter {
370 kern_ctl_ref cf_kcref;
371 u_int32_t cf_kcunit;
372 u_int32_t cf_flags;
373
374 uint32_t cf_necp_control_unit;
375
376 uint32_t cf_sock_count;
377 TAILQ_HEAD(, cfil_entry) cf_sock_entries;
378
379 cfil_crypto_state_t cf_crypto_state;
380 };
381
382 #define CFF_ACTIVE 0x01
383 #define CFF_DETACHING 0x02
384 #define CFF_FLOW_CONTROLLED 0x04
385
386 struct content_filter **content_filters = NULL;
387 uint32_t cfil_active_count = 0; /* Number of active content filters */
388 uint32_t cfil_sock_attached_count = 0; /* Number of socket attachments */
389 uint32_t cfil_sock_udp_attached_count = 0; /* Number of UDP socket attachments */
390 uint32_t cfil_sock_attached_stats_count = 0; /* Number of sockets that requested periodic stats reports */
391 uint32_t cfil_close_wait_timeout = 1000; /* in milliseconds */
392
393 static kern_ctl_ref cfil_kctlref = NULL;
394
395 static lck_grp_attr_t *cfil_lck_grp_attr = NULL;
396 static lck_attr_t *cfil_lck_attr = NULL;
397 static lck_grp_t *cfil_lck_grp = NULL;
398 decl_lck_rw_data(static, cfil_lck_rw);
399
400 #define CFIL_RW_LCK_MAX 8
401
402 int cfil_rw_nxt_lck = 0;
403 void* cfil_rw_lock_history[CFIL_RW_LCK_MAX];
404
405 int cfil_rw_nxt_unlck = 0;
406 void* cfil_rw_unlock_history[CFIL_RW_LCK_MAX];
407
408 #define CONTENT_FILTER_ZONE_NAME "content_filter"
409 #define CONTENT_FILTER_ZONE_MAX 10
410 static struct zone *content_filter_zone = NULL; /* zone for content_filter */
411
412
413 #define CFIL_INFO_ZONE_NAME "cfil_info"
414 #define CFIL_INFO_ZONE_MAX 1024
415 static struct zone *cfil_info_zone = NULL; /* zone for cfil_info */
416
417 MBUFQ_HEAD(cfil_mqhead);
418
419 struct cfil_queue {
420 uint64_t q_start; /* offset of first byte in queue */
421 uint64_t q_end; /* offset of last byte in queue */
422 struct cfil_mqhead q_mq;
423 };
424
425 /*
426 * struct cfil_entry
427 *
428 * There is one entry per content filter
429 */
430 struct cfil_entry {
431 TAILQ_ENTRY(cfil_entry) cfe_link;
432 SLIST_ENTRY(cfil_entry) cfe_order_link;
433 struct content_filter *cfe_filter;
434
435 struct cfil_info *cfe_cfil_info;
436 uint32_t cfe_flags;
437 uint32_t cfe_necp_control_unit;
438 struct timeval cfe_last_event; /* To user space */
439 struct timeval cfe_last_action; /* From user space */
440 uint64_t cfe_byte_inbound_count_reported; /* stats already reported */
441 uint64_t cfe_byte_outbound_count_reported; /* stats already reported */
442 struct timeval cfe_stats_report_ts; /* Timestamp for last stats report */
443 uint32_t cfe_stats_report_frequency; /* Interval for stats report in msecs */
444 boolean_t cfe_laddr_sent;
445
446 struct cfe_buf {
447 /*
448 * cfe_pending_q holds data that has been delivered to
449 * the filter and for which we are waiting for an action
450 */
451 struct cfil_queue cfe_pending_q;
452 /*
453 * This queue is for data that has not been delivered to
454 * the content filter (new data, pass peek or flow control)
455 */
456 struct cfil_queue cfe_ctl_q;
457
458 uint64_t cfe_pass_offset;
459 uint64_t cfe_peek_offset;
460 uint64_t cfe_peeked;
461 } cfe_snd, cfe_rcv;
462 };
463
464 #define CFEF_CFIL_ATTACHED 0x0001 /* was attached to filter */
465 #define CFEF_SENT_SOCK_ATTACHED 0x0002 /* sock attach event was sent */
466 #define CFEF_DATA_START 0x0004 /* can send data event */
467 #define CFEF_FLOW_CONTROLLED 0x0008 /* wait for flow control lift */
468 #define CFEF_SENT_DISCONNECT_IN 0x0010 /* event was sent */
469 #define CFEF_SENT_DISCONNECT_OUT 0x0020 /* event was sent */
470 #define CFEF_SENT_SOCK_CLOSED 0x0040 /* closed event was sent */
471 #define CFEF_CFIL_DETACHED 0x0080 /* filter was detached */
472
473
474 #define CFI_ADD_TIME_LOG(cfil, t1, t0, op) \
475 struct timeval _tdiff; \
476 if ((cfil)->cfi_op_list_ctr < CFI_MAX_TIME_LOG_ENTRY) { \
477 timersub(t1, t0, &_tdiff); \
478 (cfil)->cfi_op_time[(cfil)->cfi_op_list_ctr] = (uint32_t)(_tdiff.tv_sec * 1000 + _tdiff.tv_usec / 1000);\
479 (cfil)->cfi_op_list[(cfil)->cfi_op_list_ctr] = (unsigned char)op; \
480 (cfil)->cfi_op_list_ctr ++; \
481 }
482
483 struct cfil_hash_entry;
484
485 /*
486 * struct cfil_info
487 *
488 * There is a struct cfil_info per socket
489 */
490 struct cfil_info {
491 TAILQ_ENTRY(cfil_info) cfi_link;
492 TAILQ_ENTRY(cfil_info) cfi_link_stats;
493 struct socket *cfi_so;
494 uint64_t cfi_flags;
495 uint64_t cfi_sock_id;
496 struct timeval64 cfi_first_event;
497 uint32_t cfi_op_list_ctr;
498 uint32_t cfi_op_time[CFI_MAX_TIME_LOG_ENTRY]; /* time interval in microseconds since first event */
499 unsigned char cfi_op_list[CFI_MAX_TIME_LOG_ENTRY];
500 union sockaddr_in_4_6 cfi_so_attach_faddr; /* faddr at the time of attach */
501 union sockaddr_in_4_6 cfi_so_attach_laddr; /* laddr at the time of attach */
502
503 int cfi_dir;
504 uint64_t cfi_byte_inbound_count;
505 uint64_t cfi_byte_outbound_count;
506
507 boolean_t cfi_isSignatureLatest; /* Indicates if signature covers latest flow attributes */
508 u_int32_t cfi_debug;
509 struct cfi_buf {
510 /*
511 * cfi_pending_first and cfi_pending_last describe the total
512 * amount of data outstanding for all the filters on
513 * this socket and data in the flow queue
514 * cfi_pending_mbcnt counts in sballoc() "chars of mbufs used"
515 */
516 uint64_t cfi_pending_first;
517 uint64_t cfi_pending_last;
518 uint32_t cfi_pending_mbcnt;
519 uint32_t cfi_pending_mbnum;
520 uint32_t cfi_tail_drop_cnt;
521 /*
522 * cfi_pass_offset is the minimum of all the filters
523 */
524 uint64_t cfi_pass_offset;
525 /*
526 * cfi_inject_q holds data that needs to be re-injected
527 * into the socket after filtering and that can
528 * be queued because of flow control
529 */
530 struct cfil_queue cfi_inject_q;
531 } cfi_snd, cfi_rcv;
532
533 struct cfil_entry cfi_entries[MAX_CONTENT_FILTER];
534 struct cfil_hash_entry *cfi_hash_entry;
535 SLIST_HEAD(, cfil_entry) cfi_ordered_entries;
536 } __attribute__((aligned(8)));
537
538 #define CFIF_DROP 0x0001 /* drop action applied */
539 #define CFIF_CLOSE_WAIT 0x0002 /* waiting for filter to close */
540 #define CFIF_SOCK_CLOSED 0x0004 /* socket is closed */
541 #define CFIF_RETRY_INJECT_IN 0x0010 /* inject in failed */
542 #define CFIF_RETRY_INJECT_OUT 0x0020 /* inject out failed */
543 #define CFIF_SHUT_WR 0x0040 /* shutdown write */
544 #define CFIF_SHUT_RD 0x0080 /* shutdown read */
545 #define CFIF_SOCKET_CONNECTED 0x0100 /* socket is connected */
546 #define CFIF_INITIAL_VERDICT 0x0200 /* received initial verdict */
547
548 #define CFI_MASK_GENCNT 0xFFFFFFFF00000000 /* upper 32 bits */
549 #define CFI_SHIFT_GENCNT 32
550 #define CFI_MASK_FLOWHASH 0x00000000FFFFFFFF /* lower 32 bits */
551 #define CFI_SHIFT_FLOWHASH 0
552
553 #define CFI_ENTRY_KCUNIT(i, e) (((e) - &((i)->cfi_entries[0])) + 1)
554
555 TAILQ_HEAD(cfil_sock_head, cfil_info) cfil_sock_head;
556 TAILQ_HEAD(cfil_sock_head_stats, cfil_info) cfil_sock_head_stats;
557
558 #define CFIL_QUEUE_VERIFY(x) if (cfil_debug) cfil_queue_verify(x)
559 #define CFIL_INFO_VERIFY(x) if (cfil_debug) cfil_info_verify(x)
560
561 /*
562 * UDP Socket Support
563 */
564 LIST_HEAD(cfilhashhead, cfil_hash_entry);
565 #define CFILHASHSIZE 16
566 #define CFIL_HASH(laddr, faddr, lport, fport) ((faddr) ^ ((laddr) >> 16) ^ (fport) ^ (lport))
567
568 #define IS_INET(so) (so && so->so_proto && so->so_proto->pr_domain && (so->so_proto->pr_domain->dom_family == AF_INET || so->so_proto->pr_domain->dom_family == AF_INET6))
569 #define IS_TCP(so) (so && so->so_proto && so->so_proto->pr_type == SOCK_STREAM && so->so_proto->pr_protocol == IPPROTO_TCP)
570 #define IS_UDP(so) (so && so->so_proto && so->so_proto->pr_type == SOCK_DGRAM && so->so_proto->pr_protocol == IPPROTO_UDP)
571 #define IS_ICMP(so) (so && so->so_proto && (so->so_proto->pr_type == SOCK_RAW || so->so_proto->pr_type == SOCK_DGRAM) && \
572 (so->so_proto->pr_protocol == IPPROTO_ICMP || so->so_proto->pr_protocol == IPPROTO_ICMPV6))
573 #define IS_RAW(so) (so && so->so_proto && so->so_proto->pr_type == SOCK_RAW && so->so_proto->pr_protocol == IPPROTO_RAW)
574
575 #if !TARGET_OS_OSX && !defined(XNU_TARGET_OS_OSX)
576 #define IS_IP_DGRAM(so) (IS_INET(so) && IS_UDP(so))
577 #else
578 #define IS_IP_DGRAM(so) (IS_INET(so) && !IS_TCP(so))
579 #endif
580
581 #define OPTIONAL_IP_HEADER(so) (!IS_TCP(so) && !IS_UDP(so))
582 #define GET_SO_PROTO(so) ((so && so->so_proto) ? so->so_proto->pr_protocol : IPPROTO_MAX)
583 #define IS_INP_V6(inp) (inp && (inp->inp_vflag & INP_IPV6))
584
585 #define UNCONNECTED(inp) (inp && (((inp->inp_vflag & INP_IPV4) && (inp->inp_faddr.s_addr == INADDR_ANY)) || \
586 ((inp->inp_vflag & INP_IPV6) && IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr))))
587 #define IS_ENTRY_ATTACHED(cfil_info, kcunit) (cfil_info != NULL && (kcunit <= MAX_CONTENT_FILTER) && \
588 cfil_info->cfi_entries[kcunit - 1].cfe_filter != NULL)
589 #define IS_DNS(local, remote) (check_port(local, 53) || check_port(remote, 53) || check_port(local, 5353) || check_port(remote, 5353))
590 #define IS_INITIAL_TFO_DATA(so) (so && (so->so_flags1 & SOF1_PRECONNECT_DATA) && (so->so_state & SS_ISCONNECTING))
591 #define NULLADDRESS(addr) ((addr.sa.sa_len == 0) || \
592 (addr.sa.sa_family == AF_INET && addr.sin.sin_addr.s_addr == 0) || \
593 (addr.sa.sa_family == AF_INET6 && IN6_IS_ADDR_UNSPECIFIED(&addr.sin6.sin6_addr)))
594
595 /*
596 * Periodic Statistics Report:
597 */
598 static struct thread *cfil_stats_report_thread;
599 #define CFIL_STATS_REPORT_INTERVAL_MIN_MSEC 500 // Highest report frequency
600 #define CFIL_STATS_REPORT_RUN_INTERVAL_NSEC (CFIL_STATS_REPORT_INTERVAL_MIN_MSEC * NSEC_PER_MSEC)
601 #define CFIL_STATS_REPORT_MAX_COUNT 50 // Max stats to be reported per run
602
603 /* This buffer must have same layout as struct cfil_msg_stats_report */
604 struct cfil_stats_report_buffer {
605 struct cfil_msg_hdr msghdr;
606 uint32_t count;
607 struct cfil_msg_sock_stats stats[CFIL_STATS_REPORT_MAX_COUNT];
608 };
609 static struct cfil_stats_report_buffer *global_cfil_stats_report_buffers[MAX_CONTENT_FILTER];
610 static uint32_t global_cfil_stats_counts[MAX_CONTENT_FILTER];
611
612 /*
613 * UDP Garbage Collection:
614 */
615 static struct thread *cfil_udp_gc_thread;
616 #define UDP_FLOW_GC_IDLE_TO 30 // Flow Idle Timeout in seconds
617 #define UDP_FLOW_GC_ACTION_TO 10 // Flow Action Timeout (no action from user space) in seconds
618 #define UDP_FLOW_GC_MAX_COUNT 100 // Max UDP flows to be handled per run
619 #define UDP_FLOW_GC_RUN_INTERVAL_NSEC (10 * NSEC_PER_SEC) // GC wakes up every 10 seconds
620
621 /*
622 * UDP flow queue thresholds
623 */
624 #define UDP_FLOW_GC_MBUF_CNT_MAX (2 << MBSHIFT) // Max mbuf byte count in flow queue (2MB)
625 #define UDP_FLOW_GC_MBUF_NUM_MAX (UDP_FLOW_GC_MBUF_CNT_MAX >> MCLSHIFT) // Max mbuf count in flow queue (1K)
626 #define UDP_FLOW_GC_MBUF_SHIFT 5 // Shift to get 1/32 of platform limits
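/*
 * Rough arithmetic behind the two limits above, assuming the usual
 * MBSHIFT == 20 and MCLSHIFT == 11 from <sys/param.h>:
 *
 *	UDP_FLOW_GC_MBUF_CNT_MAX = 2 << 20         = 2,097,152 bytes (2MB)
 *	UDP_FLOW_GC_MBUF_NUM_MAX = (2 << 20) >> 11 = 1,024 mbufs (1K)
 */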
627 /*
628 * UDP flow queue threshold globals:
629 */
630 static unsigned int cfil_udp_gc_mbuf_num_max = UDP_FLOW_GC_MBUF_NUM_MAX;
631 static unsigned int cfil_udp_gc_mbuf_cnt_max = UDP_FLOW_GC_MBUF_CNT_MAX;
632
633 /*
634 * struct cfil_hash_entry
635 *
636 * Hash entry for cfil_info
637 */
638 struct cfil_hash_entry {
639 LIST_ENTRY(cfil_hash_entry) cfentry_link;
640 struct cfil_info *cfentry_cfil;
641 u_short cfentry_fport;
642 u_short cfentry_lport;
643 sa_family_t cfentry_family;
644 u_int32_t cfentry_flowhash;
645 u_int64_t cfentry_lastused;
646 union {
647 /* foreign host table entry */
648 struct in_addr_4in6 addr46;
649 struct in6_addr addr6;
650 } cfentry_faddr;
651 union {
652 /* local host table entry */
653 struct in_addr_4in6 addr46;
654 struct in6_addr addr6;
655 } cfentry_laddr;
656 };
657
658 /*
659 * struct cfil_db
660 *
661 * For each UDP socket, this is a hash table maintaining all cfil_info structs
662 * keyed by the flow 4-tuples <lport,fport,laddr,faddr>.
663 */
664 struct cfil_db {
665 struct socket *cfdb_so;
666 uint32_t cfdb_count; /* Number of total content filters */
667 struct cfilhashhead *cfdb_hashbase;
668 u_long cfdb_hashmask;
669 struct cfil_hash_entry *cfdb_only_entry; /* Optimization for connected UDP */
670 };
671
672 /*
673 * CFIL specific mbuf tag:
674 * Save state of socket at the point of data entry into cfil.
675 * Use saved state for reinjection at protocol layer.
676 */
677 struct cfil_tag {
678 union sockaddr_in_4_6 cfil_faddr;
679 uint32_t cfil_so_state_change_cnt;
680 short cfil_so_options;
681 int cfil_inp_flags;
682 };
683
684 #define CFIL_HASH_ENTRY_ZONE_NAME "cfil_entry_hash"
685 #define CFIL_HASH_ENTRY_ZONE_MAX 1024
686 static struct zone *cfil_hash_entry_zone = NULL;
687
688 #define CFIL_DB_ZONE_NAME "cfil_db"
689 #define CFIL_DB_ZONE_MAX 1024
690 static struct zone *cfil_db_zone = NULL;
691
692 /*
693 * Statistics
694 */
695
696 struct cfil_stats cfil_stats;
697
698 /*
699 * For troubleshooting
700 */
701 int cfil_log_level = LOG_ERR;
702 int cfil_debug = 1;
703
704 // Debug controls added for selective debugging.
705 // Disabled for production. If enabled,
706 // these will have performance impact
707 #define LIFECYCLE_DEBUG 0
708 #define VERDICT_DEBUG 0
709 #define DATA_DEBUG 0
710 #define SHOW_DEBUG 0
711 #define GC_DEBUG 0
712 #define STATS_DEBUG 0
713
714 /*
715 * Sysctls for logs and statistics
716 */
717 static int sysctl_cfil_filter_list(struct sysctl_oid *, void *, int,
718 struct sysctl_req *);
719 static int sysctl_cfil_sock_list(struct sysctl_oid *, void *, int,
720 struct sysctl_req *);
721
722 SYSCTL_NODE(_net, OID_AUTO, cfil, CTLFLAG_RW | CTLFLAG_LOCKED, 0, "cfil");
723
724 SYSCTL_INT(_net_cfil, OID_AUTO, log, CTLFLAG_RW | CTLFLAG_LOCKED,
725 &cfil_log_level, 0, "");
726
727 SYSCTL_INT(_net_cfil, OID_AUTO, debug, CTLFLAG_RW | CTLFLAG_LOCKED,
728 &cfil_debug, 0, "");
729
730 SYSCTL_UINT(_net_cfil, OID_AUTO, sock_attached_count, CTLFLAG_RD | CTLFLAG_LOCKED,
731 &cfil_sock_attached_count, 0, "");
732
733 SYSCTL_UINT(_net_cfil, OID_AUTO, active_count, CTLFLAG_RD | CTLFLAG_LOCKED,
734 &cfil_active_count, 0, "");
735
736 SYSCTL_UINT(_net_cfil, OID_AUTO, close_wait_timeout, CTLFLAG_RW | CTLFLAG_LOCKED,
737 &cfil_close_wait_timeout, 0, "");
738
739 static int cfil_sbtrim = 1;
740 SYSCTL_UINT(_net_cfil, OID_AUTO, sbtrim, CTLFLAG_RW | CTLFLAG_LOCKED,
741 &cfil_sbtrim, 0, "");
742
743 SYSCTL_PROC(_net_cfil, OID_AUTO, filter_list, CTLFLAG_RD | CTLFLAG_LOCKED,
744 0, 0, sysctl_cfil_filter_list, "S,cfil_filter_stat", "");
745
746 SYSCTL_PROC(_net_cfil, OID_AUTO, sock_list, CTLFLAG_RD | CTLFLAG_LOCKED,
747 0, 0, sysctl_cfil_sock_list, "S,cfil_sock_stat", "");
748
749 SYSCTL_STRUCT(_net_cfil, OID_AUTO, stats, CTLFLAG_RD | CTLFLAG_LOCKED,
750 &cfil_stats, cfil_stats, "");
751
752 /*
753 * Forward declaration to appease the compiler
754 */
755 static int cfil_action_data_pass(struct socket *, struct cfil_info *, uint32_t, int,
756 uint64_t, uint64_t);
757 static int cfil_action_drop(struct socket *, struct cfil_info *, uint32_t);
758 static int cfil_action_bless_client(uint32_t, struct cfil_msg_hdr *);
759 static int cfil_action_set_crypto_key(uint32_t, struct cfil_msg_hdr *);
760 static int cfil_dispatch_closed_event(struct socket *, struct cfil_info *, int);
761 static int cfil_data_common(struct socket *, struct cfil_info *, int, struct sockaddr *,
762 struct mbuf *, struct mbuf *, uint32_t);
763 static int cfil_data_filter(struct socket *, struct cfil_info *, uint32_t, int,
764 struct mbuf *, uint64_t);
765 static void fill_ip_sockaddr_4_6(union sockaddr_in_4_6 *,
766 struct in_addr, u_int16_t);
767 static void fill_ip6_sockaddr_4_6(union sockaddr_in_4_6 *,
768 struct in6_addr *, u_int16_t);
769
770 static int cfil_dispatch_attach_event(struct socket *, struct cfil_info *, uint32_t, int);
771 static void cfil_info_free(struct cfil_info *);
772 static struct cfil_info * cfil_info_alloc(struct socket *, struct cfil_hash_entry *);
773 static int cfil_info_attach_unit(struct socket *, uint32_t, struct cfil_info *);
774 static struct socket * cfil_socket_from_sock_id(cfil_sock_id_t, bool);
775 static struct socket * cfil_socket_from_client_uuid(uuid_t, bool *);
776 static int cfil_service_pending_queue(struct socket *, struct cfil_info *, uint32_t, int);
777 static int cfil_data_service_ctl_q(struct socket *, struct cfil_info *, uint32_t, int);
778 static void cfil_info_verify(struct cfil_info *);
779 static int cfil_update_data_offsets(struct socket *, struct cfil_info *, uint32_t, int,
780 uint64_t, uint64_t);
781 static int cfil_acquire_sockbuf(struct socket *, struct cfil_info *, int);
782 static void cfil_release_sockbuf(struct socket *, int);
783 static int cfil_filters_attached(struct socket *);
784
785 static void cfil_rw_lock_exclusive(lck_rw_t *);
786 static void cfil_rw_unlock_exclusive(lck_rw_t *);
787 static void cfil_rw_lock_shared(lck_rw_t *);
788 static void cfil_rw_unlock_shared(lck_rw_t *);
789 static boolean_t cfil_rw_lock_shared_to_exclusive(lck_rw_t *);
790 static void cfil_rw_lock_exclusive_to_shared(lck_rw_t *);
791
792 static unsigned int cfil_data_length(struct mbuf *, int *, int *);
793 static errno_t cfil_db_init(struct socket *);
794 static void cfil_db_free(struct socket *so);
795 struct cfil_hash_entry *cfil_db_lookup_entry(struct cfil_db *, struct sockaddr *, struct sockaddr *, boolean_t);
796 struct cfil_hash_entry *cfil_db_lookup_entry_with_sockid(struct cfil_db *, u_int64_t);
797 struct cfil_hash_entry *cfil_db_add_entry(struct cfil_db *, struct sockaddr *, struct sockaddr *);
798 void cfil_db_update_entry_local(struct cfil_db *, struct cfil_hash_entry *, struct sockaddr *);
799 void cfil_db_delete_entry(struct cfil_db *, struct cfil_hash_entry *);
800 struct cfil_hash_entry *cfil_sock_udp_get_flow(struct socket *, uint32_t, bool, struct sockaddr *, struct sockaddr *, int);
801 struct cfil_info *cfil_db_get_cfil_info(struct cfil_db *, cfil_sock_id_t);
802 static errno_t cfil_sock_udp_handle_data(bool, struct socket *, struct sockaddr *, struct sockaddr *,
803 struct mbuf *, struct mbuf *, uint32_t);
804 static int32_t cfil_sock_udp_data_pending(struct sockbuf *, bool);
805 static void cfil_sock_udp_is_closed(struct socket *);
806 static int cfil_sock_udp_notify_shutdown(struct socket *, int, int, int);
807 static int cfil_sock_udp_shutdown(struct socket *, int *);
808 static void cfil_sock_udp_close_wait(struct socket *);
809 static void cfil_sock_udp_buf_update(struct sockbuf *);
810 static int cfil_filters_udp_attached(struct socket *, bool);
811 static void cfil_get_flow_address_v6(struct cfil_hash_entry *, struct inpcb *,
812 struct in6_addr **, struct in6_addr **,
813 u_int16_t *, u_int16_t *);
814 static void cfil_get_flow_address(struct cfil_hash_entry *, struct inpcb *,
815 struct in_addr *, struct in_addr *,
816 u_int16_t *, u_int16_t *);
817 static void cfil_info_log(int, struct cfil_info *, const char *);
818 void cfil_filter_show(u_int32_t);
819 void cfil_info_show(void);
820 bool cfil_info_idle_timed_out(struct cfil_info *, int, u_int32_t);
821 bool cfil_info_action_timed_out(struct cfil_info *, int);
822 bool cfil_info_buffer_threshold_exceeded(struct cfil_info *);
823 struct m_tag *cfil_dgram_save_socket_state(struct cfil_info *, struct mbuf *);
824 boolean_t cfil_dgram_peek_socket_state(struct mbuf *m, int *inp_flags);
825 static void cfil_udp_gc_thread_func(void *, wait_result_t);
826 static void cfil_info_udp_expire(void *, wait_result_t);
827 static bool fill_cfil_hash_entry_from_address(struct cfil_hash_entry *, bool, struct sockaddr *);
828 static void cfil_sock_received_verdict(struct socket *so);
829 static void cfil_fill_event_msg_addresses(struct cfil_hash_entry *, struct inpcb *,
830 union sockaddr_in_4_6 *, union sockaddr_in_4_6 *,
831 boolean_t, boolean_t);
832 static void cfil_stats_report_thread_func(void *, wait_result_t);
833 static void cfil_stats_report(void *v, wait_result_t w);
834
835 bool check_port(struct sockaddr *, u_short);
836
837 /*
838 * Content filter global read write lock
839 */
840
841 static void
842 cfil_rw_lock_exclusive(lck_rw_t *lck)
843 {
844 void *lr_saved;
845
846 lr_saved = __builtin_return_address(0);
847
848 lck_rw_lock_exclusive(lck);
849
850 cfil_rw_lock_history[cfil_rw_nxt_lck] = lr_saved;
851 cfil_rw_nxt_lck = (cfil_rw_nxt_lck + 1) % CFIL_RW_LCK_MAX;
852 }
853
854 static void
855 cfil_rw_unlock_exclusive(lck_rw_t *lck)
856 {
857 void *lr_saved;
858
859 lr_saved = __builtin_return_address(0);
860
861 lck_rw_unlock_exclusive(lck);
862
863 cfil_rw_unlock_history[cfil_rw_nxt_unlck] = lr_saved;
864 cfil_rw_nxt_unlck = (cfil_rw_nxt_unlck + 1) % CFIL_RW_LCK_MAX;
865 }
866
867 static void
868 cfil_rw_lock_shared(lck_rw_t *lck)
869 {
870 void *lr_saved;
871
872 lr_saved = __builtin_return_address(0);
873
874 lck_rw_lock_shared(lck);
875
876 cfil_rw_lock_history[cfil_rw_nxt_lck] = lr_saved;
877 cfil_rw_nxt_lck = (cfil_rw_nxt_lck + 1) % CFIL_RW_LCK_MAX;
878 }
879
880 static void
881 cfil_rw_unlock_shared(lck_rw_t *lck)
882 {
883 void *lr_saved;
884
885 lr_saved = __builtin_return_address(0);
886
887 lck_rw_unlock_shared(lck);
888
889 cfil_rw_unlock_history[cfil_rw_nxt_unlck] = lr_saved;
890 cfil_rw_nxt_unlck = (cfil_rw_nxt_unlck + 1) % CFIL_RW_LCK_MAX;
891 }
892
893 static boolean_t
894 cfil_rw_lock_shared_to_exclusive(lck_rw_t *lck)
895 {
896 void *lr_saved;
897 boolean_t upgraded;
898
899 lr_saved = __builtin_return_address(0);
900
901 upgraded = lck_rw_lock_shared_to_exclusive(lck);
902 if (upgraded) {
903 cfil_rw_unlock_history[cfil_rw_nxt_unlck] = lr_saved;
904 cfil_rw_nxt_unlck = (cfil_rw_nxt_unlck + 1) % CFIL_RW_LCK_MAX;
905 }
906 return upgraded;
907 }
908
909 static void
910 cfil_rw_lock_exclusive_to_shared(lck_rw_t *lck)
911 {
912 void *lr_saved;
913
914 lr_saved = __builtin_return_address(0);
915
916 lck_rw_lock_exclusive_to_shared(lck);
917
918 cfil_rw_lock_history[cfil_rw_nxt_lck] = lr_saved;
919 cfil_rw_nxt_lck = (cfil_rw_nxt_lck + 1) % CFIL_RW_LCK_MAX;
920 }
921
922 static void
923 cfil_rw_lock_assert_held(lck_rw_t *lck, int exclusive)
924 {
925 #if !MACH_ASSERT
926 #pragma unused(lck, exclusive)
927 #endif
928 LCK_RW_ASSERT(lck,
929 exclusive ? LCK_RW_ASSERT_EXCLUSIVE : LCK_RW_ASSERT_HELD);
930 }
931
932 /*
933 * Return the number of bytes in the mbuf chain using the same
934 * method as m_length() or sballoc()
935 *
936 * Returns data len - starting from PKT start
937 * - retmbcnt - optional param to get total mbuf bytes in chain
938 * - retmbnum - optional param to get number of mbufs in chain
939 */
940 static unsigned int
941 cfil_data_length(struct mbuf *m, int *retmbcnt, int *retmbnum)
942 {
943 struct mbuf *m0;
944 unsigned int pktlen = 0;
945 int mbcnt;
946 int mbnum;
947
948 // Locate the start of data
949 for (m0 = m; m0 != NULL; m0 = m0->m_next) {
950 if (m0->m_flags & M_PKTHDR) {
951 break;
952 }
953 }
954 if (m0 == NULL) {
955 CFIL_LOG(LOG_ERR, "cfil_data_length: no M_PKTHDR");
956 return 0;
957 }
958 m = m0;
959
960 if (retmbcnt == NULL && retmbnum == NULL) {
961 return m_length(m);
962 }
963
964 pktlen = 0;
965 mbcnt = 0;
966 mbnum = 0;
967 for (m0 = m; m0 != NULL; m0 = m0->m_next) {
968 pktlen += m0->m_len;
969 mbnum++;
970 mbcnt += MSIZE;
971 if (m0->m_flags & M_EXT) {
972 mbcnt += m0->m_ext.ext_size;
973 }
974 }
975 if (retmbcnt) {
976 *retmbcnt = mbcnt;
977 }
978 if (retmbnum) {
979 *retmbnum = mbnum;
980 }
981 return pktlen;
982 }
983
984 static struct mbuf *
985 cfil_data_start(struct mbuf *m)
986 {
987 struct mbuf *m0;
988
989 // Locate the start of data
990 for (m0 = m; m0 != NULL; m0 = m0->m_next) {
991 if (m0->m_flags & M_PKTHDR) {
992 break;
993 }
994 }
995 return m0;
996 }
997
998 /*
999 * Common mbuf queue utilities
1000 */
1001
1002 static inline void
1003 cfil_queue_init(struct cfil_queue *cfq)
1004 {
1005 cfq->q_start = 0;
1006 cfq->q_end = 0;
1007 MBUFQ_INIT(&cfq->q_mq);
1008 }
1009
1010 static inline uint64_t
1011 cfil_queue_drain(struct cfil_queue *cfq)
1012 {
1013 uint64_t drained = cfq->q_end - cfq->q_start;
1014 cfq->q_start = 0;
1015 cfq->q_end = 0;
1016 MBUFQ_DRAIN(&cfq->q_mq);
1017
1018 return drained;
1019 }
1020
1021 /* Return 1 when empty, 0 otherwise */
1022 static inline int
1023 cfil_queue_empty(struct cfil_queue *cfq)
1024 {
1025 return MBUFQ_EMPTY(&cfq->q_mq);
1026 }
1027
1028 static inline uint64_t
1029 cfil_queue_offset_first(struct cfil_queue *cfq)
1030 {
1031 return cfq->q_start;
1032 }
1033
1034 static inline uint64_t
1035 cfil_queue_offset_last(struct cfil_queue *cfq)
1036 {
1037 return cfq->q_end;
1038 }
1039
1040 static inline uint64_t
1041 cfil_queue_len(struct cfil_queue *cfq)
1042 {
1043 return cfq->q_end - cfq->q_start;
1044 }
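
/*
 * Illustrative (non-compiled) use of the queue offset helpers: enqueueing
 * and removing a chain of a given length moves q_end and q_start by exactly
 * that length, so cfil_queue_len() always reports the bytes currently held.
 * "m" stands for a hypothetical mbuf chain carrying 100 bytes of data.
 *
 *	struct cfil_queue q;
 *	cfil_queue_init(&q);            // q_start == 0, q_end == 0, len == 0
 *	cfil_queue_enqueue(&q, m, 100); // q_end == 100, len == 100
 *	cfil_queue_remove(&q, m, 100);  // q_start == 100, len == 0, queue empty
 */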
1045
1046 /*
1047 * Routines to verify some fundamental assumptions
1048 */
1049
1050 static void
1051 cfil_queue_verify(struct cfil_queue *cfq)
1052 {
1053 mbuf_t chain;
1054 mbuf_t m;
1055 mbuf_t n;
1056 uint64_t queuesize = 0;
1057
1058 /* Verify offsets are ordered */
1059 VERIFY(cfq->q_start <= cfq->q_end);
1060
1061 /*
1062 * When the queue is empty, the offsets are equal; otherwise the offsets
1063 * are different
1064 */
1065 VERIFY((MBUFQ_EMPTY(&cfq->q_mq) && cfq->q_start == cfq->q_end) ||
1066 (!MBUFQ_EMPTY(&cfq->q_mq) &&
1067 cfq->q_start != cfq->q_end));
1068
1069 MBUFQ_FOREACH(chain, &cfq->q_mq) {
1070 size_t chainsize = 0;
1071 m = chain;
1072 unsigned int mlen = cfil_data_length(m, NULL, NULL);
1073 // skip the addr and control stuff if present
1074 m = cfil_data_start(m);
1075
1076 if (m == NULL ||
1077 m == (void *)M_TAG_FREE_PATTERN ||
1078 m->m_next == (void *)M_TAG_FREE_PATTERN ||
1079 m->m_nextpkt == (void *)M_TAG_FREE_PATTERN) {
1080 panic("%s - mq %p is free at %p", __func__,
1081 &cfq->q_mq, m);
1082 }
1083 for (n = m; n != NULL; n = n->m_next) {
1084 if (n->m_type != MT_DATA &&
1085 n->m_type != MT_HEADER &&
1086 n->m_type != MT_OOBDATA) {
1087 panic("%s - %p unsupported type %u", __func__,
1088 n, n->m_type);
1089 }
1090 chainsize += n->m_len;
1091 }
1092 if (mlen != chainsize) {
1093 panic("%s - %p m_length() %u != chainsize %lu",
1094 __func__, m, mlen, chainsize);
1095 }
1096 queuesize += chainsize;
1097 }
1098 if (queuesize != cfq->q_end - cfq->q_start) {
1099 panic("%s - %p queuesize %llu != offsetdiffs %llu", __func__,
1100 m, queuesize, cfq->q_end - cfq->q_start);
1101 }
1102 }
1103
1104 static void
1105 cfil_queue_enqueue(struct cfil_queue *cfq, mbuf_t m, size_t len)
1106 {
1107 CFIL_QUEUE_VERIFY(cfq);
1108
1109 MBUFQ_ENQUEUE(&cfq->q_mq, m);
1110 cfq->q_end += len;
1111
1112 CFIL_QUEUE_VERIFY(cfq);
1113 }
1114
1115 static void
1116 cfil_queue_remove(struct cfil_queue *cfq, mbuf_t m, size_t len)
1117 {
1118 CFIL_QUEUE_VERIFY(cfq);
1119
1120 VERIFY(cfil_data_length(m, NULL, NULL) == len);
1121
1122 MBUFQ_REMOVE(&cfq->q_mq, m);
1123 MBUFQ_NEXT(m) = NULL;
1124 cfq->q_start += len;
1125
1126 CFIL_QUEUE_VERIFY(cfq);
1127 }
1128
1129 static mbuf_t
1130 cfil_queue_first(struct cfil_queue *cfq)
1131 {
1132 return MBUFQ_FIRST(&cfq->q_mq);
1133 }
1134
1135 static mbuf_t
1136 cfil_queue_next(struct cfil_queue *cfq, mbuf_t m)
1137 {
1138 #pragma unused(cfq)
1139 return MBUFQ_NEXT(m);
1140 }
1141
1142 static void
1143 cfil_entry_buf_verify(struct cfe_buf *cfe_buf)
1144 {
1145 CFIL_QUEUE_VERIFY(&cfe_buf->cfe_ctl_q);
1146 CFIL_QUEUE_VERIFY(&cfe_buf->cfe_pending_q);
1147
1148 /* Verify the queues are ordered so that pending is before ctl */
1149 VERIFY(cfe_buf->cfe_ctl_q.q_start >= cfe_buf->cfe_pending_q.q_end);
1150
1151 /* The peek offset cannot be less than the pass offset */
1152 VERIFY(cfe_buf->cfe_peek_offset >= cfe_buf->cfe_pass_offset);
1153
1154 /* Make sure we've updated the offset we peeked at */
1155 VERIFY(cfe_buf->cfe_ctl_q.q_start <= cfe_buf->cfe_peeked);
1156 }
1157
1158 static void
1159 cfil_entry_verify(struct cfil_entry *entry)
1160 {
1161 cfil_entry_buf_verify(&entry->cfe_snd);
1162 cfil_entry_buf_verify(&entry->cfe_rcv);
1163 }
1164
1165 static void
1166 cfil_info_buf_verify(struct cfi_buf *cfi_buf)
1167 {
1168 CFIL_QUEUE_VERIFY(&cfi_buf->cfi_inject_q);
1169
1170 VERIFY(cfi_buf->cfi_pending_first <= cfi_buf->cfi_pending_last);
1171 }
1172
1173 static void
1174 cfil_info_verify(struct cfil_info *cfil_info)
1175 {
1176 int i;
1177
1178 if (cfil_info == NULL) {
1179 return;
1180 }
1181
1182 cfil_info_buf_verify(&cfil_info->cfi_snd);
1183 cfil_info_buf_verify(&cfil_info->cfi_rcv);
1184
1185 for (i = 0; i < MAX_CONTENT_FILTER; i++) {
1186 cfil_entry_verify(&cfil_info->cfi_entries[i]);
1187 }
1188 }
1189
1190 static void
1191 verify_content_filter(struct content_filter *cfc)
1192 {
1193 struct cfil_entry *entry;
1194 uint32_t count = 0;
1195
1196 VERIFY(cfc->cf_sock_count >= 0);
1197
1198 TAILQ_FOREACH(entry, &cfc->cf_sock_entries, cfe_link) {
1199 count++;
1200 VERIFY(cfc == entry->cfe_filter);
1201 }
1202 VERIFY(count == cfc->cf_sock_count);
1203 }
1204
1205 /*
1206 * Kernel control socket callbacks
1207 */
1208 static errno_t
1209 cfil_ctl_connect(kern_ctl_ref kctlref, struct sockaddr_ctl *sac,
1210 void **unitinfo)
1211 {
1212 errno_t error = 0;
1213 struct content_filter *cfc = NULL;
1214
1215 CFIL_LOG(LOG_NOTICE, "");
1216
1217 cfc = zalloc(content_filter_zone);
1218 if (cfc == NULL) {
1219 CFIL_LOG(LOG_ERR, "zalloc failed");
1220 error = ENOMEM;
1221 goto done;
1222 }
1223 bzero(cfc, sizeof(struct content_filter));
1224
1225 cfil_rw_lock_exclusive(&cfil_lck_rw);
1226 if (content_filters == NULL) {
1227 struct content_filter **tmp;
1228
1229 cfil_rw_unlock_exclusive(&cfil_lck_rw);
1230
1231 MALLOC(tmp,
1232 struct content_filter **,
1233 MAX_CONTENT_FILTER * sizeof(struct content_filter *),
1234 M_TEMP,
1235 M_WAITOK | M_ZERO);
1236
1237 cfil_rw_lock_exclusive(&cfil_lck_rw);
1238
1239 if (tmp == NULL && content_filters == NULL) {
1240 error = ENOMEM;
1241 cfil_rw_unlock_exclusive(&cfil_lck_rw);
1242 goto done;
1243 }
1244 /* Another thread may have won the race */
1245 if (content_filters != NULL) {
1246 FREE(tmp, M_TEMP);
1247 } else {
1248 content_filters = tmp;
1249 }
1250 }
1251
1252 if (sac->sc_unit == 0 || sac->sc_unit > MAX_CONTENT_FILTER) {
1253 CFIL_LOG(LOG_ERR, "bad sc_unit %u", sac->sc_unit);
1254 error = EINVAL;
1255 } else if (content_filters[sac->sc_unit - 1] != NULL) {
1256 CFIL_LOG(LOG_ERR, "sc_unit %u in use", sac->sc_unit);
1257 error = EADDRINUSE;
1258 } else {
1259 /*
1260 * kernel control socket kcunit numbers start at 1
1261 */
1262 content_filters[sac->sc_unit - 1] = cfc;
1263
1264 cfc->cf_kcref = kctlref;
1265 cfc->cf_kcunit = sac->sc_unit;
1266 TAILQ_INIT(&cfc->cf_sock_entries);
1267
1268 *unitinfo = cfc;
1269 cfil_active_count++;
1270
1271 // Allocate periodic stats buffer for this filter
1272 if (global_cfil_stats_report_buffers[cfc->cf_kcunit - 1] == NULL) {
1273 cfil_rw_unlock_exclusive(&cfil_lck_rw);
1274
1275 struct cfil_stats_report_buffer *buf;
1276
1277 MALLOC(buf,
1278 struct cfil_stats_report_buffer *,
1279 sizeof(struct cfil_stats_report_buffer),
1280 M_TEMP,
1281 M_WAITOK | M_ZERO);
1282
1283 cfil_rw_lock_exclusive(&cfil_lck_rw);
1284
1285 if (buf == NULL) {
1286 error = ENOMEM;
1287 cfil_rw_unlock_exclusive(&cfil_lck_rw);
1288 goto done;
1289 }
1290
1291 /* Another thread may have won the race */
1292 if (global_cfil_stats_report_buffers[cfc->cf_kcunit - 1] != NULL) {
1293 FREE(buf, M_TEMP);
1294 } else {
1295 global_cfil_stats_report_buffers[cfc->cf_kcunit - 1] = buf;
1296 }
1297 }
1298 }
1299 cfil_rw_unlock_exclusive(&cfil_lck_rw);
1300 done:
1301 if (error != 0 && cfc != NULL) {
1302 zfree(content_filter_zone, cfc);
1303 }
1304
1305 if (error == 0) {
1306 OSIncrementAtomic(&cfil_stats.cfs_ctl_connect_ok);
1307 } else {
1308 OSIncrementAtomic(&cfil_stats.cfs_ctl_connect_fail);
1309 }
1310
1311 CFIL_LOG(LOG_INFO, "return %d cfil_active_count %u kcunit %u",
1312 error, cfil_active_count, sac->sc_unit);
1313
1314 return error;
1315 }
1316
1317 static errno_t
1318 cfil_ctl_disconnect(kern_ctl_ref kctlref, u_int32_t kcunit, void *unitinfo)
1319 {
1320 #pragma unused(kctlref)
1321 errno_t error = 0;
1322 struct content_filter *cfc;
1323 struct cfil_entry *entry;
1324 uint64_t sock_flow_id = 0;
1325
1326 CFIL_LOG(LOG_NOTICE, "");
1327
1328 if (content_filters == NULL) {
1329 CFIL_LOG(LOG_ERR, "no content filter");
1330 error = EINVAL;
1331 goto done;
1332 }
1333 if (kcunit > MAX_CONTENT_FILTER) {
1334 CFIL_LOG(LOG_ERR, "kcunit %u > MAX_CONTENT_FILTER (%d)",
1335 kcunit, MAX_CONTENT_FILTER);
1336 error = EINVAL;
1337 goto done;
1338 }
1339
1340 cfc = (struct content_filter *)unitinfo;
1341 if (cfc == NULL) {
1342 goto done;
1343 }
1344
1345 cfil_rw_lock_exclusive(&cfil_lck_rw);
1346 if (content_filters[kcunit - 1] != cfc || cfc->cf_kcunit != kcunit) {
1347 CFIL_LOG(LOG_ERR, "bad unit info %u",
1348 kcunit);
1349 cfil_rw_unlock_exclusive(&cfil_lck_rw);
1350 goto done;
1351 }
1352 cfc->cf_flags |= CFF_DETACHING;
1353 /*
1354 * Remove all sockets from the filter
1355 */
1356 while ((entry = TAILQ_FIRST(&cfc->cf_sock_entries)) != NULL) {
1357 cfil_rw_lock_assert_held(&cfil_lck_rw, 1);
1358
1359 verify_content_filter(cfc);
1360 /*
1361 * Accept all outstanding data by pushing to next filter
1362 * or back to socket
1363 *
1364 * TBD: Actually we should make sure all data has been pushed
1365 * back to socket
1366 */
1367 if (entry->cfe_cfil_info && entry->cfe_cfil_info->cfi_so) {
1368 struct cfil_info *cfil_info = entry->cfe_cfil_info;
1369 struct socket *so = cfil_info->cfi_so;
1370 sock_flow_id = cfil_info->cfi_sock_id;
1371
1372 /* Need to let data flow immediately */
1373 entry->cfe_flags |= CFEF_SENT_SOCK_ATTACHED |
1374 CFEF_DATA_START;
1375
1376 /*
1377 * Respect locking hierarchy
1378 */
1379 cfil_rw_unlock_exclusive(&cfil_lck_rw);
1380
1381 socket_lock(so, 1);
1382
1383 /*
1384 * When cfe_filter is NULL the filter is detached
1385 * and the entry has been removed from cf_sock_entries
1386 */
1387 if ((so->so_cfil == NULL && so->so_cfil_db == NULL) || entry->cfe_filter == NULL) {
1388 cfil_rw_lock_exclusive(&cfil_lck_rw);
1389 goto release;
1390 }
1391
1392 (void) cfil_action_data_pass(so, cfil_info, kcunit, 1,
1393 CFM_MAX_OFFSET,
1394 CFM_MAX_OFFSET);
1395
1396 (void) cfil_action_data_pass(so, cfil_info, kcunit, 0,
1397 CFM_MAX_OFFSET,
1398 CFM_MAX_OFFSET);
1399
1400 cfil_rw_lock_exclusive(&cfil_lck_rw);
1401
1402 /*
1403 * Check again to make sure the cfil_info is still valid
1404 * as the socket may have been unlocked when calling
1405 * cfil_acquire_sockbuf()
1406 */
1407 if (entry->cfe_filter == NULL ||
1408 (so->so_cfil == NULL && cfil_db_get_cfil_info(so->so_cfil_db, sock_flow_id) == NULL)) {
1409 goto release;
1410 }
1411
1412 /* The filter is now detached */
1413 entry->cfe_flags |= CFEF_CFIL_DETACHED;
1414 #if LIFECYCLE_DEBUG
1415 cfil_info_log(LOG_DEBUG, cfil_info, "CFIL: LIFECYCLE: - FILTER DISCONNECTED");
1416 #endif
1417 CFIL_LOG(LOG_NOTICE, "so %llx detached %u",
1418 (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit);
1419 if ((cfil_info->cfi_flags & CFIF_CLOSE_WAIT) &&
1420 cfil_filters_attached(so) == 0) {
1421 CFIL_LOG(LOG_NOTICE, "so %llx waking",
1422 (uint64_t)VM_KERNEL_ADDRPERM(so));
1423 wakeup((caddr_t)cfil_info);
1424 }
1425
1426 /*
1427 * Remove the filter entry from the content filter
1428 * but leave the rest of the state intact as the queues
1429 * may not be empty yet
1430 */
1431 entry->cfe_filter = NULL;
1432 entry->cfe_necp_control_unit = 0;
1433
1434 TAILQ_REMOVE(&cfc->cf_sock_entries, entry, cfe_link);
1435 cfc->cf_sock_count--;
1436 release:
1437 socket_unlock(so, 1);
1438 }
1439 }
1440 verify_content_filter(cfc);
1441
1442 /* Free the stats buffer for this filter */
1443 if (global_cfil_stats_report_buffers[cfc->cf_kcunit - 1] != NULL) {
1444 FREE(global_cfil_stats_report_buffers[cfc->cf_kcunit - 1], M_TEMP);
1445 global_cfil_stats_report_buffers[cfc->cf_kcunit - 1] = NULL;
1446 }
1447 VERIFY(cfc->cf_sock_count == 0);
1448
1449 /*
1450 * Make filter inactive
1451 */
1452 content_filters[kcunit - 1] = NULL;
1453 cfil_active_count--;
1454 cfil_rw_unlock_exclusive(&cfil_lck_rw);
1455
1456 if (cfc->cf_crypto_state != NULL) {
1457 cfil_crypto_cleanup_state(cfc->cf_crypto_state);
1458 cfc->cf_crypto_state = NULL;
1459 }
1460
1461 zfree(content_filter_zone, cfc);
1462 done:
1463 if (error == 0) {
1464 OSIncrementAtomic(&cfil_stats.cfs_ctl_disconnect_ok);
1465 } else {
1466 OSIncrementAtomic(&cfil_stats.cfs_ctl_disconnect_fail);
1467 }
1468
1469 CFIL_LOG(LOG_INFO, "return %d cfil_active_count %u kcunit %u",
1470 error, cfil_active_count, kcunit);
1471
1472 return error;
1473 }
1474
1475 /*
1476 * cfil_acquire_sockbuf()
1477 *
1478 * Prevent any other thread from acquiring the sockbuf
1479 * We use sb_cfil_thread as a semaphore to prevent other threads from
1480 * messing with the sockbuf -- see sblock()
1481 * Note: We do not set SB_LOCK here because the thread may check or modify
1482 * SB_LOCK several times until it calls cfil_release_sockbuf() -- currently
1483 * sblock(), sbunlock() or sodefunct()
1484 */
1485 static int
1486 cfil_acquire_sockbuf(struct socket *so, struct cfil_info *cfil_info, int outgoing)
1487 {
1488 thread_t tp = current_thread();
1489 struct sockbuf *sb = outgoing ? &so->so_snd : &so->so_rcv;
1490 lck_mtx_t *mutex_held;
1491 int error = 0;
1492
1493 /*
1494 * Wait until no thread is holding the sockbuf and other content
1495 * filter threads have released the sockbuf
1496 */
1497 while ((sb->sb_flags & SB_LOCK) ||
1498 (sb->sb_cfil_thread != NULL && sb->sb_cfil_thread != tp)) {
1499 if (so->so_proto->pr_getlock != NULL) {
1500 mutex_held = (*so->so_proto->pr_getlock)(so, PR_F_WILLUNLOCK);
1501 } else {
1502 mutex_held = so->so_proto->pr_domain->dom_mtx;
1503 }
1504
1505 LCK_MTX_ASSERT(mutex_held, LCK_MTX_ASSERT_OWNED);
1506
1507 sb->sb_wantlock++;
1508 VERIFY(sb->sb_wantlock != 0);
1509
1510 msleep(&sb->sb_flags, mutex_held, PSOCK, "cfil_acquire_sockbuf",
1511 NULL);
1512
1513 VERIFY(sb->sb_wantlock != 0);
1514 sb->sb_wantlock--;
1515 }
1516 /*
1517 * Use reference count for repetitive calls on same thread
1518 */
1519 if (sb->sb_cfil_refs == 0) {
1520 VERIFY(sb->sb_cfil_thread == NULL);
1521 VERIFY((sb->sb_flags & SB_LOCK) == 0);
1522
1523 sb->sb_cfil_thread = tp;
1524 sb->sb_flags |= SB_LOCK;
1525 }
1526 sb->sb_cfil_refs++;
1527
1528 /* We acquire the socket buffer when we need to cleanup */
1529 if (cfil_info == NULL) {
1530 CFIL_LOG(LOG_ERR, "so %llx cfil detached",
1531 (uint64_t)VM_KERNEL_ADDRPERM(so));
1532 error = 0;
1533 } else if (cfil_info->cfi_flags & CFIF_DROP) {
1534 CFIL_LOG(LOG_ERR, "so %llx drop set",
1535 (uint64_t)VM_KERNEL_ADDRPERM(so));
1536 error = EPIPE;
1537 }
1538
1539 return error;
1540 }
1541
1542 static void
1543 cfil_release_sockbuf(struct socket *so, int outgoing)
1544 {
1545 struct sockbuf *sb = outgoing ? &so->so_snd : &so->so_rcv;
1546 thread_t tp = current_thread();
1547
1548 socket_lock_assert_owned(so);
1549
1550 if (sb->sb_cfil_thread != NULL && sb->sb_cfil_thread != tp) {
1551 panic("%s sb_cfil_thread %p not current %p", __func__,
1552 sb->sb_cfil_thread, tp);
1553 }
1554 /*
1555 * Don't panic if we are defunct because SB_LOCK has
1556 * been cleared by sodefunct()
1557 */
1558 if (!(so->so_flags & SOF_DEFUNCT) && !(sb->sb_flags & SB_LOCK)) {
1559 panic("%s SB_LOCK not set on %p", __func__,
1560 sb);
1561 }
1562 /*
1563 * We can unlock when the thread unwinds to the last reference
1564 */
1565 sb->sb_cfil_refs--;
1566 if (sb->sb_cfil_refs == 0) {
1567 sb->sb_cfil_thread = NULL;
1568 sb->sb_flags &= ~SB_LOCK;
1569
1570 if (sb->sb_wantlock > 0) {
1571 wakeup(&sb->sb_flags);
1572 }
1573 }
1574 }
1575
1576 cfil_sock_id_t
1577 cfil_sock_id_from_socket(struct socket *so)
1578 {
1579 if ((so->so_flags & SOF_CONTENT_FILTER) && so->so_cfil) {
1580 return so->so_cfil->cfi_sock_id;
1581 } else {
1582 return CFIL_SOCK_ID_NONE;
1583 }
1584 }
1585
1586 static bool
1587 cfil_socket_safe_lock(struct inpcb *inp)
1588 {
1589 if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) != WNT_STOPUSING) {
1590 socket_lock(inp->inp_socket, 1);
1591 if (in_pcb_checkstate(inp, WNT_RELEASE, 1) != WNT_STOPUSING) {
1592 return true;
1593 }
1594 socket_unlock(inp->inp_socket, 1);
1595 }
1596 return false;
1597 }
1598
1599 static struct socket *
1600 cfil_socket_from_sock_id(cfil_sock_id_t cfil_sock_id, bool udp_only)
1601 {
1602 struct socket *so = NULL;
1603 u_int64_t gencnt = cfil_sock_id >> 32;
1604 u_int32_t flowhash = (u_int32_t)(cfil_sock_id & 0x0ffffffff);
1605 struct inpcb *inp = NULL;
1606 struct inpcbinfo *pcbinfo = NULL;
1607
1608 #if VERDICT_DEBUG
1609 CFIL_LOG(LOG_ERR, "CFIL: VERDICT: search for socket: id %llu gencnt %llx flowhash %x", cfil_sock_id, gencnt, flowhash);
1610 #endif
1611
1612 if (udp_only) {
1613 goto find_udp;
1614 }
1615
1616 pcbinfo = &tcbinfo;
1617 lck_rw_lock_shared(pcbinfo->ipi_lock);
1618 LIST_FOREACH(inp, pcbinfo->ipi_listhead, inp_list) {
1619 if (inp->inp_state != INPCB_STATE_DEAD &&
1620 inp->inp_socket != NULL &&
1621 inp->inp_flowhash == flowhash &&
1622 (inp->inp_socket->so_gencnt & 0x0ffffffff) == gencnt &&
1623 inp->inp_socket->so_cfil != NULL) {
1624 if (cfil_socket_safe_lock(inp)) {
1625 so = inp->inp_socket;
1626 }
1627 break;
1628 }
1629 }
1630 lck_rw_done(pcbinfo->ipi_lock);
1631 if (so != NULL) {
1632 goto done;
1633 }
1634
1635 find_udp:
1636
1637 pcbinfo = &udbinfo;
1638 lck_rw_lock_shared(pcbinfo->ipi_lock);
1639 LIST_FOREACH(inp, pcbinfo->ipi_listhead, inp_list) {
1640 if (inp->inp_state != INPCB_STATE_DEAD &&
1641 inp->inp_socket != NULL &&
1642 inp->inp_socket->so_cfil_db != NULL &&
1643 (inp->inp_socket->so_gencnt & 0x0ffffffff) == gencnt) {
1644 if (cfil_socket_safe_lock(inp)) {
1645 so = inp->inp_socket;
1646 }
1647 break;
1648 }
1649 }
1650 lck_rw_done(pcbinfo->ipi_lock);
1651
1652 pcbinfo = &ripcbinfo;
1653 lck_rw_lock_shared(pcbinfo->ipi_lock);
1654 LIST_FOREACH(inp, pcbinfo->ipi_listhead, inp_list) {
1655 if (inp->inp_state != INPCB_STATE_DEAD &&
1656 inp->inp_socket != NULL &&
1657 inp->inp_socket->so_cfil_db != NULL &&
1658 (inp->inp_socket->so_gencnt & 0x0ffffffff) == gencnt) {
1659 if (cfil_socket_safe_lock(inp)) {
1660 so = inp->inp_socket;
1661 }
1662 break;
1663 }
1664 }
1665 lck_rw_done(pcbinfo->ipi_lock);
1666
1667 done:
1668 if (so == NULL) {
1669 OSIncrementAtomic(&cfil_stats.cfs_sock_id_not_found);
1670 CFIL_LOG(LOG_DEBUG,
1671 "no socket for sock_id %llx gencnt %llx flowhash %x",
1672 cfil_sock_id, gencnt, flowhash);
1673 }
1674
1675 return so;
1676 }
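
/*
 * Worked example of the cfil_sock_id layout used by the lookup above
 * (matching the encoding in cfil_info_alloc() below): the upper 32 bits
 * carry the low 32 bits of the socket generation count and the lower 32
 * bits carry the flow hash.  A TCP socket with so_gencnt 0x1b2 and
 * inp_flowhash 0x5a33c4d7 gets cfil_sock_id 0x000001b25a33c4d7, and the
 * lookup recovers gencnt = id >> 32 = 0x1b2 and
 * flowhash = id & 0xffffffff = 0x5a33c4d7.
 */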
1677
1678 static struct socket *
1679 cfil_socket_from_client_uuid(uuid_t necp_client_uuid, bool *cfil_attached)
1680 {
1681 struct socket *so = NULL;
1682 struct inpcb *inp = NULL;
1683 struct inpcbinfo *pcbinfo = &tcbinfo;
1684
1685 lck_rw_lock_shared(pcbinfo->ipi_lock);
1686 LIST_FOREACH(inp, pcbinfo->ipi_listhead, inp_list) {
1687 if (inp->inp_state != INPCB_STATE_DEAD &&
1688 inp->inp_socket != NULL &&
1689 uuid_compare(inp->necp_client_uuid, necp_client_uuid) == 0) {
1690 *cfil_attached = (inp->inp_socket->so_cfil != NULL);
1691 if (cfil_socket_safe_lock(inp)) {
1692 so = inp->inp_socket;
1693 }
1694 break;
1695 }
1696 }
1697 lck_rw_done(pcbinfo->ipi_lock);
1698 if (so != NULL) {
1699 goto done;
1700 }
1701
1702 pcbinfo = &udbinfo;
1703 lck_rw_lock_shared(pcbinfo->ipi_lock);
1704 LIST_FOREACH(inp, pcbinfo->ipi_listhead, inp_list) {
1705 if (inp->inp_state != INPCB_STATE_DEAD &&
1706 inp->inp_socket != NULL &&
1707 uuid_compare(inp->necp_client_uuid, necp_client_uuid) == 0) {
1708 *cfil_attached = (inp->inp_socket->so_cfil_db != NULL);
1709 if (cfil_socket_safe_lock(inp)) {
1710 so = inp->inp_socket;
1711 }
1712 break;
1713 }
1714 }
1715 lck_rw_done(pcbinfo->ipi_lock);
1716
1717 done:
1718 return so;
1719 }
1720
1721 static void
1722 cfil_info_stats_toggle(struct cfil_info *cfil_info, struct cfil_entry *entry, uint32_t report_frequency)
1723 {
1724 struct cfil_info *cfil = NULL;
1725 Boolean found = FALSE;
1726 int kcunit;
1727
1728 if (cfil_info == NULL) {
1729 return;
1730 }
1731
1732 if (report_frequency) {
1733 if (entry == NULL) {
1734 return;
1735 }
1736
1737 // Update stats reporting frequency.
1738 if (entry->cfe_stats_report_frequency != report_frequency) {
1739 entry->cfe_stats_report_frequency = report_frequency;
1740 if (entry->cfe_stats_report_frequency < CFIL_STATS_REPORT_INTERVAL_MIN_MSEC) {
1741 entry->cfe_stats_report_frequency = CFIL_STATS_REPORT_INTERVAL_MIN_MSEC;
1742 }
1743 microuptime(&entry->cfe_stats_report_ts);
1744
1745 // Insert cfil_info into the list only if it is not already in it.
1746 TAILQ_FOREACH(cfil, &cfil_sock_head_stats, cfi_link_stats) {
1747 if (cfil == cfil_info) {
1748 return;
1749 }
1750 }
1751
1752 TAILQ_INSERT_TAIL(&cfil_sock_head_stats, cfil_info, cfi_link_stats);
1753
1754 // Wake up the stats thread if this is the first flow added
1755 if (cfil_sock_attached_stats_count == 0) {
1756 thread_wakeup((caddr_t)&cfil_sock_attached_stats_count);
1757 }
1758 cfil_sock_attached_stats_count++;
1759 #if STATS_DEBUG
1760 CFIL_LOG(LOG_ERR, "CFIL: VERDICT RECEIVED - STATS FLOW INSERTED: <so %llx sockID %llu> stats frequency %d msecs",
1761 cfil_info->cfi_so ? (uint64_t)VM_KERNEL_ADDRPERM(cfil_info->cfi_so) : 0,
1762 cfil_info->cfi_sock_id,
1763 entry->cfe_stats_report_frequency);
1764 #endif
1765 }
1766 } else {
1767 // Turn off stats reporting for this filter.
1768 if (entry != NULL) {
1769 // Already off, no change.
1770 if (entry->cfe_stats_report_frequency == 0) {
1771 return;
1772 }
1773
1774 entry->cfe_stats_report_frequency = 0;
1775 // If cfil_info still has filter(s) asking for stats, no need to remove from list.
1776 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
1777 if (cfil_info->cfi_entries[kcunit - 1].cfe_stats_report_frequency > 0) {
1778 return;
1779 }
1780 }
1781 }
1782
1783 // No more filters asking for stats for this cfil_info, remove it from the list.
1784 if (!TAILQ_EMPTY(&cfil_sock_head_stats)) {
1785 found = FALSE;
1786 TAILQ_FOREACH(cfil, &cfil_sock_head_stats, cfi_link_stats) {
1787 if (cfil == cfil_info) {
1788 found = TRUE;
1789 break;
1790 }
1791 }
1792 if (found) {
1793 cfil_sock_attached_stats_count--;
1794 TAILQ_REMOVE(&cfil_sock_head_stats, cfil_info, cfi_link_stats);
1795 #if STATS_DEBUG
1796 CFIL_LOG(LOG_ERR, "CFIL: VERDICT RECEIVED - STATS FLOW DELETED: <so %llx sockID %llu> stats frequency reset",
1797 cfil_info->cfi_so ? (uint64_t)VM_KERNEL_ADDRPERM(cfil_info->cfi_so) : 0,
1798 cfil_info->cfi_sock_id);
1799 #endif
1800 }
1801 }
1802 }
1803 }
1804
1805 static errno_t
1806 cfil_ctl_send(kern_ctl_ref kctlref, u_int32_t kcunit, void *unitinfo, mbuf_t m,
1807 int flags)
1808 {
1809 #pragma unused(kctlref, flags)
1810 errno_t error = 0;
1811 struct cfil_msg_hdr *msghdr;
1812 struct content_filter *cfc = (struct content_filter *)unitinfo;
1813 struct socket *so;
1814 struct cfil_msg_action *action_msg;
1815 struct cfil_entry *entry;
1816 struct cfil_info *cfil_info = NULL;
1817 unsigned int data_len = 0;
1818
1819 CFIL_LOG(LOG_INFO, "");
1820
1821 if (content_filters == NULL) {
1822 CFIL_LOG(LOG_ERR, "no content filter");
1823 error = EINVAL;
1824 goto done;
1825 }
1826 if (kcunit > MAX_CONTENT_FILTER) {
1827 CFIL_LOG(LOG_ERR, "kcunit %u > MAX_CONTENT_FILTER (%d)",
1828 kcunit, MAX_CONTENT_FILTER);
1829 error = EINVAL;
1830 goto done;
1831 }
1832 if (m == NULL) {
1833 CFIL_LOG(LOG_ERR, "null mbuf");
1834 error = EINVAL;
1835 goto done;
1836 }
1837 data_len = m_length(m);
1838
1839 if (data_len < sizeof(struct cfil_msg_hdr)) {
1840 CFIL_LOG(LOG_ERR, "too short %u", data_len);
1841 error = EINVAL;
1842 goto done;
1843 }
1844 msghdr = (struct cfil_msg_hdr *)mbuf_data(m);
1845 if (msghdr->cfm_version != CFM_VERSION_CURRENT) {
1846 CFIL_LOG(LOG_ERR, "bad version %u", msghdr->cfm_version);
1847 error = EINVAL;
1848 goto done;
1849 }
1850 if (msghdr->cfm_type != CFM_TYPE_ACTION) {
1851 CFIL_LOG(LOG_ERR, "bad type %u", msghdr->cfm_type);
1852 error = EINVAL;
1853 goto done;
1854 }
1855 if (msghdr->cfm_len > data_len) {
1856 CFIL_LOG(LOG_ERR, "bad length %u", msghdr->cfm_len);
1857 error = EINVAL;
1858 goto done;
1859 }
1860
1861 /* Validate action operation */
1862 switch (msghdr->cfm_op) {
1863 case CFM_OP_DATA_UPDATE:
1864 OSIncrementAtomic(
1865 &cfil_stats.cfs_ctl_action_data_update);
1866 break;
1867 case CFM_OP_DROP:
1868 OSIncrementAtomic(&cfil_stats.cfs_ctl_action_drop);
1869 break;
1870 case CFM_OP_BLESS_CLIENT:
1871 if (msghdr->cfm_len != sizeof(struct cfil_msg_bless_client)) {
1872 OSIncrementAtomic(&cfil_stats.cfs_ctl_action_bad_len);
1873 error = EINVAL;
1874 CFIL_LOG(LOG_ERR, "bad len: %u for op %u",
1875 msghdr->cfm_len,
1876 msghdr->cfm_op);
1877 goto done;
1878 }
1879 error = cfil_action_bless_client(kcunit, msghdr);
1880 goto done;
1881 case CFM_OP_SET_CRYPTO_KEY:
1882 if (msghdr->cfm_len != sizeof(struct cfil_msg_set_crypto_key)) {
1883 OSIncrementAtomic(&cfil_stats.cfs_ctl_action_bad_len);
1884 error = EINVAL;
1885 CFIL_LOG(LOG_ERR, "bad len: %u for op %u",
1886 msghdr->cfm_len,
1887 msghdr->cfm_op);
1888 goto done;
1889 }
1890 error = cfil_action_set_crypto_key(kcunit, msghdr);
1891 goto done;
1892 default:
1893 OSIncrementAtomic(&cfil_stats.cfs_ctl_action_bad_op);
1894 CFIL_LOG(LOG_ERR, "bad op %u", msghdr->cfm_op);
1895 error = EINVAL;
1896 goto done;
1897 }
1898 if (msghdr->cfm_len != sizeof(struct cfil_msg_action)) {
1899 OSIncrementAtomic(&cfil_stats.cfs_ctl_action_bad_len);
1900 error = EINVAL;
1901 CFIL_LOG(LOG_ERR, "bad len: %u for op %u",
1902 msghdr->cfm_len,
1903 msghdr->cfm_op);
1904 goto done;
1905 }
1906 cfil_rw_lock_shared(&cfil_lck_rw);
1907 if (cfc != (void *)content_filters[kcunit - 1]) {
1908 CFIL_LOG(LOG_ERR, "unitinfo does not match for kcunit %u",
1909 kcunit);
1910 error = EINVAL;
1911 cfil_rw_unlock_shared(&cfil_lck_rw);
1912 goto done;
1913 }
1914 cfil_rw_unlock_shared(&cfil_lck_rw);
1915
1916 // Search for socket (TCP+UDP) and lock so
1917 so = cfil_socket_from_sock_id(msghdr->cfm_sock_id, false);
1918 if (so == NULL) {
1919 CFIL_LOG(LOG_NOTICE, "bad sock_id %llx",
1920 msghdr->cfm_sock_id);
1921 error = EINVAL;
1922 goto done;
1923 }
1924
1925 cfil_info = so->so_cfil_db != NULL ?
1926 cfil_db_get_cfil_info(so->so_cfil_db, msghdr->cfm_sock_id) : so->so_cfil;
1927
1928 if (cfil_info == NULL) {
1929 CFIL_LOG(LOG_NOTICE, "so %llx <id %llu> not attached",
1930 (uint64_t)VM_KERNEL_ADDRPERM(so), msghdr->cfm_sock_id);
1931 error = EINVAL;
1932 goto unlock;
1933 } else if (cfil_info->cfi_flags & CFIF_DROP) {
1934 CFIL_LOG(LOG_NOTICE, "so %llx drop set",
1935 (uint64_t)VM_KERNEL_ADDRPERM(so));
1936 error = EINVAL;
1937 goto unlock;
1938 }
1939
1940 if (cfil_info->cfi_debug) {
1941 cfil_info_log(LOG_ERR, cfil_info, "CFIL: RECEIVED MSG FROM FILTER");
1942 }
1943
1944 entry = &cfil_info->cfi_entries[kcunit - 1];
1945 if (entry->cfe_filter == NULL) {
1946 CFIL_LOG(LOG_NOTICE, "so %llx no filter",
1947 (uint64_t)VM_KERNEL_ADDRPERM(so));
1948 error = EINVAL;
1949 goto unlock;
1950 }
1951
1952 if (entry->cfe_flags & CFEF_SENT_SOCK_ATTACHED) {
1953 entry->cfe_flags |= CFEF_DATA_START;
1954 } else {
1955 CFIL_LOG(LOG_ERR,
1956 "so %llx attached not sent for %u",
1957 (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit);
1958 error = EINVAL;
1959 goto unlock;
1960 }
1961
1962 microuptime(&entry->cfe_last_action);
1963 CFI_ADD_TIME_LOG(cfil_info, &entry->cfe_last_action, &cfil_info->cfi_first_event, msghdr->cfm_op);
1964
1965 action_msg = (struct cfil_msg_action *)msghdr;
1966
1967 switch (msghdr->cfm_op) {
1968 case CFM_OP_DATA_UPDATE:
1969
1970 if (cfil_info->cfi_debug) {
1971 cfil_info_log(LOG_ERR, cfil_info, "CFIL: RECEIVED CFM_OP_DATA_UPDATE");
1972 CFIL_LOG(LOG_ERR, "CFIL: VERDICT RECEIVED: <so %llx sockID %llu> <IN peek:%llu pass:%llu, OUT peek:%llu pass:%llu>",
1973 (uint64_t)VM_KERNEL_ADDRPERM(so),
1974 cfil_info->cfi_sock_id,
1975 action_msg->cfa_in_peek_offset, action_msg->cfa_in_pass_offset,
1976 action_msg->cfa_out_peek_offset, action_msg->cfa_out_pass_offset);
1977 }
1978
1979 #if VERDICT_DEBUG
1980 CFIL_LOG(LOG_ERR, "CFIL: VERDICT RECEIVED: <so %llx sockID %llu> <IN peek:%llu pass:%llu, OUT peek:%llu pass:%llu>",
1981 (uint64_t)VM_KERNEL_ADDRPERM(so),
1982 cfil_info->cfi_sock_id,
1983 action_msg->cfa_in_peek_offset, action_msg->cfa_in_pass_offset,
1984 action_msg->cfa_out_peek_offset, action_msg->cfa_out_pass_offset);
1985 #endif
1986 /*
1987 * Received a verdict; at this point we know this
1988 * socket connection is allowed. Unblock the thread
1989 * immediately before proceeding to process the verdict.
1990 */
1991 cfil_sock_received_verdict(so);
1992
1993 if (action_msg->cfa_out_peek_offset != 0 ||
1994 action_msg->cfa_out_pass_offset != 0) {
1995 error = cfil_action_data_pass(so, cfil_info, kcunit, 1,
1996 action_msg->cfa_out_pass_offset,
1997 action_msg->cfa_out_peek_offset);
1998 }
1999 if (error == EJUSTRETURN) {
2000 error = 0;
2001 }
2002 if (error != 0) {
2003 break;
2004 }
2005 if (action_msg->cfa_in_peek_offset != 0 ||
2006 action_msg->cfa_in_pass_offset != 0) {
2007 error = cfil_action_data_pass(so, cfil_info, kcunit, 0,
2008 action_msg->cfa_in_pass_offset,
2009 action_msg->cfa_in_peek_offset);
2010 }
2011 if (error == EJUSTRETURN) {
2012 error = 0;
2013 }
2014
2015 // Toggle stats reporting according to received verdict.
2016 cfil_rw_lock_exclusive(&cfil_lck_rw);
2017 cfil_info_stats_toggle(cfil_info, entry, action_msg->cfa_stats_frequency);
2018 cfil_rw_unlock_exclusive(&cfil_lck_rw);
2019
2020 break;
2021
2022 case CFM_OP_DROP:
2023 if (cfil_info->cfi_debug) {
2024 cfil_info_log(LOG_ERR, cfil_info, "CFIL: RECEIVED CFM_OP_DROP");
2025 CFIL_LOG(LOG_ERR, "CFIL: VERDICT DROP RECEIVED: <so %llx sockID %llu> <IN peek:%llu pass:%llu, OUT peek:%llu pass:%llu>",
2026 (uint64_t)VM_KERNEL_ADDRPERM(so),
2027 cfil_info->cfi_sock_id,
2028 action_msg->cfa_in_peek_offset, action_msg->cfa_in_pass_offset,
2029 action_msg->cfa_out_peek_offset, action_msg->cfa_out_pass_offset);
2030 }
2031
2032 #if VERDICT_DEBUG
2033 CFIL_LOG(LOG_ERR, "CFIL: VERDICT DROP RECEIVED: <so %llx sockID %llu> <IN peek:%llu pass:%llu, OUT peek:%llu pass:%llu>",
2034 (uint64_t)VM_KERNEL_ADDRPERM(so),
2035 cfil_info->cfi_sock_id,
2036 action_msg->cfa_in_peek_offset, action_msg->cfa_in_pass_offset,
2037 action_msg->cfa_out_peek_offset, action_msg->cfa_out_pass_offset);
2038 #endif
2039 error = cfil_action_drop(so, cfil_info, kcunit);
2040 cfil_sock_received_verdict(so);
2041 break;
2042
2043 default:
2044 error = EINVAL;
2045 break;
2046 }
2047 unlock:
2048 socket_unlock(so, 1);
2049 done:
2050 mbuf_freem(m);
2051
2052 if (error == 0) {
2053 OSIncrementAtomic(&cfil_stats.cfs_ctl_send_ok);
2054 } else {
2055 OSIncrementAtomic(&cfil_stats.cfs_ctl_send_bad);
2056 }
2057
2058 return error;
2059 }
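
/*
 * Illustrative user space counterpart to cfil_ctl_send() (a sketch; the
 * cfa_msghdr member name and the kctl_fd/sock_id/offset variables are
 * assumptions here, the remaining names come from the cfil message
 * definitions used above).  A filter agent issues a pass verdict for both
 * directions of a flow by writing a CFM_OP_DATA_UPDATE action on its
 * kernel control socket:
 *
 *	struct cfil_msg_action action;
 *
 *	memset(&action, 0, sizeof(action));
 *	action.cfa_msghdr.cfm_len = sizeof(action);
 *	action.cfa_msghdr.cfm_version = CFM_VERSION_CURRENT;
 *	action.cfa_msghdr.cfm_type = CFM_TYPE_ACTION;
 *	action.cfa_msghdr.cfm_op = CFM_OP_DATA_UPDATE;
 *	action.cfa_msghdr.cfm_sock_id = sock_id;      // from the attach event
 *	action.cfa_out_pass_offset = out_pass_offset; // allow outgoing data up to this offset
 *	action.cfa_out_peek_offset = out_peek_offset; // want to see outgoing data up to this offset
 *	action.cfa_in_pass_offset = in_pass_offset;
 *	action.cfa_in_peek_offset = in_peek_offset;
 *	if (send(kctl_fd, &action, sizeof(action), 0) == -1) {
 *		warn("send CFM_OP_DATA_UPDATE");
 *	}
 *
 * cfil_ctl_send() validates the header, looks the socket up from
 * cfm_sock_id and applies the offsets via cfil_action_data_pass().
 */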
2060
2061 static errno_t
2062 cfil_ctl_getopt(kern_ctl_ref kctlref, u_int32_t kcunit, void *unitinfo,
2063 int opt, void *data, size_t *len)
2064 {
2065 #pragma unused(kctlref, opt)
2066 struct cfil_info *cfil_info = NULL;
2067 errno_t error = 0;
2068 struct content_filter *cfc = (struct content_filter *)unitinfo;
2069
2070 CFIL_LOG(LOG_NOTICE, "");
2071
2072 cfil_rw_lock_shared(&cfil_lck_rw);
2073
2074 if (content_filters == NULL) {
2075 CFIL_LOG(LOG_ERR, "no content filter");
2076 error = EINVAL;
2077 goto done;
2078 }
2079 if (kcunit > MAX_CONTENT_FILTER) {
2080 CFIL_LOG(LOG_ERR, "kcunit %u > MAX_CONTENT_FILTER (%d)",
2081 kcunit, MAX_CONTENT_FILTER);
2082 error = EINVAL;
2083 goto done;
2084 }
2085 if (cfc != (void *)content_filters[kcunit - 1]) {
2086 CFIL_LOG(LOG_ERR, "unitinfo does not match for kcunit %u",
2087 kcunit);
2088 error = EINVAL;
2089 goto done;
2090 }
2091 switch (opt) {
2092 case CFIL_OPT_NECP_CONTROL_UNIT:
2093 if (*len < sizeof(uint32_t)) {
2094 CFIL_LOG(LOG_ERR, "len too small %lu", *len);
2095 error = EINVAL;
2096 goto done;
2097 }
2098 if (data != NULL) {
2099 *(uint32_t *)data = cfc->cf_necp_control_unit;
2100 }
2101 break;
2102 case CFIL_OPT_GET_SOCKET_INFO:
2103 if (*len != sizeof(struct cfil_opt_sock_info)) {
2104 CFIL_LOG(LOG_ERR, "len does not match %lu", *len);
2105 error = EINVAL;
2106 goto done;
2107 }
2108 if (data == NULL) {
2109 CFIL_LOG(LOG_ERR, "data not passed");
2110 error = EINVAL;
2111 goto done;
2112 }
2113
2114 struct cfil_opt_sock_info *sock_info =
2115 (struct cfil_opt_sock_info *) data;
2116
2117 // Unlock here so that we never hold both cfil_lck_rw and the
2118 // socket_lock at the same time. Otherwise, this can deadlock
2119 // because soclose() takes the socket_lock and then exclusive
2120 // cfil_lck_rw and we require the opposite order.
2121
2122 // WARNING: Be sure to never use anything protected
2123 // by cfil_lck_rw beyond this point.
2124 // WARNING: Be sure to avoid fallthrough and
2125 // goto return_already_unlocked from this branch.
2126 cfil_rw_unlock_shared(&cfil_lck_rw);
2127
2128 // Search (TCP+UDP) and lock socket
2129 struct socket *sock =
2130 cfil_socket_from_sock_id(sock_info->cfs_sock_id, false);
2131 if (sock == NULL) {
2132 #if LIFECYCLE_DEBUG
2133 CFIL_LOG(LOG_ERR, "CFIL: GET_SOCKET_INFO failed: bad sock_id %llu",
2134 sock_info->cfs_sock_id);
2135 #endif
2136 error = ENOENT;
2137 goto return_already_unlocked;
2138 }
2139
2140 cfil_info = (sock->so_cfil_db != NULL) ?
2141 cfil_db_get_cfil_info(sock->so_cfil_db, sock_info->cfs_sock_id) : sock->so_cfil;
2142
2143 if (cfil_info == NULL) {
2144 #if LIFECYCLE_DEBUG
2145 CFIL_LOG(LOG_ERR, "CFIL: GET_SOCKET_INFO failed: so %llx not attached, cannot fetch info",
2146 (uint64_t)VM_KERNEL_ADDRPERM(sock));
2147 #endif
2148 error = EINVAL;
2149 socket_unlock(sock, 1);
2150 goto return_already_unlocked;
2151 }
2152
2153 // Fill out family, type, and protocol
2154 sock_info->cfs_sock_family = sock->so_proto->pr_domain->dom_family;
2155 sock_info->cfs_sock_type = sock->so_proto->pr_type;
2156 sock_info->cfs_sock_protocol = sock->so_proto->pr_protocol;
2157
2158 // Source and destination addresses
2159 struct inpcb *inp = sotoinpcb(sock);
2160 if (inp->inp_vflag & INP_IPV6) {
2161 struct in6_addr *laddr = NULL, *faddr = NULL;
2162 u_int16_t lport = 0, fport = 0;
2163
2164 cfil_get_flow_address_v6(cfil_info->cfi_hash_entry, inp,
2165 &laddr, &faddr, &lport, &fport);
2166 fill_ip6_sockaddr_4_6(&sock_info->cfs_local, laddr, lport);
2167 fill_ip6_sockaddr_4_6(&sock_info->cfs_remote, faddr, fport);
2168 } else if (inp->inp_vflag & INP_IPV4) {
2169 struct in_addr laddr = {.s_addr = 0}, faddr = {.s_addr = 0};
2170 u_int16_t lport = 0, fport = 0;
2171
2172 cfil_get_flow_address(cfil_info->cfi_hash_entry, inp,
2173 &laddr, &faddr, &lport, &fport);
2174 fill_ip_sockaddr_4_6(&sock_info->cfs_local, laddr, lport);
2175 fill_ip_sockaddr_4_6(&sock_info->cfs_remote, faddr, fport);
2176 }
2177
2178 // Set the pid info
2179 sock_info->cfs_pid = sock->last_pid;
2180 memcpy(sock_info->cfs_uuid, sock->last_uuid, sizeof(uuid_t));
2181
2182 if (sock->so_flags & SOF_DELEGATED) {
2183 sock_info->cfs_e_pid = sock->e_pid;
2184 memcpy(sock_info->cfs_e_uuid, sock->e_uuid, sizeof(uuid_t));
2185 } else {
2186 sock_info->cfs_e_pid = sock->last_pid;
2187 memcpy(sock_info->cfs_e_uuid, sock->last_uuid, sizeof(uuid_t));
2188 }
2189
2190 socket_unlock(sock, 1);
2191
2192 goto return_already_unlocked;
2193 default:
2194 error = ENOPROTOOPT;
2195 break;
2196 }
2197 done:
2198 cfil_rw_unlock_shared(&cfil_lck_rw);
2199
2200 return error;
2201
2202 return_already_unlocked:
2203
2204 return error;
2205 }
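
/*
 * Illustrative user space query for CFIL_OPT_GET_SOCKET_INFO (a sketch;
 * kctl_fd and sock_id are assumptions).  The agent pre-fills cfs_sock_id,
 * which the code above uses to locate the flow, and the rest of
 * struct cfil_opt_sock_info is filled in on return:
 *
 *	struct cfil_opt_sock_info info;
 *	socklen_t len = sizeof(info);
 *
 *	memset(&info, 0, sizeof(info));
 *	info.cfs_sock_id = sock_id;	// from a previous attach event
 *	if (getsockopt(kctl_fd, SYSPROTO_CONTROL, CFIL_OPT_GET_SOCKET_INFO,
 *	    &info, &len) == 0) {
 *		// info.cfs_sock_family / cfs_sock_type / cfs_sock_protocol,
 *		// info.cfs_local / cfs_remote, info.cfs_pid / cfs_e_pid ...
 *	}
 */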
2206
2207 static errno_t
2208 cfil_ctl_setopt(kern_ctl_ref kctlref, u_int32_t kcunit, void *unitinfo,
2209 int opt, void *data, size_t len)
2210 {
2211 #pragma unused(kctlref, opt)
2212 errno_t error = 0;
2213 struct content_filter *cfc = (struct content_filter *)unitinfo;
2214
2215 CFIL_LOG(LOG_NOTICE, "");
2216
2217 cfil_rw_lock_exclusive(&cfil_lck_rw);
2218
2219 if (content_filters == NULL) {
2220 CFIL_LOG(LOG_ERR, "no content filter");
2221 error = EINVAL;
2222 goto done;
2223 }
2224 if (kcunit > MAX_CONTENT_FILTER) {
2225 CFIL_LOG(LOG_ERR, "kcunit %u > MAX_CONTENT_FILTER (%d)",
2226 kcunit, MAX_CONTENT_FILTER);
2227 error = EINVAL;
2228 goto done;
2229 }
2230 if (cfc != (void *)content_filters[kcunit - 1]) {
2231 CFIL_LOG(LOG_ERR, "unitinfo does not match for kcunit %u",
2232 kcunit);
2233 error = EINVAL;
2234 goto done;
2235 }
2236 switch (opt) {
2237 case CFIL_OPT_NECP_CONTROL_UNIT:
2238 if (len < sizeof(uint32_t)) {
2239 CFIL_LOG(LOG_ERR, "CFIL_OPT_NECP_CONTROL_UNIT "
2240 "len too small %lu", len);
2241 error = EINVAL;
2242 goto done;
2243 }
2244 if (cfc->cf_necp_control_unit != 0) {
2245 CFIL_LOG(LOG_ERR, "CFIL_OPT_NECP_CONTROL_UNIT "
2246 "already set %u",
2247 cfc->cf_necp_control_unit);
2248 error = EINVAL;
2249 goto done;
2250 }
2251 cfc->cf_necp_control_unit = *(uint32_t *)data;
2252 break;
2253 default:
2254 error = ENOPROTOOPT;
2255 break;
2256 }
2257 done:
2258 cfil_rw_unlock_exclusive(&cfil_lck_rw);
2259
2260 return error;
2261 }
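
/*
 * Illustrative user space counterpart for CFIL_OPT_NECP_CONTROL_UNIT (a
 * sketch; kctl_fd and the control unit value are assumptions).  The agent
 * binds its kernel control socket to the NECP filter control unit its
 * filtering rules were registered under; per the check above, the option
 * can only be set once per control socket:
 *
 *	uint32_t control_unit = necp_filter_control_unit;
 *
 *	if (setsockopt(kctl_fd, SYSPROTO_CONTROL, CFIL_OPT_NECP_CONTROL_UNIT,
 *	    &control_unit, sizeof(control_unit)) == -1) {
 *		warn("setsockopt CFIL_OPT_NECP_CONTROL_UNIT");
 *	}
 */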
2262
2263
2264 static void
2265 cfil_ctl_rcvd(kern_ctl_ref kctlref, u_int32_t kcunit, void *unitinfo, int flags)
2266 {
2267 #pragma unused(kctlref, flags)
2268 struct content_filter *cfc = (struct content_filter *)unitinfo;
2269 struct socket *so = NULL;
2270 int error;
2271 struct cfil_entry *entry;
2272 struct cfil_info *cfil_info = NULL;
2273
2274 CFIL_LOG(LOG_INFO, "");
2275
2276 if (content_filters == NULL) {
2277 CFIL_LOG(LOG_ERR, "no content filter");
2278 OSIncrementAtomic(&cfil_stats.cfs_ctl_rcvd_bad);
2279 return;
2280 }
2281 if (kcunit > MAX_CONTENT_FILTER) {
2282 CFIL_LOG(LOG_ERR, "kcunit %u > MAX_CONTENT_FILTER (%d)",
2283 kcunit, MAX_CONTENT_FILTER);
2284 OSIncrementAtomic(&cfil_stats.cfs_ctl_rcvd_bad);
2285 return;
2286 }
2287 cfil_rw_lock_shared(&cfil_lck_rw);
2288 if (cfc != (void *)content_filters[kcunit - 1]) {
2289 CFIL_LOG(LOG_ERR, "unitinfo does not match for kcunit %u",
2290 kcunit);
2291 OSIncrementAtomic(&cfil_stats.cfs_ctl_rcvd_bad);
2292 goto done;
2293 }
2294 /* Let's assume the flow control is lifted */
2295 if (cfc->cf_flags & CFF_FLOW_CONTROLLED) {
2296 if (!cfil_rw_lock_shared_to_exclusive(&cfil_lck_rw)) {
2297 cfil_rw_lock_exclusive(&cfil_lck_rw);
2298 }
2299
2300 cfc->cf_flags &= ~CFF_FLOW_CONTROLLED;
2301
2302 cfil_rw_lock_exclusive_to_shared(&cfil_lck_rw);
2303 LCK_RW_ASSERT(&cfil_lck_rw, LCK_RW_ASSERT_SHARED);
2304 }
2305 /*
2306 * Flow control will be raised again as soon as an entry cannot enqueue
2307 * to the kernel control socket
2308 */
2309 while ((cfc->cf_flags & CFF_FLOW_CONTROLLED) == 0) {
2310 verify_content_filter(cfc);
2311
2312 cfil_rw_lock_assert_held(&cfil_lck_rw, 0);
2313
2314 /* Find an entry that is flow controlled */
2315 TAILQ_FOREACH(entry, &cfc->cf_sock_entries, cfe_link) {
2316 if (entry->cfe_cfil_info == NULL ||
2317 entry->cfe_cfil_info->cfi_so == NULL) {
2318 continue;
2319 }
2320 if ((entry->cfe_flags & CFEF_FLOW_CONTROLLED) == 0) {
2321 continue;
2322 }
break;	/* found a flow-controlled entry */
2323 }
2324 if (entry == NULL) {
2325 break;
2326 }
2327
2328 OSIncrementAtomic(&cfil_stats.cfs_ctl_rcvd_flow_lift);
2329
2330 cfil_info = entry->cfe_cfil_info;
2331 so = cfil_info->cfi_so;
2332
2333 cfil_rw_unlock_shared(&cfil_lck_rw);
2334 socket_lock(so, 1);
2335
2336 do {
2337 error = cfil_acquire_sockbuf(so, cfil_info, 1);
2338 if (error == 0) {
2339 error = cfil_data_service_ctl_q(so, cfil_info, kcunit, 1);
2340 }
2341 cfil_release_sockbuf(so, 1);
2342 if (error != 0) {
2343 break;
2344 }
2345
2346 error = cfil_acquire_sockbuf(so, cfil_info, 0);
2347 if (error == 0) {
2348 error = cfil_data_service_ctl_q(so, cfil_info, kcunit, 0);
2349 }
2350 cfil_release_sockbuf(so, 0);
2351 } while (0);
2352
2353 socket_lock_assert_owned(so);
2354 socket_unlock(so, 1);
2355
2356 cfil_rw_lock_shared(&cfil_lck_rw);
2357 }
2358 done:
2359 cfil_rw_unlock_shared(&cfil_lck_rw);
2360 }
2361
2362 void
2363 cfil_init(void)
2364 {
2365 struct kern_ctl_reg kern_ctl;
2366 errno_t error = 0;
2367 vm_size_t content_filter_size = 0; /* size of content_filter */
2368 vm_size_t cfil_info_size = 0; /* size of cfil_info */
2369 vm_size_t cfil_hash_entry_size = 0; /* size of cfil_hash_entry */
2370 vm_size_t cfil_db_size = 0; /* size of cfil_db */
2371 unsigned int mbuf_limit = 0;
2372
2373 CFIL_LOG(LOG_NOTICE, "");
2374
2375 /*
2376 * Compile time verifications
2377 */
2378 _CASSERT(CFIL_MAX_FILTER_COUNT == MAX_CONTENT_FILTER);
2379 _CASSERT(sizeof(struct cfil_filter_stat) % sizeof(uint32_t) == 0);
2380 _CASSERT(sizeof(struct cfil_entry_stat) % sizeof(uint32_t) == 0);
2381 _CASSERT(sizeof(struct cfil_sock_stat) % sizeof(uint32_t) == 0);
2382
2383 /*
2384 * Runtime verifications
2385 */
2386 VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_ctl_q_in_enqueued,
2387 sizeof(uint32_t)));
2388 VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_ctl_q_out_enqueued,
2389 sizeof(uint32_t)));
2390 VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_ctl_q_in_peeked,
2391 sizeof(uint32_t)));
2392 VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_ctl_q_out_peeked,
2393 sizeof(uint32_t)));
2394
2395 VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_pending_q_in_enqueued,
2396 sizeof(uint32_t)));
2397 VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_pending_q_out_enqueued,
2398 sizeof(uint32_t)));
2399
2400 VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_inject_q_in_enqueued,
2401 sizeof(uint32_t)));
2402 VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_inject_q_out_enqueued,
2403 sizeof(uint32_t)));
2404 VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_inject_q_in_passed,
2405 sizeof(uint32_t)));
2406 VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_inject_q_out_passed,
2407 sizeof(uint32_t)));
2408
2409 /*
2410 * Zone for content filters kernel control sockets
2411 */
2412 content_filter_size = sizeof(struct content_filter);
2413 content_filter_zone = zinit(content_filter_size,
2414 CONTENT_FILTER_ZONE_MAX * content_filter_size,
2415 0,
2416 CONTENT_FILTER_ZONE_NAME);
2417 if (content_filter_zone == NULL) {
2418 panic("%s: zinit(%s) failed", __func__,
2419 CONTENT_FILTER_ZONE_NAME);
2420 /* NOTREACHED */
2421 }
2422 zone_change(content_filter_zone, Z_CALLERACCT, FALSE);
2423 zone_change(content_filter_zone, Z_EXPAND, TRUE);
2424
2425 /*
2426 * Zone for per socket content filters
2427 */
2428 cfil_info_size = sizeof(struct cfil_info);
2429 cfil_info_zone = zinit(cfil_info_size,
2430 CFIL_INFO_ZONE_MAX * cfil_info_size,
2431 0,
2432 CFIL_INFO_ZONE_NAME);
2433 if (cfil_info_zone == NULL) {
2434 panic("%s: zinit(%s) failed", __func__, CFIL_INFO_ZONE_NAME);
2435 /* NOTREACHED */
2436 }
2437 zone_change(cfil_info_zone, Z_CALLERACCT, FALSE);
2438 zone_change(cfil_info_zone, Z_EXPAND, TRUE);
2439
2440 /*
2441 * Zone for content filters cfil hash entries and db
2442 */
2443 cfil_hash_entry_size = sizeof(struct cfil_hash_entry);
2444 cfil_hash_entry_zone = zinit(cfil_hash_entry_size,
2445 CFIL_HASH_ENTRY_ZONE_MAX * cfil_hash_entry_size,
2446 0,
2447 CFIL_HASH_ENTRY_ZONE_NAME);
2448 if (cfil_hash_entry_zone == NULL) {
2449 panic("%s: zinit(%s) failed", __func__, CFIL_HASH_ENTRY_ZONE_NAME);
2450 /* NOTREACHED */
2451 }
2452 zone_change(cfil_hash_entry_zone, Z_CALLERACCT, FALSE);
2453 zone_change(cfil_hash_entry_zone, Z_EXPAND, TRUE);
2454
2455 cfil_db_size = sizeof(struct cfil_db);
2456 cfil_db_zone = zinit(cfil_db_size,
2457 CFIL_DB_ZONE_MAX * cfil_db_size,
2458 0,
2459 CFIL_DB_ZONE_NAME);
2460 if (cfil_db_zone == NULL) {
2461 panic("%s: zinit(%s) failed", __func__, CFIL_DB_ZONE_NAME);
2462 /* NOTREACHED */
2463 }
2464 zone_change(cfil_db_zone, Z_CALLERACCT, FALSE);
2465 zone_change(cfil_db_zone, Z_EXPAND, TRUE);
2466
2467 /*
2468 * Allocate locks
2469 */
2470 cfil_lck_grp_attr = lck_grp_attr_alloc_init();
2471 if (cfil_lck_grp_attr == NULL) {
2472 panic("%s: lck_grp_attr_alloc_init failed", __func__);
2473 /* NOTREACHED */
2474 }
2475 cfil_lck_grp = lck_grp_alloc_init("content filter",
2476 cfil_lck_grp_attr);
2477 if (cfil_lck_grp == NULL) {
2478 panic("%s: lck_grp_alloc_init failed", __func__);
2479 /* NOTREACHED */
2480 }
2481 cfil_lck_attr = lck_attr_alloc_init();
2482 if (cfil_lck_attr == NULL) {
2483 panic("%s: lck_attr_alloc_init failed", __func__);
2484 /* NOTREACHED */
2485 }
2486 lck_rw_init(&cfil_lck_rw, cfil_lck_grp, cfil_lck_attr);
2487
2488 TAILQ_INIT(&cfil_sock_head);
2489 TAILQ_INIT(&cfil_sock_head_stats);
2490
2491 /*
2492 * Register kernel control
2493 */
2494 bzero(&kern_ctl, sizeof(kern_ctl));
2495 strlcpy(kern_ctl.ctl_name, CONTENT_FILTER_CONTROL_NAME,
2496 sizeof(kern_ctl.ctl_name));
2497 kern_ctl.ctl_flags = CTL_FLAG_PRIVILEGED | CTL_FLAG_REG_EXTENDED;
2498 kern_ctl.ctl_sendsize = 512 * 1024; /* enough? */
2499 kern_ctl.ctl_recvsize = 512 * 1024; /* enough? */
2500 kern_ctl.ctl_connect = cfil_ctl_connect;
2501 kern_ctl.ctl_disconnect = cfil_ctl_disconnect;
2502 kern_ctl.ctl_send = cfil_ctl_send;
2503 kern_ctl.ctl_getopt = cfil_ctl_getopt;
2504 kern_ctl.ctl_setopt = cfil_ctl_setopt;
2505 kern_ctl.ctl_rcvd = cfil_ctl_rcvd;
2506 error = ctl_register(&kern_ctl, &cfil_kctlref);
2507 if (error != 0) {
2508 CFIL_LOG(LOG_ERR, "ctl_register failed: %d", error);
2509 return;
2510 }
2511
2512 // Spawn thread for garbage collection
2513 if (kernel_thread_start(cfil_udp_gc_thread_func, NULL,
2514 &cfil_udp_gc_thread) != KERN_SUCCESS) {
2515 panic_plain("%s: Can't create UDP GC thread", __func__);
2516 /* NOTREACHED */
2517 }
2518 /* this must not fail */
2519 VERIFY(cfil_udp_gc_thread != NULL);
2520
2521 // Spawn thread for statistics reporting
2522 if (kernel_thread_start(cfil_stats_report_thread_func, NULL,
2523 &cfil_stats_report_thread) != KERN_SUCCESS) {
2524 panic_plain("%s: Can't create statistics report thread", __func__);
2525 /* NOTREACHED */
2526 }
2527 /* this must not fail */
2528 VERIFY(cfil_stats_report_thread != NULL);
2529
2530 // Set UDP per-flow mbuf thresholds to 1/32 of platform max
2531 mbuf_limit = MAX(UDP_FLOW_GC_MBUF_CNT_MAX, (nmbclusters << MCLSHIFT) >> UDP_FLOW_GC_MBUF_SHIFT);
2532 cfil_udp_gc_mbuf_num_max = (mbuf_limit >> MCLSHIFT);
2533 cfil_udp_gc_mbuf_cnt_max = mbuf_limit;
2534
2535 memset(&global_cfil_stats_report_buffers, 0, sizeof(global_cfil_stats_report_buffers));
2536 }
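
/*
 * Illustrative user space connection to the kernel control registered by
 * cfil_init() (a sketch using the standard kernel control socket calls;
 * CTL_FLAG_PRIVILEGED means connect() requires root):
 *
 *	struct ctl_info info;
 *	struct sockaddr_ctl addr;
 *	int fd = socket(PF_SYSTEM, SOCK_DGRAM, SYSPROTO_CONTROL);
 *
 *	memset(&info, 0, sizeof(info));
 *	strlcpy(info.ctl_name, CONTENT_FILTER_CONTROL_NAME, sizeof(info.ctl_name));
 *	if (ioctl(fd, CTLIOCGINFO, &info) == -1) {
 *		err(1, "CTLIOCGINFO");
 *	}
 *
 *	memset(&addr, 0, sizeof(addr));
 *	addr.sc_len = sizeof(addr);
 *	addr.sc_family = AF_SYSTEM;
 *	addr.ss_sysaddr = AF_SYS_CONTROL;
 *	addr.sc_id = info.ctl_id;
 *	addr.sc_unit = 0;	// kcunit is assigned by the kernel at connect time
 *	if (connect(fd, (struct sockaddr *)&addr, sizeof(addr)) == -1) {
 *		err(1, "connect");
 *	}
 */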
2537
2538 struct cfil_info *
2539 cfil_info_alloc(struct socket *so, struct cfil_hash_entry *hash_entry)
2540 {
2541 int kcunit;
2542 struct cfil_info *cfil_info = NULL;
2543 struct inpcb *inp = sotoinpcb(so);
2544
2545 CFIL_LOG(LOG_INFO, "");
2546
2547 socket_lock_assert_owned(so);
2548
2549 cfil_info = zalloc(cfil_info_zone);
2550 if (cfil_info == NULL) {
2551 goto done;
2552 }
2553 bzero(cfil_info, sizeof(struct cfil_info));
2554
2555 cfil_queue_init(&cfil_info->cfi_snd.cfi_inject_q);
2556 cfil_queue_init(&cfil_info->cfi_rcv.cfi_inject_q);
2557
2558 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
2559 struct cfil_entry *entry;
2560
2561 entry = &cfil_info->cfi_entries[kcunit - 1];
2562 entry->cfe_cfil_info = cfil_info;
2563
2564 /* Initialize the filter entry */
2565 entry->cfe_filter = NULL;
2566 entry->cfe_flags = 0;
2567 entry->cfe_necp_control_unit = 0;
2568 entry->cfe_snd.cfe_pass_offset = 0;
2569 entry->cfe_snd.cfe_peek_offset = 0;
2570 entry->cfe_snd.cfe_peeked = 0;
2571 entry->cfe_rcv.cfe_pass_offset = 0;
2572 entry->cfe_rcv.cfe_peek_offset = 0;
2573 entry->cfe_rcv.cfe_peeked = 0;
2574 /*
2575 * Timestamp the last action to avoid prematurely
2576 * triggering garbage collection
2577 */
2578 microuptime(&entry->cfe_last_action);
2579
2580 cfil_queue_init(&entry->cfe_snd.cfe_pending_q);
2581 cfil_queue_init(&entry->cfe_rcv.cfe_pending_q);
2582 cfil_queue_init(&entry->cfe_snd.cfe_ctl_q);
2583 cfil_queue_init(&entry->cfe_rcv.cfe_ctl_q);
2584 }
2585
2586 cfil_rw_lock_exclusive(&cfil_lck_rw);
2587
2588 /*
2589 * Create a cfi_sock_id that's not the socket pointer!
2590 */
2591
2592 if (hash_entry == NULL) {
2593 // This is the TCP case, cfil_info is tracked per socket
2594 if (inp->inp_flowhash == 0) {
2595 inp->inp_flowhash = inp_calc_flowhash(inp);
2596 }
2597
2598 so->so_cfil = cfil_info;
2599 cfil_info->cfi_so = so;
2600 cfil_info->cfi_sock_id =
2601 ((so->so_gencnt << 32) | inp->inp_flowhash);
2602 } else {
2603 // This is the UDP case, cfil_info is tracked in per-socket hash
2604 cfil_info->cfi_so = so;
2605 hash_entry->cfentry_cfil = cfil_info;
2606 cfil_info->cfi_hash_entry = hash_entry;
2607 cfil_info->cfi_sock_id = ((so->so_gencnt << 32) | (hash_entry->cfentry_flowhash & 0xffffffff));
2608 CFIL_LOG(LOG_DEBUG, "CFIL: UDP inp_flowhash %x so_gencnt %llx entry flowhash %x sockID %llx",
2609 inp->inp_flowhash, so->so_gencnt, hash_entry->cfentry_flowhash, cfil_info->cfi_sock_id);
2610
2611 // Wake up the gc thread if this is the first flow added
2612 if (cfil_sock_udp_attached_count == 0) {
2613 thread_wakeup((caddr_t)&cfil_sock_udp_attached_count);
2614 }
2615
2616 cfil_sock_udp_attached_count++;
2617 }
2618
2619 TAILQ_INSERT_TAIL(&cfil_sock_head, cfil_info, cfi_link);
2620 SLIST_INIT(&cfil_info->cfi_ordered_entries);
2621
2622 cfil_sock_attached_count++;
2623
2624 cfil_rw_unlock_exclusive(&cfil_lck_rw);
2625
2626 done:
2627 if (cfil_info != NULL) {
2628 OSIncrementAtomic(&cfil_stats.cfs_cfi_alloc_ok);
2629 } else {
2630 OSIncrementAtomic(&cfil_stats.cfs_cfi_alloc_fail);
2631 }
2632
2633 return cfil_info;
2634 }
2635
2636 int
2637 cfil_info_attach_unit(struct socket *so, uint32_t filter_control_unit, struct cfil_info *cfil_info)
2638 {
2639 int kcunit;
2640 int attached = 0;
2641
2642 CFIL_LOG(LOG_INFO, "");
2643
2644 socket_lock_assert_owned(so);
2645
2646 cfil_rw_lock_exclusive(&cfil_lck_rw);
2647
2648 for (kcunit = 1;
2649 content_filters != NULL && kcunit <= MAX_CONTENT_FILTER;
2650 kcunit++) {
2651 struct content_filter *cfc = content_filters[kcunit - 1];
2652 struct cfil_entry *entry;
2653 struct cfil_entry *iter_entry;
2654 struct cfil_entry *iter_prev;
2655
2656 if (cfc == NULL) {
2657 continue;
2658 }
2659 if (!(cfc->cf_necp_control_unit & filter_control_unit)) {
2660 continue;
2661 }
2662
2663 entry = &cfil_info->cfi_entries[kcunit - 1];
2664
2665 entry->cfe_filter = cfc;
2666 entry->cfe_necp_control_unit = cfc->cf_necp_control_unit;
2667 TAILQ_INSERT_TAIL(&cfc->cf_sock_entries, entry, cfe_link);
2668 cfc->cf_sock_count++;
2669
2670 /* Insert the entry into the list ordered by control unit */
2671 iter_prev = NULL;
2672 SLIST_FOREACH(iter_entry, &cfil_info->cfi_ordered_entries, cfe_order_link) {
2673 if (entry->cfe_necp_control_unit < iter_entry->cfe_necp_control_unit) {
2674 break;
2675 }
2676 iter_prev = iter_entry;
2677 }
2678
2679 if (iter_prev == NULL) {
2680 SLIST_INSERT_HEAD(&cfil_info->cfi_ordered_entries, entry, cfe_order_link);
2681 } else {
2682 SLIST_INSERT_AFTER(iter_prev, entry, cfe_order_link);
2683 }
2684
2685 verify_content_filter(cfc);
2686 attached = 1;
2687 entry->cfe_flags |= CFEF_CFIL_ATTACHED;
2688 }
2689
2690 cfil_rw_unlock_exclusive(&cfil_lck_rw);
2691
2692 return attached;
2693 }
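
/*
 * Note on ordering (illustrative): filters that attach with necp control
 * units 8, 2 and 4 end up in cfi_ordered_entries as 2 -> 4 -> 8, so the
 * filter with the lowest control unit is consulted first (see the
 * kcunit == 0 case in cfil_dispatch_attach_event() below).
 */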
2694
2695 static void
2696 cfil_info_free(struct cfil_info *cfil_info)
2697 {
2698 int kcunit;
2699 uint64_t in_drain = 0;
2700 uint64_t out_drained = 0;
2701
2702 if (cfil_info == NULL) {
2703 return;
2704 }
2705
2706 CFIL_LOG(LOG_INFO, "");
2707
2708 cfil_rw_lock_exclusive(&cfil_lck_rw);
2709
2710 for (kcunit = 1;
2711 content_filters != NULL && kcunit <= MAX_CONTENT_FILTER;
2712 kcunit++) {
2713 struct cfil_entry *entry;
2714 struct content_filter *cfc;
2715
2716 entry = &cfil_info->cfi_entries[kcunit - 1];
2717
2718 /* Don't be silly and try to detach twice */
2719 if (entry->cfe_filter == NULL) {
2720 continue;
2721 }
2722
2723 cfc = content_filters[kcunit - 1];
2724
2725 VERIFY(cfc == entry->cfe_filter);
2726
2727 entry->cfe_filter = NULL;
2728 entry->cfe_necp_control_unit = 0;
2729 TAILQ_REMOVE(&cfc->cf_sock_entries, entry, cfe_link);
2730 cfc->cf_sock_count--;
2731
2732 verify_content_filter(cfc);
2733 }
2734 if (cfil_info->cfi_hash_entry != NULL) {
2735 cfil_sock_udp_attached_count--;
2736 }
2737 cfil_sock_attached_count--;
2738 TAILQ_REMOVE(&cfil_sock_head, cfil_info, cfi_link);
2739
2740 // Turn off stats reporting for cfil_info.
2741 cfil_info_stats_toggle(cfil_info, NULL, 0);
2742
2743 out_drained += cfil_queue_drain(&cfil_info->cfi_snd.cfi_inject_q);
2744 in_drain += cfil_queue_drain(&cfil_info->cfi_rcv.cfi_inject_q);
2745
2746 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
2747 struct cfil_entry *entry;
2748
2749 entry = &cfil_info->cfi_entries[kcunit - 1];
2750 out_drained += cfil_queue_drain(&entry->cfe_snd.cfe_pending_q);
2751 in_drain += cfil_queue_drain(&entry->cfe_rcv.cfe_pending_q);
2752 out_drained += cfil_queue_drain(&entry->cfe_snd.cfe_ctl_q);
2753 in_drain += cfil_queue_drain(&entry->cfe_rcv.cfe_ctl_q);
2754 }
2755 cfil_rw_unlock_exclusive(&cfil_lck_rw);
2756
2757 if (out_drained) {
2758 OSIncrementAtomic(&cfil_stats.cfs_flush_out_free);
2759 }
2760 if (in_drain) {
2761 OSIncrementAtomic(&cfil_stats.cfs_flush_in_free);
2762 }
2763
2764 zfree(cfil_info_zone, cfil_info);
2765 }
2766
2767 /*
2768 * Received a verdict from userspace for a socket.
2769 * Perform any delayed operation if needed.
2770 */
2771 static void
2772 cfil_sock_received_verdict(struct socket *so)
2773 {
2774 if (so == NULL || so->so_cfil == NULL) {
2775 return;
2776 }
2777
2778 so->so_cfil->cfi_flags |= CFIF_INITIAL_VERDICT;
2779
2780 /*
2781 * If socket has already been connected, trigger
2782 * soisconnected now.
2783 */
2784 if (so->so_cfil->cfi_flags & CFIF_SOCKET_CONNECTED) {
2785 so->so_cfil->cfi_flags &= ~CFIF_SOCKET_CONNECTED;
2786 soisconnected(so);
2787 return;
2788 }
2789 }
2790
2791 /*
2792 * Entry point from Sockets layer
2793 * The socket is locked.
2794 *
2795 * Checks if a connected socket is subject to filtering and
2796 * still pending the initial verdict.
2797 */
2798 boolean_t
2799 cfil_sock_connected_pending_verdict(struct socket *so)
2800 {
2801 if (so == NULL || so->so_cfil == NULL) {
2802 return false;
2803 }
2804
2805 if (so->so_cfil->cfi_flags & CFIF_INITIAL_VERDICT) {
2806 return false;
2807 } else {
2808 /*
2809 * Remember that this socket is already connected at the
2810 * protocol level, so we will trigger soisconnected() upon
2811 * receipt of the initial verdict later.
2812 */
2813 so->so_cfil->cfi_flags |= CFIF_SOCKET_CONNECTED;
2814 return true;
2815 }
2816 }
2817
2818 boolean_t
2819 cfil_filter_present(void)
2820 {
2821 return cfil_active_count > 0;
2822 }
2823
2824 /*
2825 * Entry point from Sockets layer
2826 * The socket is locked.
2827 */
2828 errno_t
2829 cfil_sock_attach(struct socket *so, struct sockaddr *local, struct sockaddr *remote, int dir)
2830 {
2831 errno_t error = 0;
2832 uint32_t filter_control_unit;
2833
2834 socket_lock_assert_owned(so);
2835
2836 /* Limit ourselves to TCP sockets that are not MPTCP subflows */
2837 if ((so->so_proto->pr_domain->dom_family != PF_INET &&
2838 so->so_proto->pr_domain->dom_family != PF_INET6) ||
2839 so->so_proto->pr_type != SOCK_STREAM ||
2840 so->so_proto->pr_protocol != IPPROTO_TCP ||
2841 (so->so_flags & SOF_MP_SUBFLOW) != 0 ||
2842 (so->so_flags1 & SOF1_CONTENT_FILTER_SKIP) != 0) {
2843 goto done;
2844 }
2845
2846 filter_control_unit = necp_socket_get_content_filter_control_unit(so);
2847 if (filter_control_unit == 0) {
2848 goto done;
2849 }
2850
2851 if (filter_control_unit == NECP_FILTER_UNIT_NO_FILTER) {
2852 goto done;
2853 }
2854 if ((filter_control_unit & NECP_MASK_USERSPACE_ONLY) != 0) {
2855 OSIncrementAtomic(&cfil_stats.cfs_sock_userspace_only);
2856 goto done;
2857 }
2858 if (cfil_active_count == 0) {
2859 OSIncrementAtomic(&cfil_stats.cfs_sock_attach_in_vain);
2860 goto done;
2861 }
2862 if (so->so_cfil != NULL) {
2863 OSIncrementAtomic(&cfil_stats.cfs_sock_attach_already);
2864 CFIL_LOG(LOG_ERR, "already attached");
2865 } else {
2866 cfil_info_alloc(so, NULL);
2867 if (so->so_cfil == NULL) {
2868 error = ENOMEM;
2869 OSIncrementAtomic(&cfil_stats.cfs_sock_attach_no_mem);
2870 goto done;
2871 }
2872 so->so_cfil->cfi_dir = dir;
2873 }
2874 if (cfil_info_attach_unit(so, filter_control_unit, so->so_cfil) == 0) {
2875 CFIL_LOG(LOG_ERR, "cfil_info_attach_unit(%u) failed",
2876 filter_control_unit);
2877 OSIncrementAtomic(&cfil_stats.cfs_sock_attach_failed);
2878 goto done;
2879 }
2880 CFIL_LOG(LOG_INFO, "so %llx filter_control_unit %u sockID %llx",
2881 (uint64_t)VM_KERNEL_ADDRPERM(so),
2882 filter_control_unit, so->so_cfil->cfi_sock_id);
2883
2884 so->so_flags |= SOF_CONTENT_FILTER;
2885 OSIncrementAtomic(&cfil_stats.cfs_sock_attached);
2886
2887 /* Hold a reference on the socket */
2888 so->so_usecount++;
2889
2890 /*
2891 * Save the passed addresses for the attach event msg (in case a
2892 * resend is needed).
2893 */
2894 if (remote != NULL) {
2895 memcpy(&so->so_cfil->cfi_so_attach_faddr, remote, remote->sa_len);
2896 }
2897 if (local != NULL) {
2898 memcpy(&so->so_cfil->cfi_so_attach_laddr, local, local->sa_len);
2899 }
2900
2901 error = cfil_dispatch_attach_event(so, so->so_cfil, 0, dir);
2902 /* We can recover from flow control or out of memory errors */
2903 if (error == ENOBUFS || error == ENOMEM) {
2904 error = 0;
2905 } else if (error != 0) {
2906 goto done;
2907 }
2908
2909 CFIL_INFO_VERIFY(so->so_cfil);
2910 done:
2911 return error;
2912 }
2913
2914 /*
2915 * Entry point from Sockets layer
2916 * The socket is locked.
2917 */
2918 errno_t
2919 cfil_sock_detach(struct socket *so)
2920 {
2921 if (IS_IP_DGRAM(so)) {
2922 cfil_db_free(so);
2923 return 0;
2924 }
2925
2926 if (so->so_cfil) {
2927 if (so->so_flags & SOF_CONTENT_FILTER) {
2928 so->so_flags &= ~SOF_CONTENT_FILTER;
2929 VERIFY(so->so_usecount > 0);
2930 so->so_usecount--;
2931 }
2932 cfil_info_free(so->so_cfil);
2933 so->so_cfil = NULL;
2934 OSIncrementAtomic(&cfil_stats.cfs_sock_detached);
2935 }
2936 return 0;
2937 }
2938
2939 /*
2940 * Fill in the address info of an event message from either
2941 * the socket or the passed-in address info.
2942 */
2943 static void
2944 cfil_fill_event_msg_addresses(struct cfil_hash_entry *entry, struct inpcb *inp,
2945 union sockaddr_in_4_6 *sin_src, union sockaddr_in_4_6 *sin_dst,
2946 boolean_t isIPv4, boolean_t outgoing)
2947 {
2948 if (isIPv4) {
2949 struct in_addr laddr = {0}, faddr = {0};
2950 u_int16_t lport = 0, fport = 0;
2951
2952 cfil_get_flow_address(entry, inp, &laddr, &faddr, &lport, &fport);
2953
2954 if (outgoing) {
2955 fill_ip_sockaddr_4_6(sin_src, laddr, lport);
2956 fill_ip_sockaddr_4_6(sin_dst, faddr, fport);
2957 } else {
2958 fill_ip_sockaddr_4_6(sin_src, faddr, fport);
2959 fill_ip_sockaddr_4_6(sin_dst, laddr, lport);
2960 }
2961 } else {
2962 struct in6_addr *laddr = NULL, *faddr = NULL;
2963 u_int16_t lport = 0, fport = 0;
2964
2965 cfil_get_flow_address_v6(entry, inp, &laddr, &faddr, &lport, &fport);
2966 if (outgoing) {
2967 fill_ip6_sockaddr_4_6(sin_src, laddr, lport);
2968 fill_ip6_sockaddr_4_6(sin_dst, faddr, fport);
2969 } else {
2970 fill_ip6_sockaddr_4_6(sin_src, faddr, fport);
2971 fill_ip6_sockaddr_4_6(sin_dst, laddr, lport);
2972 }
2973 }
2974 }
2975
2976 static boolean_t
2977 cfil_dispatch_attach_event_sign(cfil_crypto_state_t crypto_state,
2978 struct cfil_info *cfil_info,
2979 struct cfil_msg_sock_attached *msg)
2980 {
2981 struct cfil_crypto_data data = {};
2982
2983 if (crypto_state == NULL || msg == NULL || cfil_info == NULL) {
2984 return false;
2985 }
2986
2987 data.sock_id = msg->cfs_msghdr.cfm_sock_id;
2988 data.direction = msg->cfs_conn_dir;
2989
2990 data.pid = msg->cfs_pid;
2991 data.effective_pid = msg->cfs_e_pid;
2992 uuid_copy(data.uuid, msg->cfs_uuid);
2993 uuid_copy(data.effective_uuid, msg->cfs_e_uuid);
2994 data.socketProtocol = msg->cfs_sock_protocol;
2995 if (data.direction == CFS_CONNECTION_DIR_OUT) {
2996 data.remote.sin6 = msg->cfs_dst.sin6;
2997 data.local.sin6 = msg->cfs_src.sin6;
2998 } else {
2999 data.remote.sin6 = msg->cfs_src.sin6;
3000 data.local.sin6 = msg->cfs_dst.sin6;
3001 }
3002
3003 // At attach, if local address is already present, no need to re-sign subsequent data messages.
3004 if (!NULLADDRESS(data.local)) {
3005 cfil_info->cfi_isSignatureLatest = true;
3006 }
3007
3008 msg->cfs_signature_length = sizeof(cfil_crypto_signature);
3009 if (cfil_crypto_sign_data(crypto_state, &data, msg->cfs_signature, &msg->cfs_signature_length) != 0) {
3010 msg->cfs_signature_length = 0;
3011 CFIL_LOG(LOG_ERR, "CFIL: Failed to sign attached msg <sockID %llu>",
3012 msg->cfs_msghdr.cfm_sock_id);
3013 return false;
3014 }
3015
3016 return true;
3017 }
3018
3019 static boolean_t
3020 cfil_dispatch_data_event_sign(cfil_crypto_state_t crypto_state,
3021 struct socket *so, struct cfil_info *cfil_info,
3022 struct cfil_msg_data_event *msg)
3023 {
3024 struct cfil_crypto_data data = {};
3025
3026 if (crypto_state == NULL || msg == NULL ||
3027 so == NULL || cfil_info == NULL) {
3028 return false;
3029 }
3030
3031 data.sock_id = cfil_info->cfi_sock_id;
3032 data.direction = cfil_info->cfi_dir;
3033 data.pid = so->last_pid;
3034 memcpy(data.uuid, so->last_uuid, sizeof(uuid_t));
3035 if (so->so_flags & SOF_DELEGATED) {
3036 data.effective_pid = so->e_pid;
3037 memcpy(data.effective_uuid, so->e_uuid, sizeof(uuid_t));
3038 } else {
3039 data.effective_pid = so->last_pid;
3040 memcpy(data.effective_uuid, so->last_uuid, sizeof(uuid_t));
3041 }
3042 data.socketProtocol = so->so_proto->pr_protocol;
3043
3044 if (data.direction == CFS_CONNECTION_DIR_OUT) {
3045 data.remote.sin6 = msg->cfc_dst.sin6;
3046 data.local.sin6 = msg->cfc_src.sin6;
3047 } else {
3048 data.remote.sin6 = msg->cfc_src.sin6;
3049 data.local.sin6 = msg->cfc_dst.sin6;
3050 }
3051
3052 // At the first data message, the local address may show up for the first time; update the
3053 // address cache so that subsequent data messages no longer need to be re-signed.
3054 if (!NULLADDRESS(data.local)) {
3055 memcpy(&cfil_info->cfi_so_attach_laddr, &data.local, data.local.sa.sa_len);
3056 cfil_info->cfi_isSignatureLatest = true;
3057 }
3058
3059 msg->cfd_signature_length = sizeof(cfil_crypto_signature);
3060 if (cfil_crypto_sign_data(crypto_state, &data, msg->cfd_signature, &msg->cfd_signature_length) != 0) {
3061 msg->cfd_signature_length = 0;
3062 CFIL_LOG(LOG_ERR, "CFIL: Failed to sign data msg <sockID %llu>",
3063 msg->cfd_msghdr.cfm_sock_id);
3064 return false;
3065 }
3066
3067 return true;
3068 }
3069
3070 static boolean_t
3071 cfil_dispatch_closed_event_sign(cfil_crypto_state_t crypto_state,
3072 struct socket *so, struct cfil_info *cfil_info,
3073 struct cfil_msg_sock_closed *msg)
3074 {
3075 struct cfil_crypto_data data = {};
3076 struct cfil_hash_entry hash_entry = {};
3077 struct cfil_hash_entry *hash_entry_ptr = NULL;
3078 struct inpcb *inp = (struct inpcb *)so->so_pcb;
3079
3080 if (crypto_state == NULL || msg == NULL ||
3081 so == NULL || inp == NULL || cfil_info == NULL) {
3082 return false;
3083 }
3084
3085 data.sock_id = cfil_info->cfi_sock_id;
3086 data.direction = cfil_info->cfi_dir;
3087
3088 data.pid = so->last_pid;
3089 memcpy(data.uuid, so->last_uuid, sizeof(uuid_t));
3090 if (so->so_flags & SOF_DELEGATED) {
3091 data.effective_pid = so->e_pid;
3092 memcpy(data.effective_uuid, so->e_uuid, sizeof(uuid_t));
3093 } else {
3094 data.effective_pid = so->last_pid;
3095 memcpy(data.effective_uuid, so->last_uuid, sizeof(uuid_t));
3096 }
3097 data.socketProtocol = so->so_proto->pr_protocol;
3098
3099 /*
3100 * Fill in address info:
3101 * For UDP, use the cfil_info hash entry directly.
3102 * For TCP, compose a hash entry with the saved addresses.
3103 */
3104 if (cfil_info->cfi_hash_entry != NULL) {
3105 hash_entry_ptr = cfil_info->cfi_hash_entry;
3106 } else if (cfil_info->cfi_so_attach_faddr.sa.sa_len > 0 ||
3107 cfil_info->cfi_so_attach_laddr.sa.sa_len > 0) {
3108 fill_cfil_hash_entry_from_address(&hash_entry, TRUE, &cfil_info->cfi_so_attach_laddr.sa);
3109 fill_cfil_hash_entry_from_address(&hash_entry, FALSE, &cfil_info->cfi_so_attach_faddr.sa);
3110 hash_entry_ptr = &hash_entry;
3111 }
3112 if (hash_entry_ptr != NULL) {
3113 boolean_t outgoing = (cfil_info->cfi_dir == CFS_CONNECTION_DIR_OUT);
3114 union sockaddr_in_4_6 *src = outgoing ? &data.local : &data.remote;
3115 union sockaddr_in_4_6 *dst = outgoing ? &data.remote : &data.local;
3116 cfil_fill_event_msg_addresses(hash_entry_ptr, inp, src, dst, !IS_INP_V6(inp), outgoing);
3117 }
3118
3119 data.byte_count_in = cfil_info->cfi_byte_inbound_count;
3120 data.byte_count_out = cfil_info->cfi_byte_outbound_count;
3121
3122 msg->cfc_signature_length = sizeof(cfil_crypto_signature);
3123 if (cfil_crypto_sign_data(crypto_state, &data, msg->cfc_signature, &msg->cfc_signature_length) != 0) {
3124 msg->cfc_signature_length = 0;
3125 CFIL_LOG(LOG_ERR, "CFIL: Failed to sign closed msg <sockID %llu>",
3126 msg->cfc_msghdr.cfm_sock_id);
3127 return false;
3128 }
3129
3130 return true;
3131 }
3132
3133 static int
3134 cfil_dispatch_attach_event(struct socket *so, struct cfil_info *cfil_info,
3135 uint32_t kcunit, int conn_dir)
3136 {
3137 errno_t error = 0;
3138 struct cfil_entry *entry = NULL;
3139 struct cfil_msg_sock_attached msg_attached;
3140 struct content_filter *cfc = NULL;
3141 struct inpcb *inp = (struct inpcb *)so->so_pcb;
3142 struct cfil_hash_entry *hash_entry_ptr = NULL;
3143 struct cfil_hash_entry hash_entry;
3144
3145 memset(&hash_entry, 0, sizeof(struct cfil_hash_entry));
3146 proc_t p = PROC_NULL;
3147 task_t t = TASK_NULL;
3148
3149 socket_lock_assert_owned(so);
3150
3151 cfil_rw_lock_shared(&cfil_lck_rw);
3152
3153 if (so->so_proto == NULL || so->so_proto->pr_domain == NULL) {
3154 error = EINVAL;
3155 goto done;
3156 }
3157
3158 if (kcunit == 0) {
3159 entry = SLIST_FIRST(&cfil_info->cfi_ordered_entries);
3160 } else {
3161 entry = &cfil_info->cfi_entries[kcunit - 1];
3162 }
3163
3164 if (entry == NULL) {
3165 goto done;
3166 }
3167
3168 cfc = entry->cfe_filter;
3169 if (cfc == NULL) {
3170 goto done;
3171 }
3172
3173 if ((entry->cfe_flags & CFEF_SENT_SOCK_ATTACHED)) {
3174 goto done;
3175 }
3176
3177 if (kcunit == 0) {
3178 kcunit = CFI_ENTRY_KCUNIT(cfil_info, entry);
3179 }
3180
3181 CFIL_LOG(LOG_INFO, "so %llx filter_control_unit %u kcunit %u",
3182 (uint64_t)VM_KERNEL_ADDRPERM(so), entry->cfe_necp_control_unit, kcunit);
3183
3184 /* Would be wasteful to try when flow controlled */
3185 if (cfc->cf_flags & CFF_FLOW_CONTROLLED) {
3186 error = ENOBUFS;
3187 goto done;
3188 }
3189
3190 bzero(&msg_attached, sizeof(struct cfil_msg_sock_attached));
3191 msg_attached.cfs_msghdr.cfm_len = sizeof(struct cfil_msg_sock_attached);
3192 msg_attached.cfs_msghdr.cfm_version = CFM_VERSION_CURRENT;
3193 msg_attached.cfs_msghdr.cfm_type = CFM_TYPE_EVENT;
3194 msg_attached.cfs_msghdr.cfm_op = CFM_OP_SOCKET_ATTACHED;
3195 msg_attached.cfs_msghdr.cfm_sock_id = entry->cfe_cfil_info->cfi_sock_id;
3196
3197 msg_attached.cfs_sock_family = so->so_proto->pr_domain->dom_family;
3198 msg_attached.cfs_sock_type = so->so_proto->pr_type;
3199 msg_attached.cfs_sock_protocol = so->so_proto->pr_protocol;
3200 msg_attached.cfs_pid = so->last_pid;
3201 memcpy(msg_attached.cfs_uuid, so->last_uuid, sizeof(uuid_t));
3202 if (so->so_flags & SOF_DELEGATED) {
3203 msg_attached.cfs_e_pid = so->e_pid;
3204 memcpy(msg_attached.cfs_e_uuid, so->e_uuid, sizeof(uuid_t));
3205 } else {
3206 msg_attached.cfs_e_pid = so->last_pid;
3207 memcpy(msg_attached.cfs_e_uuid, so->last_uuid, sizeof(uuid_t));
3208 }
3209
3210 /*
3211 * Fill in address info:
3212 * For UDP, use the cfil_info hash entry directly.
3213 * For TCP, compose a hash entry with the saved addresses.
3214 */
3215 if (cfil_info->cfi_hash_entry != NULL) {
3216 hash_entry_ptr = cfil_info->cfi_hash_entry;
3217 } else if (cfil_info->cfi_so_attach_faddr.sa.sa_len > 0 ||
3218 cfil_info->cfi_so_attach_laddr.sa.sa_len > 0) {
3219 fill_cfil_hash_entry_from_address(&hash_entry, TRUE, &cfil_info->cfi_so_attach_laddr.sa);
3220 fill_cfil_hash_entry_from_address(&hash_entry, FALSE, &cfil_info->cfi_so_attach_faddr.sa);
3221 hash_entry_ptr = &hash_entry;
3222 }
3223 if (hash_entry_ptr != NULL) {
3224 cfil_fill_event_msg_addresses(hash_entry_ptr, inp,
3225 &msg_attached.cfs_src, &msg_attached.cfs_dst,
3226 !IS_INP_V6(inp), conn_dir == CFS_CONNECTION_DIR_OUT);
3227 }
3228 msg_attached.cfs_conn_dir = conn_dir;
3229
3230 if (msg_attached.cfs_e_pid != 0) {
3231 p = proc_find(msg_attached.cfs_e_pid);
3232 if (p != PROC_NULL) {
3233 t = proc_task(p);
3234 if (t != TASK_NULL) {
3235 audit_token_t audit_token;
3236 mach_msg_type_number_t count = TASK_AUDIT_TOKEN_COUNT;
3237 if (task_info(t, TASK_AUDIT_TOKEN, (task_info_t)&audit_token, &count) == KERN_SUCCESS) {
3238 memcpy(&msg_attached.cfs_audit_token, &audit_token, sizeof(msg_attached.cfs_audit_token));
3239 } else {
3240 CFIL_LOG(LOG_ERR, "CFIL: Failed to get process audit token <sockID %llu> ",
3241 entry->cfe_cfil_info->cfi_sock_id);
3242 }
3243 }
3244 proc_rele(p);
3245 }
3246 }
3247
3248 if (cfil_info->cfi_debug) {
3249 cfil_info_log(LOG_ERR, cfil_info, "CFIL: SENDING ATTACH UP");
3250 }
3251
3252 cfil_dispatch_attach_event_sign(entry->cfe_filter->cf_crypto_state, cfil_info, &msg_attached);
3253
3254 #if LIFECYCLE_DEBUG
3255 CFIL_LOG(LOG_DEBUG, "CFIL: LIFECYCLE: SENDING ATTACH UP <sockID %llu> ",
3256 entry->cfe_cfil_info->cfi_sock_id);
3257 #endif
3258
3259 error = ctl_enqueuedata(entry->cfe_filter->cf_kcref,
3260 entry->cfe_filter->cf_kcunit,
3261 &msg_attached,
3262 sizeof(struct cfil_msg_sock_attached),
3263 CTL_DATA_EOR);
3264 if (error != 0) {
3265 CFIL_LOG(LOG_ERR, "ctl_enqueuedata() failed: %d", error);
3266 goto done;
3267 }
3268 microuptime(&entry->cfe_last_event);
3269 cfil_info->cfi_first_event.tv_sec = entry->cfe_last_event.tv_sec;
3270 cfil_info->cfi_first_event.tv_usec = entry->cfe_last_event.tv_usec;
3271
3272 entry->cfe_flags |= CFEF_SENT_SOCK_ATTACHED;
3273 OSIncrementAtomic(&cfil_stats.cfs_attach_event_ok);
3274 done:
3275
3276 /* We can recover from flow control */
3277 if (error == ENOBUFS) {
3278 entry->cfe_flags |= CFEF_FLOW_CONTROLLED;
3279 OSIncrementAtomic(&cfil_stats.cfs_attach_event_flow_control);
3280
3281 if (!cfil_rw_lock_shared_to_exclusive(&cfil_lck_rw)) {
3282 cfil_rw_lock_exclusive(&cfil_lck_rw);
3283 }
3284
3285 cfc->cf_flags |= CFF_FLOW_CONTROLLED;
3286
3287 cfil_rw_unlock_exclusive(&cfil_lck_rw);
3288 } else {
3289 if (error != 0) {
3290 OSIncrementAtomic(&cfil_stats.cfs_attach_event_fail);
3291 }
3292
3293 cfil_rw_unlock_shared(&cfil_lck_rw);
3294 }
3295 return error;
3296 }
3297
3298 static int
3299 cfil_dispatch_disconnect_event(struct socket *so, struct cfil_info *cfil_info, uint32_t kcunit, int outgoing)
3300 {
3301 errno_t error = 0;
3302 struct mbuf *msg = NULL;
3303 struct cfil_entry *entry;
3304 struct cfe_buf *entrybuf;
3305 struct cfil_msg_hdr msg_disconnected;
3306 struct content_filter *cfc;
3307
3308 socket_lock_assert_owned(so);
3309
3310 cfil_rw_lock_shared(&cfil_lck_rw);
3311
3312 entry = &cfil_info->cfi_entries[kcunit - 1];
3313 if (outgoing) {
3314 entrybuf = &entry->cfe_snd;
3315 } else {
3316 entrybuf = &entry->cfe_rcv;
3317 }
3318
3319 cfc = entry->cfe_filter;
3320 if (cfc == NULL) {
3321 goto done;
3322 }
3323
3324 CFIL_LOG(LOG_INFO, "so %llx kcunit %u outgoing %d",
3325 (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit, outgoing);
3326
3327 /*
3328 * Send the disconnection event once
3329 */
3330 if ((outgoing && (entry->cfe_flags & CFEF_SENT_DISCONNECT_OUT)) ||
3331 (!outgoing && (entry->cfe_flags & CFEF_SENT_DISCONNECT_IN))) {
3332 CFIL_LOG(LOG_INFO, "so %llx disconnect already sent",
3333 (uint64_t)VM_KERNEL_ADDRPERM(so));
3334 goto done;
3335 }
3336
3337 /*
3338 * We're not disconnected as long as some data is waiting
3339 * to be delivered to the filter
3340 */
3341 if (outgoing && cfil_queue_empty(&entrybuf->cfe_ctl_q) == 0) {
3342 CFIL_LOG(LOG_INFO, "so %llx control queue not empty",
3343 (uint64_t)VM_KERNEL_ADDRPERM(so));
3344 error = EBUSY;
3345 goto done;
3346 }
3347 /* Would be wasteful to try when flow controlled */
3348 if (cfc->cf_flags & CFF_FLOW_CONTROLLED) {
3349 error = ENOBUFS;
3350 goto done;
3351 }
3352
3353 if (cfil_info->cfi_debug) {
3354 cfil_info_log(LOG_ERR, cfil_info, "CFIL: SENDING DISCONNECT UP");
3355 }
3356
3357 #if LIFECYCLE_DEBUG
3358 cfil_info_log(LOG_ERR, cfil_info, outgoing ?
3359 "CFIL: LIFECYCLE: OUT - SENDING DISCONNECT UP":
3360 "CFIL: LIFECYCLE: IN - SENDING DISCONNECT UP");
3361 #endif
3362
3363 bzero(&msg_disconnected, sizeof(struct cfil_msg_hdr));
3364 msg_disconnected.cfm_len = sizeof(struct cfil_msg_hdr);
3365 msg_disconnected.cfm_version = CFM_VERSION_CURRENT;
3366 msg_disconnected.cfm_type = CFM_TYPE_EVENT;
3367 msg_disconnected.cfm_op = outgoing ? CFM_OP_DISCONNECT_OUT :
3368 CFM_OP_DISCONNECT_IN;
3369 msg_disconnected.cfm_sock_id = entry->cfe_cfil_info->cfi_sock_id;
3370 error = ctl_enqueuedata(entry->cfe_filter->cf_kcref,
3371 entry->cfe_filter->cf_kcunit,
3372 &msg_disconnected,
3373 sizeof(struct cfil_msg_hdr),
3374 CTL_DATA_EOR);
3375 if (error != 0) {
3376 CFIL_LOG(LOG_ERR, "ctl_enqueuedata() failed: %d", error);
3377 mbuf_freem(msg);
3378 goto done;
3379 }
3380 microuptime(&entry->cfe_last_event);
3381 CFI_ADD_TIME_LOG(cfil_info, &entry->cfe_last_event, &cfil_info->cfi_first_event, msg_disconnected.cfm_op);
3382
3383 /* Remember we have sent the disconnection message */
3384 if (outgoing) {
3385 entry->cfe_flags |= CFEF_SENT_DISCONNECT_OUT;
3386 OSIncrementAtomic(&cfil_stats.cfs_disconnect_out_event_ok);
3387 } else {
3388 entry->cfe_flags |= CFEF_SENT_DISCONNECT_IN;
3389 OSIncrementAtomic(&cfil_stats.cfs_disconnect_in_event_ok);
3390 }
3391 done:
3392 if (error == ENOBUFS) {
3393 entry->cfe_flags |= CFEF_FLOW_CONTROLLED;
3394 OSIncrementAtomic(
3395 &cfil_stats.cfs_disconnect_event_flow_control);
3396
3397 if (!cfil_rw_lock_shared_to_exclusive(&cfil_lck_rw)) {
3398 cfil_rw_lock_exclusive(&cfil_lck_rw);
3399 }
3400
3401 cfc->cf_flags |= CFF_FLOW_CONTROLLED;
3402
3403 cfil_rw_unlock_exclusive(&cfil_lck_rw);
3404 } else {
3405 if (error != 0) {
3406 OSIncrementAtomic(
3407 &cfil_stats.cfs_disconnect_event_fail);
3408 }
3409
3410 cfil_rw_unlock_shared(&cfil_lck_rw);
3411 }
3412 return error;
3413 }
3414
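/*
 * cfil_dispatch_closed_event()
 *
 * Send a single CFM_OP_SOCKET_CLOSED event per filter, including the
 * byte counts and the operation time log accumulated for the flow.
 */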
3415 int
3416 cfil_dispatch_closed_event(struct socket *so, struct cfil_info *cfil_info, int kcunit)
3417 {
3418 struct cfil_entry *entry;
3419 struct cfil_msg_sock_closed msg_closed;
3420 errno_t error = 0;
3421 struct content_filter *cfc;
3422
3423 socket_lock_assert_owned(so);
3424
3425 cfil_rw_lock_shared(&cfil_lck_rw);
3426
3427 entry = &cfil_info->cfi_entries[kcunit - 1];
3428 cfc = entry->cfe_filter;
3429 if (cfc == NULL) {
3430 goto done;
3431 }
3432
3433 CFIL_LOG(LOG_INFO, "so %llx kcunit %d",
3434 (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit);
3435
3436 /* Would be wasteful to try when flow controlled */
3437 if (cfc->cf_flags & CFF_FLOW_CONTROLLED) {
3438 error = ENOBUFS;
3439 goto done;
3440 }
3441 /*
3442 * Send a single closed message per filter
3443 */
3444 if ((entry->cfe_flags & CFEF_SENT_SOCK_CLOSED) != 0) {
3445 goto done;
3446 }
3447 if ((entry->cfe_flags & CFEF_SENT_SOCK_ATTACHED) == 0) {
3448 goto done;
3449 }
3450
3451 microuptime(&entry->cfe_last_event);
3452 CFI_ADD_TIME_LOG(cfil_info, &entry->cfe_last_event, &cfil_info->cfi_first_event, CFM_OP_SOCKET_CLOSED);
3453
3454 bzero(&msg_closed, sizeof(struct cfil_msg_sock_closed));
3455 msg_closed.cfc_msghdr.cfm_len = sizeof(struct cfil_msg_sock_closed);
3456 msg_closed.cfc_msghdr.cfm_version = CFM_VERSION_CURRENT;
3457 msg_closed.cfc_msghdr.cfm_type = CFM_TYPE_EVENT;
3458 msg_closed.cfc_msghdr.cfm_op = CFM_OP_SOCKET_CLOSED;
3459 msg_closed.cfc_msghdr.cfm_sock_id = entry->cfe_cfil_info->cfi_sock_id;
3460 msg_closed.cfc_first_event.tv_sec = cfil_info->cfi_first_event.tv_sec;
3461 msg_closed.cfc_first_event.tv_usec = cfil_info->cfi_first_event.tv_usec;
3462 memcpy(msg_closed.cfc_op_time, cfil_info->cfi_op_time, sizeof(uint32_t) * CFI_MAX_TIME_LOG_ENTRY);
3463 memcpy(msg_closed.cfc_op_list, cfil_info->cfi_op_list, sizeof(unsigned char) * CFI_MAX_TIME_LOG_ENTRY);
3464 msg_closed.cfc_op_list_ctr = cfil_info->cfi_op_list_ctr;
3465 msg_closed.cfc_byte_inbound_count = cfil_info->cfi_byte_inbound_count;
3466 msg_closed.cfc_byte_outbound_count = cfil_info->cfi_byte_outbound_count;
3467
3468 cfil_dispatch_closed_event_sign(entry->cfe_filter->cf_crypto_state, so, cfil_info, &msg_closed);
3469
3470 if (cfil_info->cfi_debug) {
3471 cfil_info_log(LOG_ERR, cfil_info, "CFIL: SENDING CLOSED UP");
3472 }
3473
3474 #if LIFECYCLE_DEBUG
3475 CFIL_LOG(LOG_ERR, "CFIL: LIFECYCLE: SENDING CLOSED UP: <sock id %llu> op ctr %d, start time %llu.%llu", msg_closed.cfc_msghdr.cfm_sock_id, cfil_info->cfi_op_list_ctr, cfil_info->cfi_first_event.tv_sec, cfil_info->cfi_first_event.tv_usec);
3476 #endif
3477 /* for debugging
3478 * if (msg_closed.cfc_op_list_ctr > CFI_MAX_TIME_LOG_ENTRY) {
3479 * msg_closed.cfc_op_list_ctr = CFI_MAX_TIME_LOG_ENTRY; // just in case
3480 * }
3481 * for (unsigned int i = 0; i < msg_closed.cfc_op_list_ctr ; i++) {
3482 * CFIL_LOG(LOG_ERR, "MD: socket %llu event %2u, time + %u msec", msg_closed.cfc_msghdr.cfm_sock_id, (unsigned short)msg_closed.cfc_op_list[i], msg_closed.cfc_op_time[i]);
3483 * }
3484 */
3485
3486 error = ctl_enqueuedata(entry->cfe_filter->cf_kcref,
3487 entry->cfe_filter->cf_kcunit,
3488 &msg_closed,
3489 sizeof(struct cfil_msg_sock_closed),
3490 CTL_DATA_EOR);
3491 if (error != 0) {
3492 CFIL_LOG(LOG_ERR, "ctl_enqueuedata() failed: %d",
3493 error);
3494 goto done;
3495 }
3496
3497 entry->cfe_flags |= CFEF_SENT_SOCK_CLOSED;
3498 OSIncrementAtomic(&cfil_stats.cfs_closed_event_ok);
3499 done:
3500 /* We can recover from flow control */
3501 if (error == ENOBUFS) {
3502 entry->cfe_flags |= CFEF_FLOW_CONTROLLED;
3503 OSIncrementAtomic(&cfil_stats.cfs_closed_event_flow_control);
3504
3505 if (!cfil_rw_lock_shared_to_exclusive(&cfil_lck_rw)) {
3506 cfil_rw_lock_exclusive(&cfil_lck_rw);
3507 }
3508
3509 cfc->cf_flags |= CFF_FLOW_CONTROLLED;
3510
3511 cfil_rw_unlock_exclusive(&cfil_lck_rw);
3512 } else {
3513 if (error != 0) {
3514 OSIncrementAtomic(&cfil_stats.cfs_closed_event_fail);
3515 }
3516
3517 cfil_rw_unlock_shared(&cfil_lck_rw);
3518 }
3519
3520 return error;
3521 }
3522
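/*
 * Helpers to fill the sockaddr_in_4_6 union of an event message from an
 * IPv6 or IPv4 address and port.
 */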
3523 static void
3524 fill_ip6_sockaddr_4_6(union sockaddr_in_4_6 *sin46,
3525 struct in6_addr *ip6, u_int16_t port)
3526 {
3527 if (sin46 == NULL) {
3528 return;
3529 }
3530
3531 struct sockaddr_in6 *sin6 = &sin46->sin6;
3532
3533 sin6->sin6_family = AF_INET6;
3534 sin6->sin6_len = sizeof(*sin6);
3535 sin6->sin6_port = port;
3536 sin6->sin6_addr = *ip6;
3537 if (IN6_IS_SCOPE_EMBED(&sin6->sin6_addr)) {
3538 sin6->sin6_scope_id = ntohs(sin6->sin6_addr.s6_addr16[1]);
3539 sin6->sin6_addr.s6_addr16[1] = 0;
3540 }
3541 }
3542
3543 static void
3544 fill_ip_sockaddr_4_6(union sockaddr_in_4_6 *sin46,
3545 struct in_addr ip, u_int16_t port)
3546 {
3547 if (sin46 == NULL) {
3548 return;
3549 }
3550
3551 struct sockaddr_in *sin = &sin46->sin;
3552
3553 sin->sin_family = AF_INET;
3554 sin->sin_len = sizeof(*sin);
3555 sin->sin_port = port;
3556 sin->sin_addr.s_addr = ip.s_addr;
3557 }
3558
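/*
 * Return the local and foreign addresses and ports for a flow, taken from
 * the content filter hash entry when present, otherwise from the inpcb.
 */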
3559 static void
3560 cfil_get_flow_address_v6(struct cfil_hash_entry *entry, struct inpcb *inp,
3561 struct in6_addr **laddr, struct in6_addr **faddr,
3562 u_int16_t *lport, u_int16_t *fport)
3563 {
3564 if (entry != NULL) {
3565 *laddr = &entry->cfentry_laddr.addr6;
3566 *faddr = &entry->cfentry_faddr.addr6;
3567 *lport = entry->cfentry_lport;
3568 *fport = entry->cfentry_fport;
3569 } else {
3570 *laddr = &inp->in6p_laddr;
3571 *faddr = &inp->in6p_faddr;
3572 *lport = inp->inp_lport;
3573 *fport = inp->inp_fport;
3574 }
3575 }
3576
3577 static void
3578 cfil_get_flow_address(struct cfil_hash_entry *entry, struct inpcb *inp,
3579 struct in_addr *laddr, struct in_addr *faddr,
3580 u_int16_t *lport, u_int16_t *fport)
3581 {
3582 if (entry != NULL) {
3583 *laddr = entry->cfentry_laddr.addr46.ia46_addr4;
3584 *faddr = entry->cfentry_faddr.addr46.ia46_addr4;
3585 *lport = entry->cfentry_lport;
3586 *fport = entry->cfentry_fport;
3587 } else {
3588 *laddr = inp->inp_laddr;
3589 *faddr = inp->inp_faddr;
3590 *lport = inp->inp_lport;
3591 *fport = inp->inp_fport;
3592 }
3593 }
3594
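/*
 * cfil_dispatch_data_event()
 *
 * Copy the span [copyoffset, copyoffset + copylen) of the mbuf chain into
 * a CFM_OP_DATA_OUT/CFM_OP_DATA_IN message and enqueue it on the filter's
 * kernel control socket. ENOBUFS from the enqueue marks the filter as
 * flow controlled.
 */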
3595 static int
3596 cfil_dispatch_data_event(struct socket *so, struct cfil_info *cfil_info, uint32_t kcunit, int outgoing,
3597 struct mbuf *data, unsigned int copyoffset, unsigned int copylen)
3598 {
3599 errno_t error = 0;
3600 struct mbuf *copy = NULL;
3601 struct mbuf *msg = NULL;
3602 unsigned int one = 1;
3603 struct cfil_msg_data_event *data_req;
3604 size_t hdrsize;
3605 struct inpcb *inp = (struct inpcb *)so->so_pcb;
3606 struct cfil_entry *entry;
3607 struct cfe_buf *entrybuf;
3608 struct content_filter *cfc;
3609 struct timeval tv;
3610 int inp_flags = 0;
3611
3612 cfil_rw_lock_shared(&cfil_lck_rw);
3613
3614 entry = &cfil_info->cfi_entries[kcunit - 1];
3615 if (outgoing) {
3616 entrybuf = &entry->cfe_snd;
3617 } else {
3618 entrybuf = &entry->cfe_rcv;
3619 }
3620
3621 cfc = entry->cfe_filter;
3622 if (cfc == NULL) {
3623 goto done;
3624 }
3625
3626 data = cfil_data_start(data);
3627 if (data == NULL || (data->m_flags & M_PKTHDR) == 0) {
3628 CFIL_LOG(LOG_ERR, "NOT PKTHDR");
3629 goto done;
3630 }
3631
3632 CFIL_LOG(LOG_INFO, "so %llx kcunit %u outgoing %d",
3633 (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit, outgoing);
3634
3635 socket_lock_assert_owned(so);
3636
3637 /* Would be wasteful to try */
3638 if (cfc->cf_flags & CFF_FLOW_CONTROLLED) {
3639 error = ENOBUFS;
3640 goto done;
3641 }
3642
3643 /* Make a copy of the data to pass to kernel control socket */
3644 copy = m_copym_mode(data, copyoffset, copylen, M_DONTWAIT,
3645 M_COPYM_NOOP_HDR);
3646 if (copy == NULL) {
3647 CFIL_LOG(LOG_ERR, "m_copym_mode() failed");
3648 error = ENOMEM;
3649 goto done;
3650 }
3651
3652 /* We need an mbuf packet for the message header */
3653 hdrsize = sizeof(struct cfil_msg_data_event);
3654 error = mbuf_allocpacket(MBUF_DONTWAIT, hdrsize, &one, &msg);
3655 if (error != 0) {
3656 CFIL_LOG(LOG_ERR, "mbuf_allocpacket() failed");
3657 m_freem(copy);
3658 /*
3659 * ENOBUFS is reserved to indicate flow control, so report ENOMEM instead
3660 */
3661 error = ENOMEM;
3662 goto done;
3663 }
3664 mbuf_setlen(msg, hdrsize);
3665 mbuf_pkthdr_setlen(msg, hdrsize + copylen);
3666 msg->m_next = copy;
3667 data_req = (struct cfil_msg_data_event *)mbuf_data(msg);
3668 bzero(data_req, hdrsize);
3669 data_req->cfd_msghdr.cfm_len = hdrsize + copylen;
3670 data_req->cfd_msghdr.cfm_version = CFM_VERSION_CURRENT;
3671 data_req->cfd_msghdr.cfm_type = CFM_TYPE_EVENT;
3672 data_req->cfd_msghdr.cfm_op =
3673 outgoing ? CFM_OP_DATA_OUT : CFM_OP_DATA_IN;
3674 data_req->cfd_msghdr.cfm_sock_id =
3675 entry->cfe_cfil_info->cfi_sock_id;
3676 data_req->cfd_start_offset = entrybuf->cfe_peeked;
3677 data_req->cfd_end_offset = entrybuf->cfe_peeked + copylen;
3678
3679 data_req->cfd_flags = 0;
3680 if (OPTIONAL_IP_HEADER(so)) {
3681 /*
3682 * For non-UDP/TCP traffic, indicate to filters if optional
3683 * IP header is present:
3684 * outgoing - indicate according to INP_HDRINCL flag
3685 * incoming - For IPv4 only, stripping of IP header is
3686 * optional. But for CFIL, we delay stripping
3687 * at rip_input. So CFIL always expects IP
3688 * frames. IP header will be stripped according
3689 * to INP_STRIPHDR flag later at reinjection.
3690 */
3691 if ((!outgoing && !IS_INP_V6(inp)) ||
3692 (outgoing && cfil_dgram_peek_socket_state(data, &inp_flags) && (inp_flags & INP_HDRINCL))) {
3693 data_req->cfd_flags |= CFD_DATA_FLAG_IP_HEADER;
3694 }
3695 }
3696
3697 /*
3698 * Copy address/port into event msg.
3699 * For non-connected sockets, the addresses must be copied from the
3700 * passed-in parameters
3701 */
3702 cfil_fill_event_msg_addresses(cfil_info->cfi_hash_entry, inp,
3703 &data_req->cfc_src, &data_req->cfc_dst,
3704 !IS_INP_V6(inp), outgoing);
3705
3706 if (cfil_info->cfi_debug) {
3707 cfil_info_log(LOG_ERR, cfil_info, "CFIL: SENDING DATA UP");
3708 }
3709
3710 if (cfil_info->cfi_isSignatureLatest == false) {
3711 cfil_dispatch_data_event_sign(entry->cfe_filter->cf_crypto_state, so, cfil_info, data_req);
3712 }
3713
3714 microuptime(&tv);
3715 CFI_ADD_TIME_LOG(cfil_info, &tv, &cfil_info->cfi_first_event, data_req->cfd_msghdr.cfm_op);
3716
3717 /* Pass the message to the content filter */
3718 error = ctl_enqueuembuf(entry->cfe_filter->cf_kcref,
3719 entry->cfe_filter->cf_kcunit,
3720 msg, CTL_DATA_EOR);
3721 if (error != 0) {
3722 CFIL_LOG(LOG_ERR, "ctl_enqueuembuf() failed: %d", error);
3723 mbuf_freem(msg);
3724 goto done;
3725 }
3726 entry->cfe_flags &= ~CFEF_FLOW_CONTROLLED;
3727 OSIncrementAtomic(&cfil_stats.cfs_data_event_ok);
3728
3729 #if VERDICT_DEBUG
3730 CFIL_LOG(LOG_ERR, "CFIL: VERDICT ACTION: so %llx sockID %llu outgoing %d: mbuf %llx copyoffset %u copylen %u",
3731 (uint64_t)VM_KERNEL_ADDRPERM(so), cfil_info->cfi_sock_id, outgoing, (uint64_t)VM_KERNEL_ADDRPERM(data), copyoffset, copylen);
3732 #endif
3733
3734 if (cfil_info->cfi_debug) {
3735 CFIL_LOG(LOG_ERR, "CFIL: VERDICT ACTION: so %llx sockID %llu outgoing %d: mbuf %llx copyoffset %u copylen %u (%s)",
3736 (uint64_t)VM_KERNEL_ADDRPERM(so), cfil_info->cfi_sock_id, outgoing, (uint64_t)VM_KERNEL_ADDRPERM(data), copyoffset, copylen,
3737 data_req->cfd_flags & CFD_DATA_FLAG_IP_HEADER ? "IP HDR" : "NO IP HDR");
3738 }
3739
3740 done:
3741 if (error == ENOBUFS) {
3742 entry->cfe_flags |= CFEF_FLOW_CONTROLLED;
3743 OSIncrementAtomic(
3744 &cfil_stats.cfs_data_event_flow_control);
3745
3746 if (!cfil_rw_lock_shared_to_exclusive(&cfil_lck_rw)) {
3747 cfil_rw_lock_exclusive(&cfil_lck_rw);
3748 }
3749
3750 cfc->cf_flags |= CFF_FLOW_CONTROLLED;
3751
3752 cfil_rw_unlock_exclusive(&cfil_lck_rw);
3753 } else {
3754 if (error != 0) {
3755 OSIncrementAtomic(&cfil_stats.cfs_data_event_fail);
3756 }
3757
3758 cfil_rw_unlock_shared(&cfil_lck_rw);
3759 }
3760 return error;
3761 }
3762
3763 /*
3764 * Process the queue of data waiting to be delivered to content filter
3765 */
3766 static int
3767 cfil_data_service_ctl_q(struct socket *so, struct cfil_info *cfil_info, uint32_t kcunit, int outgoing)
3768 {
3769 errno_t error = 0;
3770 struct mbuf *data, *tmp = NULL;
3771 unsigned int datalen = 0, copylen = 0, copyoffset = 0;
3772 struct cfil_entry *entry;
3773 struct cfe_buf *entrybuf;
3774 uint64_t currentoffset = 0;
3775
3776 if (cfil_info == NULL) {
3777 return 0;
3778 }
3779
3780 CFIL_LOG(LOG_INFO, "so %llx kcunit %u outgoing %d",
3781 (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit, outgoing);
3782
3783 socket_lock_assert_owned(so);
3784
3785 entry = &cfil_info->cfi_entries[kcunit - 1];
3786 if (outgoing) {
3787 entrybuf = &entry->cfe_snd;
3788 } else {
3789 entrybuf = &entry->cfe_rcv;
3790 }
3791
3792 /* Send attached message if not yet done */
3793 if ((entry->cfe_flags & CFEF_SENT_SOCK_ATTACHED) == 0) {
3794 error = cfil_dispatch_attach_event(so, cfil_info, CFI_ENTRY_KCUNIT(cfil_info, entry),
3795 outgoing ? CFS_CONNECTION_DIR_OUT : CFS_CONNECTION_DIR_IN);
3796 if (error != 0) {
3797 /* We can recover from flow control */
3798 if (error == ENOBUFS || error == ENOMEM) {
3799 error = 0;
3800 }
3801 goto done;
3802 }
3803 } else if ((entry->cfe_flags & CFEF_DATA_START) == 0) {
3804 OSIncrementAtomic(&cfil_stats.cfs_ctl_q_not_started);
3805 goto done;
3806 }
3807
3808 #if DATA_DEBUG
3809 CFIL_LOG(LOG_DEBUG, "CFIL: SERVICE CTL-Q: pass_offset %llu peeked %llu peek_offset %llu",
3810 entrybuf->cfe_pass_offset,
3811 entrybuf->cfe_peeked,
3812 entrybuf->cfe_peek_offset);
3813 #endif
3814
3815 /* Move all data that can pass */
3816 while ((data = cfil_queue_first(&entrybuf->cfe_ctl_q)) != NULL &&
3817 entrybuf->cfe_ctl_q.q_start < entrybuf->cfe_pass_offset) {
3818 datalen = cfil_data_length(data, NULL, NULL);
3819 tmp = data;
3820
3821 if (entrybuf->cfe_ctl_q.q_start + datalen <=
3822 entrybuf->cfe_pass_offset) {
3823 /*
3824 * The first mbuf can fully pass
3825 */
3826 copylen = datalen;
3827 } else {
3828 /*
3829 * The first mbuf can partially pass
3830 */
3831 copylen = entrybuf->cfe_pass_offset -
3832 entrybuf->cfe_ctl_q.q_start;
3833 }
3834 VERIFY(copylen <= datalen);
3835
3836 #if DATA_DEBUG
3837 CFIL_LOG(LOG_DEBUG,
3838 "CFIL: SERVICE CTL-Q PASSING: %llx first %llu peeked %llu pass %llu peek %llu"
3839 "datalen %u copylen %u",
3840 (uint64_t)VM_KERNEL_ADDRPERM(tmp),
3841 entrybuf->cfe_ctl_q.q_start,
3842 entrybuf->cfe_peeked,
3843 entrybuf->cfe_pass_offset,
3844 entrybuf->cfe_peek_offset,
3845 datalen, copylen);
3846 #endif
3847
3848 /*
3849 * Data that passes has been peeked at explicitly or
3850 * implicitly
3851 */
3852 if (entrybuf->cfe_ctl_q.q_start + copylen >
3853 entrybuf->cfe_peeked) {
3854 entrybuf->cfe_peeked =
3855 entrybuf->cfe_ctl_q.q_start + copylen;
3856 }
3857 /*
3858 * Stop on partial pass
3859 */
3860 if (copylen < datalen) {
3861 break;
3862 }
3863
3864 /* All good, move full data from ctl queue to pending queue */
3865 cfil_queue_remove(&entrybuf->cfe_ctl_q, data, datalen);
3866
3867 cfil_queue_enqueue(&entrybuf->cfe_pending_q, data, datalen);
3868 if (outgoing) {
3869 OSAddAtomic64(datalen,
3870 &cfil_stats.cfs_pending_q_out_enqueued);
3871 } else {
3872 OSAddAtomic64(datalen,
3873 &cfil_stats.cfs_pending_q_in_enqueued);
3874 }
3875 }
3876 CFIL_INFO_VERIFY(cfil_info);
3877 if (tmp != NULL) {
3878 CFIL_LOG(LOG_DEBUG,
3879 "%llx first %llu peeked %llu pass %llu peek %llu"
3880 "datalen %u copylen %u",
3881 (uint64_t)VM_KERNEL_ADDRPERM(tmp),
3882 entrybuf->cfe_ctl_q.q_start,
3883 entrybuf->cfe_peeked,
3884 entrybuf->cfe_pass_offset,
3885 entrybuf->cfe_peek_offset,
3886 datalen, copylen);
3887 }
3888 tmp = NULL;
3889
3890 /* Now deal with remaining data the filter wants to peek at */
3891 for (data = cfil_queue_first(&entrybuf->cfe_ctl_q),
3892 currentoffset = entrybuf->cfe_ctl_q.q_start;
3893 data != NULL && currentoffset < entrybuf->cfe_peek_offset;
3894 data = cfil_queue_next(&entrybuf->cfe_ctl_q, data),
3895 currentoffset += datalen) {
3896 datalen = cfil_data_length(data, NULL, NULL);
3897 tmp = data;
3898
3899 /* We've already peeked at this mbuf */
3900 if (currentoffset + datalen <= entrybuf->cfe_peeked) {
3901 continue;
3902 }
3903 /*
3904 * The data in the first mbuf may have been
3905 * partially peeked at
3906 */
3907 copyoffset = entrybuf->cfe_peeked - currentoffset;
3908 VERIFY(copyoffset < datalen);
3909 copylen = datalen - copyoffset;
3910 VERIFY(copylen <= datalen);
3911 /*
3912 * Do not copy more than needed
3913 */
3914 if (currentoffset + copyoffset + copylen >
3915 entrybuf->cfe_peek_offset) {
3916 copylen = entrybuf->cfe_peek_offset -
3917 (currentoffset + copyoffset);
3918 }
3919
3920 #if DATA_DEBUG
3921 CFIL_LOG(LOG_DEBUG,
3922 "CFIL: SERVICE CTL-Q PEEKING: %llx current %llu peeked %llu pass %llu peek %llu "
3923 "datalen %u copylen %u copyoffset %u",
3924 (uint64_t)VM_KERNEL_ADDRPERM(tmp),
3925 currentoffset,
3926 entrybuf->cfe_peeked,
3927 entrybuf->cfe_pass_offset,
3928 entrybuf->cfe_peek_offset,
3929 datalen, copylen, copyoffset);
3930 #endif
3931
3932 /*
3933 * Stop if there is nothing more to peek at
3934 */
3935 if (copylen == 0) {
3936 break;
3937 }
3938 /*
3939 * Let the filter get a peek at this span of data
3940 */
3941 error = cfil_dispatch_data_event(so, cfil_info, kcunit,
3942 outgoing, data, copyoffset, copylen);
3943 if (error != 0) {
3944 /* On error, leave data in ctl_q */
3945 break;
3946 }
3947 entrybuf->cfe_peeked += copylen;
3948 if (outgoing) {
3949 OSAddAtomic64(copylen,
3950 &cfil_stats.cfs_ctl_q_out_peeked);
3951 } else {
3952 OSAddAtomic64(copylen,
3953 &cfil_stats.cfs_ctl_q_in_peeked);
3954 }
3955
3956 /* Stop when data could not be fully peeked at */
3957 if (copylen + copyoffset < datalen) {
3958 break;
3959 }
3960 }
3961 CFIL_INFO_VERIFY(cfil_info);
3962 if (tmp != NULL) {
3963 CFIL_LOG(LOG_DEBUG,
3964 "%llx first %llu peeked %llu pass %llu peek %llu"
3965 "datalen %u copylen %u copyoffset %u",
3966 (uint64_t)VM_KERNEL_ADDRPERM(tmp),
3967 currentoffset,
3968 entrybuf->cfe_peeked,
3969 entrybuf->cfe_pass_offset,
3970 entrybuf->cfe_peek_offset,
3971 datalen, copylen, copyoffset);
3972 }
3973
3974 /*
3975 * Process data that has passed the filter
3976 */
3977 error = cfil_service_pending_queue(so, cfil_info, kcunit, outgoing);
3978 if (error != 0) {
3979 CFIL_LOG(LOG_ERR, "cfil_service_pending_queue() error %d",
3980 error);
3981 goto done;
3982 }
3983
3984 /*
3985 * Dispatch disconnect events that could not be sent
3986 */
3987 if (cfil_info == NULL) {
3988 goto done;
3989 } else if (outgoing) {
3990 if ((cfil_info->cfi_flags & CFIF_SHUT_WR) &&
3991 !(entry->cfe_flags & CFEF_SENT_DISCONNECT_OUT)) {
3992 cfil_dispatch_disconnect_event(so, cfil_info, kcunit, 1);
3993 }
3994 } else {
3995 if ((cfil_info->cfi_flags & CFIF_SHUT_RD) &&
3996 !(entry->cfe_flags & CFEF_SENT_DISCONNECT_IN)) {
3997 cfil_dispatch_disconnect_event(so, cfil_info, kcunit, 0);
3998 }
3999 }
4000
4001 done:
4002 CFIL_LOG(LOG_DEBUG,
4003 "first %llu peeked %llu pass %llu peek %llu",
4004 entrybuf->cfe_ctl_q.q_start,
4005 entrybuf->cfe_peeked,
4006 entrybuf->cfe_pass_offset,
4007 entrybuf->cfe_peek_offset);
4008
4009 CFIL_INFO_VERIFY(cfil_info);
4010 return error;
4011 }
4012
4013 /*
4014 * cfil_data_filter()
4015 *
4016 * Process data for a content filter installed on a socket
4017 */
4018 int
4019 cfil_data_filter(struct socket *so, struct cfil_info *cfil_info, uint32_t kcunit, int outgoing,
4020 struct mbuf *data, uint64_t datalen)
4021 {
4022 errno_t error = 0;
4023 struct cfil_entry *entry;
4024 struct cfe_buf *entrybuf;
4025
4026 CFIL_LOG(LOG_INFO, "so %llx kcunit %u outgoing %d",
4027 (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit, outgoing);
4028
4029 socket_lock_assert_owned(so);
4030
4031 entry = &cfil_info->cfi_entries[kcunit - 1];
4032 if (outgoing) {
4033 entrybuf = &entry->cfe_snd;
4034 } else {
4035 entrybuf = &entry->cfe_rcv;
4036 }
4037
4038 /* Are we attached to the filter? */
4039 if (entry->cfe_filter == NULL) {
4040 error = 0;
4041 goto done;
4042 }
4043
4044 /* Dispatch to filters */
4045 cfil_queue_enqueue(&entrybuf->cfe_ctl_q, data, datalen);
4046 if (outgoing) {
4047 OSAddAtomic64(datalen,
4048 &cfil_stats.cfs_ctl_q_out_enqueued);
4049 } else {
4050 OSAddAtomic64(datalen,
4051 &cfil_stats.cfs_ctl_q_in_enqueued);
4052 }
4053
4054 error = cfil_data_service_ctl_q(so, cfil_info, kcunit, outgoing);
4055 if (error != 0) {
4056 CFIL_LOG(LOG_ERR, "cfil_data_service_ctl_q() error %d",
4057 error);
4058 }
4059 /*
4060 * We have to return EJUSTRETURN in all cases to avoid double free
4061 * by socket layer
4062 */
4063 error = EJUSTRETURN;
4064 done:
4065 CFIL_INFO_VERIFY(cfil_info);
4066
4067 CFIL_LOG(LOG_INFO, "return %d", error);
4068 return error;
4069 }
4070
4071 /*
4072 * cfil_service_inject_queue() re-injects data that passed the
4073 * content filters
4074 */
4075 static int
4076 cfil_service_inject_queue(struct socket *so, struct cfil_info *cfil_info, int outgoing)
4077 {
4078 mbuf_t data;
4079 unsigned int datalen;
4080 int mbcnt = 0;
4081 int mbnum = 0;
4082 errno_t error = 0;
4083 struct cfi_buf *cfi_buf;
4084 struct cfil_queue *inject_q;
4085 int need_rwakeup = 0;
4086 int count = 0;
4087 struct inpcb *inp = NULL;
4088 struct ip *ip = NULL;
4089 unsigned int hlen;
4090
4091 if (cfil_info == NULL) {
4092 return 0;
4093 }
4094
4095 socket_lock_assert_owned(so);
4096
4097 if (outgoing) {
4098 cfi_buf = &cfil_info->cfi_snd;
4099 cfil_info->cfi_flags &= ~CFIF_RETRY_INJECT_OUT;
4100 } else {
4101 cfi_buf = &cfil_info->cfi_rcv;
4102 cfil_info->cfi_flags &= ~CFIF_RETRY_INJECT_IN;
4103 }
4104 inject_q = &cfi_buf->cfi_inject_q;
4105
4106 if (cfil_queue_empty(inject_q)) {
4107 return 0;
4108 }
4109
4110 #if DATA_DEBUG | VERDICT_DEBUG
4111 CFIL_LOG(LOG_ERR, "CFIL: SERVICE INJECT-Q: <so %llx> outgoing %d queue len %llu",
4112 (uint64_t)VM_KERNEL_ADDRPERM(so), outgoing, cfil_queue_len(inject_q));
4113 #endif
4114
4115 while ((data = cfil_queue_first(inject_q)) != NULL) {
4116 datalen = cfil_data_length(data, &mbcnt, &mbnum);
4117
4118 #if DATA_DEBUG
4119 CFIL_LOG(LOG_ERR, "CFIL: SERVICE INJECT-Q: <so %llx> data %llx datalen %u (mbcnt %u)",
4120 (uint64_t)VM_KERNEL_ADDRPERM(so), (uint64_t)VM_KERNEL_ADDRPERM(data), datalen, mbcnt);
4121 #endif
4122 if (cfil_info->cfi_debug) {
4123 CFIL_LOG(LOG_ERR, "CFIL: SERVICE INJECT-Q: <so %llx> data %llx datalen %u (mbcnt %u)",
4124 (uint64_t)VM_KERNEL_ADDRPERM(so), (uint64_t)VM_KERNEL_ADDRPERM(data), datalen, mbcnt);
4125 }
4126
4127 /* Remove data from queue and adjust stats */
4128 cfil_queue_remove(inject_q, data, datalen);
4129 cfi_buf->cfi_pending_first += datalen;
4130 cfi_buf->cfi_pending_mbcnt -= mbcnt;
4131 cfi_buf->cfi_pending_mbnum -= mbnum;
4132 cfil_info_buf_verify(cfi_buf);
4133
4134 if (outgoing) {
4135 error = sosend_reinject(so, NULL, data, NULL, 0);
4136 if (error != 0) {
4137 #if DATA_DEBUG
4138 cfil_info_log(LOG_ERR, cfil_info, "CFIL: Error: sosend_reinject() failed");
4139 CFIL_LOG(LOG_ERR, "### sosend() failed %d", error);
4140 #endif
4141 break;
4142 }
4143 // At least one injection succeeded, need to wake up pending threads.
4144 need_rwakeup = 1;
4145 } else {
4146 data->m_flags |= M_SKIPCFIL;
4147
4148 /*
4149 * NOTE: We currently only support TCP, UDP, ICMP,
4150 * ICMPv6 and RAWIP. For MPTCP and message TCP we'll
4151 * need to call the appropriate sbappendxxx()
4152 * or fix sock_inject_data_in()
4153 */
4154 if (IS_IP_DGRAM(so)) {
4155 if (OPTIONAL_IP_HEADER(so)) {
4156 inp = sotoinpcb(so);
4157 if (inp && (inp->inp_flags & INP_STRIPHDR)) {
4158 mbuf_t data_start = cfil_data_start(data);
4159 if (data_start != NULL && (data_start->m_flags & M_PKTHDR)) {
4160 ip = mtod(data_start, struct ip *);
4161 hlen = IP_VHL_HL(ip->ip_vhl) << 2;
4162 data_start->m_len -= hlen;
4163 data_start->m_pkthdr.len -= hlen;
4164 data_start->m_data += hlen;
4165 }
4166 }
4167 }
4168
4169 if (sbappendchain(&so->so_rcv, data, 0)) {
4170 need_rwakeup = 1;
4171 }
4172 } else {
4173 if (sbappendstream(&so->so_rcv, data)) {
4174 need_rwakeup = 1;
4175 }
4176 }
4177 }
4178
4179 if (outgoing) {
4180 OSAddAtomic64(datalen,
4181 &cfil_stats.cfs_inject_q_out_passed);
4182 } else {
4183 OSAddAtomic64(datalen,
4184 &cfil_stats.cfs_inject_q_in_passed);
4185 }
4186
4187 count++;
4188 }
4189
4190 #if DATA_DEBUG | VERDICT_DEBUG
4191 CFIL_LOG(LOG_ERR, "CFIL: SERVICE INJECT-Q: <so %llx> injected %d",
4192 (uint64_t)VM_KERNEL_ADDRPERM(so), count);
4193 #endif
4194 if (cfil_info->cfi_debug) {
4195 CFIL_LOG(LOG_ERR, "CFIL: SERVICE INJECT-Q: <so %llx> injected %d",
4196 (uint64_t)VM_KERNEL_ADDRPERM(so), count);
4197 }
4198
4199 /* A single wakeup for several packets is more efficient */
4200 if (need_rwakeup) {
4201 if (outgoing == TRUE) {
4202 sowwakeup(so);
4203 } else {
4204 sorwakeup(so);
4205 }
4206 }
4207
4208 if (error != 0 && cfil_info) {
4209 if (error == ENOBUFS) {
4210 OSIncrementAtomic(&cfil_stats.cfs_inject_q_nobufs);
4211 }
4212 if (error == ENOMEM) {
4213 OSIncrementAtomic(&cfil_stats.cfs_inject_q_nomem);
4214 }
4215
4216 if (outgoing) {
4217 cfil_info->cfi_flags |= CFIF_RETRY_INJECT_OUT;
4218 OSIncrementAtomic(&cfil_stats.cfs_inject_q_out_fail);
4219 } else {
4220 cfil_info->cfi_flags |= CFIF_RETRY_INJECT_IN;
4221 OSIncrementAtomic(&cfil_stats.cfs_inject_q_in_fail);
4222 }
4223 }
4224
4225 /*
4226 * Notify
4227 */
4228 if (cfil_info && (cfil_info->cfi_flags & CFIF_SHUT_WR)) {
4229 cfil_sock_notify_shutdown(so, SHUT_WR);
4230 if (cfil_sock_data_pending(&so->so_snd) == 0) {
4231 soshutdownlock_final(so, SHUT_WR);
4232 }
4233 }
4234 if (cfil_info && (cfil_info->cfi_flags & CFIF_CLOSE_WAIT)) {
4235 if (cfil_filters_attached(so) == 0) {
4236 CFIL_LOG(LOG_INFO, "so %llx waking",
4237 (uint64_t)VM_KERNEL_ADDRPERM(so));
4238 wakeup((caddr_t)cfil_info);
4239 }
4240 }
4241
4242 CFIL_INFO_VERIFY(cfil_info);
4243
4244 return error;
4245 }
4246
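/*
 * cfil_service_pending_queue()
 *
 * Move data below the filter's pass offset out of the pending queue,
 * hand it to the remaining filters in order and, once all filters have
 * passed it, place it on the inject queue for re-injection.
 */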
4247 static int
4248 cfil_service_pending_queue(struct socket *so, struct cfil_info *cfil_info, uint32_t kcunit, int outgoing)
4249 {
4250 uint64_t passlen, curlen;
4251 mbuf_t data;
4252 unsigned int datalen;
4253 errno_t error = 0;
4254 struct cfil_entry *entry;
4255 struct cfe_buf *entrybuf;
4256 struct cfil_queue *pending_q;
4257
4258 CFIL_LOG(LOG_INFO, "so %llx kcunit %u outgoing %d",
4259 (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit, outgoing);
4260
4261 socket_lock_assert_owned(so);
4262
4263 entry = &cfil_info->cfi_entries[kcunit - 1];
4264 if (outgoing) {
4265 entrybuf = &entry->cfe_snd;
4266 } else {
4267 entrybuf = &entry->cfe_rcv;
4268 }
4269
4270 pending_q = &entrybuf->cfe_pending_q;
4271
4272 passlen = entrybuf->cfe_pass_offset - pending_q->q_start;
4273
4274 /*
4275 * Locate the chunks of data that we can pass to the next filter
4276 * A data chunk must be on mbuf boundaries
4277 */
4278 curlen = 0;
4279 while ((data = cfil_queue_first(pending_q)) != NULL) {
4280 struct cfil_entry *iter_entry;
4281 datalen = cfil_data_length(data, NULL, NULL);
4282
4283 #if DATA_DEBUG
4284 CFIL_LOG(LOG_DEBUG,
4285 "CFIL: SERVICE PENDING-Q: data %llx datalen %u passlen %llu curlen %llu",
4286 (uint64_t)VM_KERNEL_ADDRPERM(data), datalen,
4287 passlen, curlen);
4288 #endif
4289
4290 if (curlen + datalen > passlen) {
4291 break;
4292 }
4293
4294 cfil_queue_remove(pending_q, data, datalen);
4295
4296 curlen += datalen;
4297
4298 for (iter_entry = SLIST_NEXT(entry, cfe_order_link);
4299 iter_entry != NULL;
4300 iter_entry = SLIST_NEXT(iter_entry, cfe_order_link)) {
4301 error = cfil_data_filter(so, cfil_info, CFI_ENTRY_KCUNIT(cfil_info, iter_entry), outgoing,
4302 data, datalen);
4303 /* 0 means passed so we can continue */
4304 if (error != 0) {
4305 break;
4306 }
4307 }
4308 /* When data has passed all filters, re-inject */
4309 if (error == 0) {
4310 if (outgoing) {
4311 cfil_queue_enqueue(
4312 &cfil_info->cfi_snd.cfi_inject_q,
4313 data, datalen);
4314 OSAddAtomic64(datalen,
4315 &cfil_stats.cfs_inject_q_out_enqueued);
4316 } else {
4317 cfil_queue_enqueue(
4318 &cfil_info->cfi_rcv.cfi_inject_q,
4319 data, datalen);
4320 OSAddAtomic64(datalen,
4321 &cfil_stats.cfs_inject_q_in_enqueued);
4322 }
4323 }
4324 }
4325
4326 CFIL_INFO_VERIFY(cfil_info);
4327
4328 return error;
4329 }
4330
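/*
 * cfil_update_data_offsets()
 *
 * Record the pass and peek offsets received from a filter and service the
 * control queue so newly passed data moves forward. Marks the entry
 * detached once both directions have passed up to CFM_MAX_OFFSET.
 */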
4331 int
4332 cfil_update_data_offsets(struct socket *so, struct cfil_info *cfil_info, uint32_t kcunit, int outgoing,
4333 uint64_t pass_offset, uint64_t peek_offset)
4334 {
4335 errno_t error = 0;
4336 struct cfil_entry *entry = NULL;
4337 struct cfe_buf *entrybuf;
4338 int updated = 0;
4339
4340 CFIL_LOG(LOG_INFO, "pass %llu peek %llu", pass_offset, peek_offset);
4341
4342 socket_lock_assert_owned(so);
4343
4344 if (cfil_info == NULL) {
4345 CFIL_LOG(LOG_ERR, "so %llx cfil detached",
4346 (uint64_t)VM_KERNEL_ADDRPERM(so));
4347 error = 0;
4348 goto done;
4349 } else if (cfil_info->cfi_flags & CFIF_DROP) {
4350 CFIL_LOG(LOG_ERR, "so %llx drop set",
4351 (uint64_t)VM_KERNEL_ADDRPERM(so));
4352 error = EPIPE;
4353 goto done;
4354 }
4355
4356 entry = &cfil_info->cfi_entries[kcunit - 1];
4357 if (outgoing) {
4358 entrybuf = &entry->cfe_snd;
4359 } else {
4360 entrybuf = &entry->cfe_rcv;
4361 }
4362
4363 /* Record updated offsets for this content filter */
4364 if (pass_offset > entrybuf->cfe_pass_offset) {
4365 entrybuf->cfe_pass_offset = pass_offset;
4366
4367 if (entrybuf->cfe_peek_offset < entrybuf->cfe_pass_offset) {
4368 entrybuf->cfe_peek_offset = entrybuf->cfe_pass_offset;
4369 }
4370 updated = 1;
4371 } else {
4372 CFIL_LOG(LOG_INFO, "pass_offset %llu <= cfe_pass_offset %llu",
4373 pass_offset, entrybuf->cfe_pass_offset);
4374 }
4375 /* Filter does not want or need to see data that's allowed to pass */
4376 if (peek_offset > entrybuf->cfe_pass_offset &&
4377 peek_offset > entrybuf->cfe_peek_offset) {
4378 entrybuf->cfe_peek_offset = peek_offset;
4379 updated = 1;
4380 }
4381 /* Nothing to do */
4382 if (updated == 0) {
4383 goto done;
4384 }
4385
4386 /* Move data held in control queue to pending queue if needed */
4387 error = cfil_data_service_ctl_q(so, cfil_info, kcunit, outgoing);
4388 if (error != 0) {
4389 CFIL_LOG(LOG_ERR, "cfil_data_service_ctl_q() error %d",
4390 error);
4391 goto done;
4392 }
4393 error = EJUSTRETURN;
4394
4395 done:
4396 /*
4397 * The filter is effectively detached when it has passed all data from both sides
4398 * or when the socket is closed and no more data is waiting
4399 * to be delivered to the filter
4400 */
4401 if (entry != NULL &&
4402 ((entry->cfe_snd.cfe_pass_offset == CFM_MAX_OFFSET &&
4403 entry->cfe_rcv.cfe_pass_offset == CFM_MAX_OFFSET) ||
4404 ((cfil_info->cfi_flags & CFIF_CLOSE_WAIT) &&
4405 cfil_queue_empty(&entry->cfe_snd.cfe_ctl_q) &&
4406 cfil_queue_empty(&entry->cfe_rcv.cfe_ctl_q)))) {
4407 entry->cfe_flags |= CFEF_CFIL_DETACHED;
4408 #if LIFECYCLE_DEBUG
4409 cfil_info_log(LOG_ERR, cfil_info, outgoing ?
4410 "CFIL: LIFECYCLE: OUT - PASSED ALL - DETACH":
4411 "CFIL: LIFECYCLE: IN - PASSED ALL - DETACH");
4412 #endif
4413 CFIL_LOG(LOG_INFO, "so %llx detached %u",
4414 (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit);
4415 if ((cfil_info->cfi_flags & CFIF_CLOSE_WAIT) &&
4416 cfil_filters_attached(so) == 0) {
4417 #if LIFECYCLE_DEBUG
4418 cfil_info_log(LOG_ERR, cfil_info, "CFIL: LIFECYCLE: WAKING");
4419 #endif
4420 CFIL_LOG(LOG_INFO, "so %llx waking",
4421 (uint64_t)VM_KERNEL_ADDRPERM(so));
4422 wakeup((caddr_t)cfil_info);
4423 }
4424 }
4425 CFIL_INFO_VERIFY(cfil_info);
4426 CFIL_LOG(LOG_INFO, "return %d", error);
4427 return error;
4428 }
4429
4430 /*
4431 * Update pass offset for socket when no data is pending
4432 */
4433 static int
4434 cfil_set_socket_pass_offset(struct socket *so, struct cfil_info *cfil_info, int outgoing)
4435 {
4436 struct cfi_buf *cfi_buf;
4437 struct cfil_entry *entry;
4438 struct cfe_buf *entrybuf;
4439 uint32_t kcunit;
4440 uint64_t pass_offset = 0;
4441
4442 if (cfil_info == NULL) {
4443 return 0;
4444 }
4445
4446 CFIL_LOG(LOG_INFO, "so %llx outgoing %d",
4447 (uint64_t)VM_KERNEL_ADDRPERM(so), outgoing);
4448
4449 socket_lock_assert_owned(so);
4450
4451 if (outgoing) {
4452 cfi_buf = &cfil_info->cfi_snd;
4453 } else {
4454 cfi_buf = &cfil_info->cfi_rcv;
4455 }
4456
4457 CFIL_LOG(LOG_DEBUG, "CFIL: <so %llx, sockID %llu> outgoing %d cfi_pending_first %llu cfi_pending_last %llu",
4458 (uint64_t)VM_KERNEL_ADDRPERM(so), cfil_info->cfi_sock_id, outgoing,
4459 cfi_buf->cfi_pending_first, cfi_buf->cfi_pending_last);
4460
4461 if (cfi_buf->cfi_pending_last - cfi_buf->cfi_pending_first == 0) {
4462 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
4463 entry = &cfil_info->cfi_entries[kcunit - 1];
4464
4465 /* Are we attached to a filter? */
4466 if (entry->cfe_filter == NULL) {
4467 continue;
4468 }
4469
4470 if (outgoing) {
4471 entrybuf = &entry->cfe_snd;
4472 } else {
4473 entrybuf = &entry->cfe_rcv;
4474 }
4475
4476 if (pass_offset == 0 ||
4477 entrybuf->cfe_pass_offset < pass_offset) {
4478 pass_offset = entrybuf->cfe_pass_offset;
4479 }
4480 }
4481 cfi_buf->cfi_pass_offset = pass_offset;
4482 }
4483
4484 CFIL_LOG(LOG_DEBUG, "CFIL: <so %llx, sockID %llu>, cfi_pass_offset %llu",
4485 (uint64_t)VM_KERNEL_ADDRPERM(so), cfil_info->cfi_sock_id, cfi_buf->cfi_pass_offset);
4486
4487 return 0;
4488 }
4489
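/*
 * cfil_action_data_pass()
 *
 * Handle a pass verdict from a filter: update the data offsets, re-inject
 * any data that may now pass and update the per-socket pass offset.
 */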
4490 int
4491 cfil_action_data_pass(struct socket *so, struct cfil_info *cfil_info, uint32_t kcunit, int outgoing,
4492 uint64_t pass_offset, uint64_t peek_offset)
4493 {
4494 errno_t error = 0;
4495
4496 CFIL_LOG(LOG_INFO, "");
4497
4498 socket_lock_assert_owned(so);
4499
4500 error = cfil_acquire_sockbuf(so, cfil_info, outgoing);
4501 if (error != 0) {
4502 CFIL_LOG(LOG_INFO, "so %llx %s dropped",
4503 (uint64_t)VM_KERNEL_ADDRPERM(so),
4504 outgoing ? "out" : "in");
4505 goto release;
4506 }
4507
4508 error = cfil_update_data_offsets(so, cfil_info, kcunit, outgoing,
4509 pass_offset, peek_offset);
4510
4511 cfil_service_inject_queue(so, cfil_info, outgoing);
4512
4513 cfil_set_socket_pass_offset(so, cfil_info, outgoing);
4514 release:
4515 CFIL_INFO_VERIFY(cfil_info);
4516 cfil_release_sockbuf(so, outgoing);
4517
4518 return error;
4519 }
4520
4521
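/*
 * cfil_flush_queues()
 *
 * Drain the control, pending and inject queues in both directions,
 * typically when the flow is dropped or closed.
 */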
4522 static void
4523 cfil_flush_queues(struct socket *so, struct cfil_info *cfil_info)
4524 {
4525 struct cfil_entry *entry;
4526 int kcunit;
4527 uint64_t drained;
4528
4529 if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || cfil_info == NULL) {
4530 goto done;
4531 }
4532
4533 socket_lock_assert_owned(so);
4534
4535 /*
4536 * Flush the output queues and ignore errors as long as
4537 * we are attached
4538 */
4539 (void) cfil_acquire_sockbuf(so, cfil_info, 1);
4540 if (cfil_info != NULL) {
4541 drained = 0;
4542 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
4543 entry = &cfil_info->cfi_entries[kcunit - 1];
4544
4545 drained += cfil_queue_drain(&entry->cfe_snd.cfe_ctl_q);
4546 drained += cfil_queue_drain(&entry->cfe_snd.cfe_pending_q);
4547 }
4548 drained += cfil_queue_drain(&cfil_info->cfi_snd.cfi_inject_q);
4549
4550 if (drained) {
4551 if (cfil_info->cfi_flags & CFIF_DROP) {
4552 OSIncrementAtomic(
4553 &cfil_stats.cfs_flush_out_drop);
4554 } else {
4555 OSIncrementAtomic(
4556 &cfil_stats.cfs_flush_out_close);
4557 }
4558 }
4559 }
4560 cfil_release_sockbuf(so, 1);
4561
4562 /*
4563 * Flush the input queues
4564 */
4565 (void) cfil_acquire_sockbuf(so, cfil_info, 0);
4566 if (cfil_info != NULL) {
4567 drained = 0;
4568 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
4569 entry = &cfil_info->cfi_entries[kcunit - 1];
4570
4571 drained += cfil_queue_drain(
4572 &entry->cfe_rcv.cfe_ctl_q);
4573 drained += cfil_queue_drain(
4574 &entry->cfe_rcv.cfe_pending_q);
4575 }
4576 drained += cfil_queue_drain(&cfil_info->cfi_rcv.cfi_inject_q);
4577
4578 if (drained) {
4579 if (cfil_info->cfi_flags & CFIF_DROP) {
4580 OSIncrementAtomic(
4581 &cfil_stats.cfs_flush_in_drop);
4582 } else {
4583 OSIncrementAtomic(
4584 &cfil_stats.cfs_flush_in_close);
4585 }
4586 }
4587 }
4588 cfil_release_sockbuf(so, 0);
4589 done:
4590 CFIL_INFO_VERIFY(cfil_info);
4591 }
4592
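/*
 * cfil_action_drop()
 *
 * Handle a drop verdict from a filter: mark the flow CFIF_DROP, defunct
 * the socket for non-datagram flows, detach the filter entry and flush
 * any pending data.
 */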
4593 int
4594 cfil_action_drop(struct socket *so, struct cfil_info *cfil_info, uint32_t kcunit)
4595 {
4596 errno_t error = 0;
4597 struct cfil_entry *entry;
4598 struct proc *p;
4599
4600 if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || cfil_info == NULL) {
4601 goto done;
4602 }
4603
4604 socket_lock_assert_owned(so);
4605
4606 entry = &cfil_info->cfi_entries[kcunit - 1];
4607
4608 /* Are we attached to the filter? */
4609 if (entry->cfe_filter == NULL) {
4610 goto done;
4611 }
4612
4613 cfil_info->cfi_flags |= CFIF_DROP;
4614
4615 p = current_proc();
4616
4617 /*
4618 * Force the socket to be marked defunct
4619 * (forcing fixed along with rdar://19391339)
4620 */
4621 if (so->so_cfil_db == NULL) {
4622 error = sosetdefunct(p, so,
4623 SHUTDOWN_SOCKET_LEVEL_CONTENT_FILTER | SHUTDOWN_SOCKET_LEVEL_DISCONNECT_ALL,
4624 FALSE);
4625
4626 /* Flush the socket buffer and disconnect */
4627 if (error == 0) {
4628 error = sodefunct(p, so,
4629 SHUTDOWN_SOCKET_LEVEL_CONTENT_FILTER | SHUTDOWN_SOCKET_LEVEL_DISCONNECT_ALL);
4630 }
4631 }
4632
4633 /* The filter is done, mark as detached */
4634 entry->cfe_flags |= CFEF_CFIL_DETACHED;
4635 #if LIFECYCLE_DEBUG
4636 cfil_info_log(LOG_ERR, cfil_info, "CFIL: LIFECYCLE: DROP - DETACH");
4637 #endif
4638 CFIL_LOG(LOG_INFO, "so %llx detached %u",
4639 (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit);
4640
4641 /* Pending data needs to go */
4642 cfil_flush_queues(so, cfil_info);
4643
4644 if (cfil_info && (cfil_info->cfi_flags & CFIF_CLOSE_WAIT)) {
4645 if (cfil_filters_attached(so) == 0) {
4646 CFIL_LOG(LOG_INFO, "so %llx waking",
4647 (uint64_t)VM_KERNEL_ADDRPERM(so));
4648 wakeup((caddr_t)cfil_info);
4649 }
4650 }
4651 done:
4652 return error;
4653 }
4654
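/*
 * cfil_action_bless_client()
 *
 * Give the socket identified by the client UUID an automatic pass in both
 * directions; if no filter is attached, mark the socket to skip content
 * filtering.
 */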
4655 int
4656 cfil_action_bless_client(uint32_t kcunit, struct cfil_msg_hdr *msghdr)
4657 {
4658 errno_t error = 0;
4659 struct cfil_info *cfil_info = NULL;
4660
4661 bool cfil_attached = false;
4662 struct cfil_msg_bless_client *blessmsg = (struct cfil_msg_bless_client *)msghdr;
4663
4664 // Search and lock socket
4665 struct socket *so = cfil_socket_from_client_uuid(blessmsg->cfb_client_uuid, &cfil_attached);
4666 if (so == NULL) {
4667 error = ENOENT;
4668 } else {
4669 // The client gets a pass automatically
4670 cfil_info = (so->so_cfil_db != NULL) ?
4671 cfil_db_get_cfil_info(so->so_cfil_db, msghdr->cfm_sock_id) : so->so_cfil;
4672
4673 if (cfil_attached) {
4674 #if VERDICT_DEBUG
4675 if (cfil_info != NULL) {
4676 CFIL_LOG(LOG_ERR, "CFIL: VERDICT RECEIVED: BLESS %s <so %llx sockID %llu>",
4677 cfil_info->cfi_hash_entry ? "UDP" : "TCP",
4678 (uint64_t)VM_KERNEL_ADDRPERM(so),
4679 cfil_info->cfi_sock_id);
4680 }
4681 #endif
4682 cfil_sock_received_verdict(so);
4683 (void)cfil_action_data_pass(so, cfil_info, kcunit, 1, CFM_MAX_OFFSET, CFM_MAX_OFFSET);
4684 (void)cfil_action_data_pass(so, cfil_info, kcunit, 0, CFM_MAX_OFFSET, CFM_MAX_OFFSET);
4685 } else {
4686 so->so_flags1 |= SOF1_CONTENT_FILTER_SKIP;
4687 }
4688 socket_unlock(so, 1);
4689 }
4690
4691 return error;
4692 }
4693
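/*
 * cfil_action_set_crypto_key()
 *
 * Install the crypto state derived from the key supplied by the filter
 * agent; it is used to sign events dispatched to user space (see
 * cfil_dispatch_data_event_sign() and cfil_dispatch_closed_event_sign()).
 */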
4694 int
4695 cfil_action_set_crypto_key(uint32_t kcunit, struct cfil_msg_hdr *msghdr)
4696 {
4697 struct content_filter *cfc = NULL;
4698 cfil_crypto_state_t crypto_state = NULL;
4699 struct cfil_msg_set_crypto_key *keymsg = (struct cfil_msg_set_crypto_key *)msghdr;
4700
4701 CFIL_LOG(LOG_NOTICE, "");
4702
4703 if (content_filters == NULL) {
4704 CFIL_LOG(LOG_ERR, "no content filter");
4705 return EINVAL;
4706 }
4707 if (kcunit > MAX_CONTENT_FILTER) {
4708 CFIL_LOG(LOG_ERR, "kcunit %u > MAX_CONTENT_FILTER (%d)",
4709 kcunit, MAX_CONTENT_FILTER);
4710 return EINVAL;
4711 }
4712 crypto_state = cfil_crypto_init_client((uint8_t *)keymsg->crypto_key);
4713 if (crypto_state == NULL) {
4714 CFIL_LOG(LOG_ERR, "failed to initialize crypto state for unit %u",
4715 kcunit);
4716 return EINVAL;
4717 }
4718
4719 cfil_rw_lock_exclusive(&cfil_lck_rw);
4720
4721 cfc = content_filters[kcunit - 1];
4722 if (cfc == NULL || cfc->cf_kcunit != kcunit) {
4723 CFIL_LOG(LOG_ERR, "bad unit info %u",
4724 kcunit);
4725 cfil_rw_unlock_exclusive(&cfil_lck_rw);
4726 cfil_crypto_cleanup_state(crypto_state);
4727 return EINVAL;
4728 }
4729 if (cfc->cf_crypto_state != NULL) {
4730 cfil_crypto_cleanup_state(cfc->cf_crypto_state);
4731 cfc->cf_crypto_state = NULL;
4732 }
4733 cfc->cf_crypto_state = crypto_state;
4734
4735 cfil_rw_unlock_exclusive(&cfil_lck_rw);
4736 return 0;
4737 }
4738
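/*
 * cfil_update_entry_offsets()
 *
 * Fast path used when data is already below every pass offset: advance
 * the per-entry queue offsets by datalen without queueing the data.
 */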
4739 static int
4740 cfil_update_entry_offsets(struct socket *so, struct cfil_info *cfil_info, int outgoing, unsigned int datalen)
4741 {
4742 struct cfil_entry *entry;
4743 struct cfe_buf *entrybuf;
4744 uint32_t kcunit;
4745
4746 CFIL_LOG(LOG_INFO, "so %llx outgoing %d datalen %u",
4747 (uint64_t)VM_KERNEL_ADDRPERM(so), outgoing, datalen);
4748
4749 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
4750 entry = &cfil_info->cfi_entries[kcunit - 1];
4751
4752 /* Are we attached to the filter? */
4753 if (entry->cfe_filter == NULL) {
4754 continue;
4755 }
4756
4757 if (outgoing) {
4758 entrybuf = &entry->cfe_snd;
4759 } else {
4760 entrybuf = &entry->cfe_rcv;
4761 }
4762
4763 entrybuf->cfe_ctl_q.q_start += datalen;
4764 entrybuf->cfe_pass_offset = entrybuf->cfe_ctl_q.q_start;
4765 entrybuf->cfe_peeked = entrybuf->cfe_ctl_q.q_start;
4766 if (entrybuf->cfe_peek_offset < entrybuf->cfe_pass_offset) {
4767 entrybuf->cfe_peek_offset = entrybuf->cfe_pass_offset;
4768 }
4769
4770 entrybuf->cfe_ctl_q.q_end += datalen;
4771
4772 entrybuf->cfe_pending_q.q_start += datalen;
4773 entrybuf->cfe_pending_q.q_end += datalen;
4774 }
4775 CFIL_INFO_VERIFY(cfil_info);
4776 return 0;
4777 }
4778
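/*
 * cfil_data_common()
 *
 * Common data path for the sosendxxx() and sbappendxxx() callbacks:
 * account for the new data and dispatch it to the attached filters in
 * order, unless it is already below the socket pass offset.
 */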
4779 int
4780 cfil_data_common(struct socket *so, struct cfil_info *cfil_info, int outgoing, struct sockaddr *to,
4781 struct mbuf *data, struct mbuf *control, uint32_t flags)
4782 {
4783 #pragma unused(to, control, flags)
4784 errno_t error = 0;
4785 unsigned int datalen;
4786 int mbcnt = 0;
4787 int mbnum = 0;
4788 int kcunit;
4789 struct cfi_buf *cfi_buf;
4790 struct mbuf *chain = NULL;
4791
4792 if (cfil_info == NULL) {
4793 CFIL_LOG(LOG_ERR, "so %llx cfil detached",
4794 (uint64_t)VM_KERNEL_ADDRPERM(so));
4795 error = 0;
4796 goto done;
4797 } else if (cfil_info->cfi_flags & CFIF_DROP) {
4798 CFIL_LOG(LOG_ERR, "so %llx drop set",
4799 (uint64_t)VM_KERNEL_ADDRPERM(so));
4800 error = EPIPE;
4801 goto done;
4802 }
4803
4804 datalen = cfil_data_length(data, &mbcnt, &mbnum);
4805
4806 if (outgoing) {
4807 cfi_buf = &cfil_info->cfi_snd;
4808 cfil_info->cfi_byte_outbound_count += datalen;
4809 } else {
4810 cfi_buf = &cfil_info->cfi_rcv;
4811 cfil_info->cfi_byte_inbound_count += datalen;
4812 }
4813
4814 cfi_buf->cfi_pending_last += datalen;
4815 cfi_buf->cfi_pending_mbcnt += mbcnt;
4816 cfi_buf->cfi_pending_mbnum += mbnum;
4817
4818 if (IS_IP_DGRAM(so)) {
4819 if (cfi_buf->cfi_pending_mbnum > cfil_udp_gc_mbuf_num_max ||
4820 cfi_buf->cfi_pending_mbcnt > cfil_udp_gc_mbuf_cnt_max) {
4821 cfi_buf->cfi_tail_drop_cnt++;
4822 cfi_buf->cfi_pending_mbcnt -= mbcnt;
4823 cfi_buf->cfi_pending_mbnum -= mbnum;
4824 return EPIPE;
4825 }
4826 }
4827
4828 cfil_info_buf_verify(cfi_buf);
4829
4830 #if DATA_DEBUG
4831 CFIL_LOG(LOG_DEBUG, "CFIL: QUEUEING DATA: <so %llx> %s: data %llx len %u flags 0x%x nextpkt %llx - cfi_pending_last %llu cfi_pending_mbcnt %u cfi_pass_offset %llu",
4832 (uint64_t)VM_KERNEL_ADDRPERM(so),
4833 outgoing ? "OUT" : "IN",
4834 (uint64_t)VM_KERNEL_ADDRPERM(data), datalen, data->m_flags,
4835 (uint64_t)VM_KERNEL_ADDRPERM(data->m_nextpkt),
4836 cfi_buf->cfi_pending_last,
4837 cfi_buf->cfi_pending_mbcnt,
4838 cfi_buf->cfi_pass_offset);
4839 #endif
4840
4841 /* Fast path when below pass offset */
4842 if (cfi_buf->cfi_pending_last <= cfi_buf->cfi_pass_offset) {
4843 cfil_update_entry_offsets(so, cfil_info, outgoing, datalen);
4844 #if DATA_DEBUG
4845 CFIL_LOG(LOG_DEBUG, "CFIL: QUEUEING DATA: FAST PATH");
4846 #endif
4847 } else {
4848 struct cfil_entry *iter_entry;
4849 SLIST_FOREACH(iter_entry, &cfil_info->cfi_ordered_entries, cfe_order_link) {
4850 // Is cfil attached to this filter?
4851 kcunit = CFI_ENTRY_KCUNIT(cfil_info, iter_entry);
4852 if (IS_ENTRY_ATTACHED(cfil_info, kcunit)) {
4853 if (IS_IP_DGRAM(so) && chain == NULL) {
4854 /* Datagrams only:
4855 * Chain addr (incoming only TDB), control (optional) and data into one chain.
4856 * This full chain will be reinjected into the socket after receiving the verdict.
4857 */
4858 (void) cfil_dgram_save_socket_state(cfil_info, data);
4859 chain = sbconcat_mbufs(NULL, outgoing ? NULL : to, data, control);
4860 if (chain == NULL) {
4861 return ENOBUFS;
4862 }
4863 data = chain;
4864 }
4865 error = cfil_data_filter(so, cfil_info, kcunit, outgoing, data,
4866 datalen);
4867 }
4868 /* 0 means passed so continue with next filter */
4869 if (error != 0) {
4870 break;
4871 }
4872 }
4873 }
4874
4875 /* Move cursor if no filter claimed the data */
4876 if (error == 0) {
4877 cfi_buf->cfi_pending_first += datalen;
4878 cfi_buf->cfi_pending_mbcnt -= mbcnt;
4879 cfi_buf->cfi_pending_mbnum -= mbnum;
4880 cfil_info_buf_verify(cfi_buf);
4881 }
4882 done:
4883 CFIL_INFO_VERIFY(cfil_info);
4884
4885 return error;
4886 }
4887
4888 /*
4889 * Callback from socket layer sosendxxx()
4890 */
4891 int
4892 cfil_sock_data_out(struct socket *so, struct sockaddr *to,
4893 struct mbuf *data, struct mbuf *control, uint32_t flags)
4894 {
4895 int error = 0;
4896
4897 if (IS_IP_DGRAM(so)) {
4898 return cfil_sock_udp_handle_data(TRUE, so, NULL, to, data, control, flags);
4899 }
4900
4901 if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL) {
4902 return 0;
4903 }
4904
4905 /*
4906 * Pass initial data for TFO.
4907 */
4908 if (IS_INITIAL_TFO_DATA(so)) {
4909 return 0;
4910 }
4911
4912 socket_lock_assert_owned(so);
4913
4914 if (so->so_cfil->cfi_flags & CFIF_DROP) {
4915 CFIL_LOG(LOG_ERR, "so %llx drop set",
4916 (uint64_t)VM_KERNEL_ADDRPERM(so));
4917 return EPIPE;
4918 }
4919 if (control != NULL) {
4920 CFIL_LOG(LOG_ERR, "so %llx control",
4921 (uint64_t)VM_KERNEL_ADDRPERM(so));
4922 OSIncrementAtomic(&cfil_stats.cfs_data_out_control);
4923 }
4924 if ((flags & MSG_OOB)) {
4925 CFIL_LOG(LOG_ERR, "so %llx MSG_OOB",
4926 (uint64_t)VM_KERNEL_ADDRPERM(so));
4927 OSIncrementAtomic(&cfil_stats.cfs_data_out_oob);
4928 }
4929 if ((so->so_snd.sb_flags & SB_LOCK) == 0) {
4930 panic("so %p SB_LOCK not set", so);
4931 }
4932
4933 if (so->so_snd.sb_cfil_thread != NULL) {
4934 panic("%s sb_cfil_thread %p not NULL", __func__,
4935 so->so_snd.sb_cfil_thread);
4936 }
4937
4938 error = cfil_data_common(so, so->so_cfil, 1, to, data, control, flags);
4939
4940 return error;
4941 }
4942
4943 /*
4944 * Callback from socket layer sbappendxxx()
4945 */
4946 int
4947 cfil_sock_data_in(struct socket *so, struct sockaddr *from,
4948 struct mbuf *data, struct mbuf *control, uint32_t flags)
4949 {
4950 int error = 0;
4951
4952 if (IS_IP_DGRAM(so)) {
4953 return cfil_sock_udp_handle_data(FALSE, so, NULL, from, data, control, flags);
4954 }
4955
4956 if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL) {
4957 return 0;
4958 }
4959
4960 /*
4961 * Pass initial data for TFO.
4962 */
4963 if (IS_INITIAL_TFO_DATA(so)) {
4964 return 0;
4965 }
4966
4967 socket_lock_assert_owned(so);
4968
4969 if (so->so_cfil->cfi_flags & CFIF_DROP) {
4970 CFIL_LOG(LOG_ERR, "so %llx drop set",
4971 (uint64_t)VM_KERNEL_ADDRPERM(so));
4972 return EPIPE;
4973 }
4974 if (control != NULL) {
4975 CFIL_LOG(LOG_ERR, "so %llx control",
4976 (uint64_t)VM_KERNEL_ADDRPERM(so));
4977 OSIncrementAtomic(&cfil_stats.cfs_data_in_control);
4978 }
4979 if (data->m_type == MT_OOBDATA) {
4980 CFIL_LOG(LOG_ERR, "so %llx MSG_OOB",
4981 (uint64_t)VM_KERNEL_ADDRPERM(so));
4982 OSIncrementAtomic(&cfil_stats.cfs_data_in_oob);
4983 }
4984 error = cfil_data_common(so, so->so_cfil, 0, from, data, control, flags);
4985
4986 return error;
4987 }
4988
4989 /*
4990 * Callback from socket layer soshutdownxxx()
4991 *
4992 * We may delay the shutdown write if outgoing data is still being processed.
4993 *
4994 * There is no point in delaying the shutdown read because the process
4995 * indicated that it does not want to read any more data.
4996 */
4997 int
4998 cfil_sock_shutdown(struct socket *so, int *how)
4999 {
5000 int error = 0;
5001
5002 if (IS_IP_DGRAM(so)) {
5003 return cfil_sock_udp_shutdown(so, how);
5004 }
5005
5006 if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL) {
5007 goto done;
5008 }
5009
5010 socket_lock_assert_owned(so);
5011
5012 CFIL_LOG(LOG_INFO, "so %llx how %d",
5013 (uint64_t)VM_KERNEL_ADDRPERM(so), *how);
5014
5015 /*
5016 * Check the state of the socket before the content filter
5017 */
5018 if (*how != SHUT_WR && (so->so_state & SS_CANTRCVMORE) != 0) {
5019 /* read already shut down */
5020 error = ENOTCONN;
5021 goto done;
5022 }
5023 if (*how != SHUT_RD && (so->so_state & SS_CANTSENDMORE) != 0) {
5024 /* write already shut down */
5025 error = ENOTCONN;
5026 goto done;
5027 }
5028
5029 if ((so->so_cfil->cfi_flags & CFIF_DROP) != 0) {
5030 CFIL_LOG(LOG_ERR, "so %llx drop set",
5031 (uint64_t)VM_KERNEL_ADDRPERM(so));
5032 goto done;
5033 }
5034
5035 /*
5036 * shutdown read: SHUT_RD or SHUT_RDWR
5037 */
5038 if (*how != SHUT_WR) {
5039 if (so->so_cfil->cfi_flags & CFIF_SHUT_RD) {
5040 error = ENOTCONN;
5041 goto done;
5042 }
5043 so->so_cfil->cfi_flags |= CFIF_SHUT_RD;
5044 cfil_sock_notify_shutdown(so, SHUT_RD);
5045 }
5046 /*
5047 * shutdown write: SHUT_WR or SHUT_RDWR
5048 */
5049 if (*how != SHUT_RD) {
5050 if (so->so_cfil->cfi_flags & CFIF_SHUT_WR) {
5051 error = ENOTCONN;
5052 goto done;
5053 }
5054 so->so_cfil->cfi_flags |= CFIF_SHUT_WR;
5055 cfil_sock_notify_shutdown(so, SHUT_WR);
5056 /*
5057 * When outgoing data is pending, we delay the shutdown at the
5058 * protocol level until the content filters give the final
5059 * verdict on the pending data.
5060 */
5061 if (cfil_sock_data_pending(&so->so_snd) != 0) {
5062 /*
5063 * When shutting down the read and write sides at once
5064 * we can proceed to the final shutdown of the read
5065 * side. Otherwise, we just return.
5066 */
5067 if (*how == SHUT_WR) {
5068 error = EJUSTRETURN;
5069 } else if (*how == SHUT_RDWR) {
5070 *how = SHUT_RD;
5071 }
5072 }
5073 }
5074 done:
5075 return error;
5076 }
5077
5078 /*
5079 * This is called when the socket is closed and there is no more
5080 * opportunity for filtering
5081 */
5082 void
5083 cfil_sock_is_closed(struct socket *so)
5084 {
5085 errno_t error = 0;
5086 int kcunit;
5087
5088 if (IS_IP_DGRAM(so)) {
5089 cfil_sock_udp_is_closed(so);
5090 return;
5091 }
5092
5093 if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL) {
5094 return;
5095 }
5096
5097 CFIL_LOG(LOG_INFO, "so %llx", (uint64_t)VM_KERNEL_ADDRPERM(so));
5098
5099 socket_lock_assert_owned(so);
5100
5101 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
5102 /* Let the filters know of the closing */
5103 error = cfil_dispatch_closed_event(so, so->so_cfil, kcunit);
5104 }
5105
5106 /* Last chance to push passed data out */
5107 error = cfil_acquire_sockbuf(so, so->so_cfil, 1);
5108 if (error == 0) {
5109 cfil_service_inject_queue(so, so->so_cfil, 1);
5110 }
5111 cfil_release_sockbuf(so, 1);
5112
5113 so->so_cfil->cfi_flags |= CFIF_SOCK_CLOSED;
5114
5115 /* Pending data needs to go */
5116 cfil_flush_queues(so, so->so_cfil);
5117
5118 CFIL_INFO_VERIFY(so->so_cfil);
5119 }
5120
5121 /*
5122 * This is called when the socket is disconnected so let the filters
5123 * know about the disconnection and that no more data will come
5124 *
5125 * The how parameter has the same values as soshutdown()
5126 */
5127 void
5128 cfil_sock_notify_shutdown(struct socket *so, int how)
5129 {
5130 errno_t error = 0;
5131 int kcunit;
5132
5133 if (IS_IP_DGRAM(so)) {
5134 cfil_sock_udp_notify_shutdown(so, how, 0, 0);
5135 return;
5136 }
5137
5138 if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL) {
5139 return;
5140 }
5141
5142 CFIL_LOG(LOG_INFO, "so %llx how %d",
5143 (uint64_t)VM_KERNEL_ADDRPERM(so), how);
5144
5145 socket_lock_assert_owned(so);
5146
5147 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
5148 /* Disconnect incoming side */
5149 if (how != SHUT_WR) {
5150 error = cfil_dispatch_disconnect_event(so, so->so_cfil, kcunit, 0);
5151 }
5152 /* Disconnect outgoing side */
5153 if (how != SHUT_RD) {
5154 error = cfil_dispatch_disconnect_event(so, so->so_cfil, kcunit, 1);
5155 }
5156 }
5157 }
5158
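/*
 * Return 1 if at least one content filter is still attached to the socket,
 * i.e. it was sent an attach event and has not yet been detached.
 */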
5159 static int
5160 cfil_filters_attached(struct socket *so)
5161 {
5162 struct cfil_entry *entry;
5163 uint32_t kcunit;
5164 int attached = 0;
5165
5166 if (IS_IP_DGRAM(so)) {
5167 return cfil_filters_udp_attached(so, FALSE);
5168 }
5169
5170 if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL) {
5171 return 0;
5172 }
5173
5174 socket_lock_assert_owned(so);
5175
5176 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
5177 entry = &so->so_cfil->cfi_entries[kcunit - 1];
5178
5179 /* Are we attached to the filter? */
5180 if (entry->cfe_filter == NULL) {
5181 continue;
5182 }
5183 if ((entry->cfe_flags & CFEF_SENT_SOCK_ATTACHED) == 0) {
5184 continue;
5185 }
5186 if ((entry->cfe_flags & CFEF_CFIL_DETACHED) != 0) {
5187 continue;
5188 }
5189 attached = 1;
5190 break;
5191 }
5192
5193 return attached;
5194 }
5195
5196 /*
5197 * This is called when the socket is closed and we are waiting for
5198 * the filters to give the final pass or drop
5199 */
5200 void
5201 cfil_sock_close_wait(struct socket *so)
5202 {
5203 lck_mtx_t *mutex_held;
5204 struct timespec ts;
5205 int error;
5206
5207 if (IS_IP_DGRAM(so)) {
5208 cfil_sock_udp_close_wait(so);
5209 return;
5210 }
5211
5212 if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL) {
5213 return;
5214 }
5215
5216 CFIL_LOG(LOG_INFO, "so %llx", (uint64_t)VM_KERNEL_ADDRPERM(so));
5217
5218 if (so->so_proto->pr_getlock != NULL) {
5219 mutex_held = (*so->so_proto->pr_getlock)(so, PR_F_WILLUNLOCK);
5220 } else {
5221 mutex_held = so->so_proto->pr_domain->dom_mtx;
5222 }
5223 LCK_MTX_ASSERT(mutex_held, LCK_MTX_ASSERT_OWNED);
5224
5225 while (cfil_filters_attached(so)) {
5226 /*
5227 * Notify the filters we are going away so they can detach
5228 */
5229 cfil_sock_notify_shutdown(so, SHUT_RDWR);
5230
5231 /*
5232 * Check that we still need to wait after the filters are notified
5233 * of the disconnection
5234 */
5235 if (cfil_filters_attached(so) == 0) {
5236 break;
5237 }
5238
5239 CFIL_LOG(LOG_INFO, "so %llx waiting",
5240 (uint64_t)VM_KERNEL_ADDRPERM(so));
5241
5242 ts.tv_sec = cfil_close_wait_timeout / 1000;
5243 ts.tv_nsec = (cfil_close_wait_timeout % 1000) *
5244 NSEC_PER_USEC * 1000;
5245
5246 OSIncrementAtomic(&cfil_stats.cfs_close_wait);
5247 so->so_cfil->cfi_flags |= CFIF_CLOSE_WAIT;
5248 error = msleep((caddr_t)so->so_cfil, mutex_held,
5249 PSOCK | PCATCH, "cfil_sock_close_wait", &ts);
5250 so->so_cfil->cfi_flags &= ~CFIF_CLOSE_WAIT;
5251
5252 CFIL_LOG(LOG_NOTICE, "so %llx timed out %d",
5253 (uint64_t)VM_KERNEL_ADDRPERM(so), (error != 0));
5254
5255 /*
5256 * Force close in case of timeout
5257 */
5258 if (error != 0) {
5259 OSIncrementAtomic(&cfil_stats.cfs_close_wait_timeout);
5260 break;
5261 }
5262 }
5263 }
5264
5265 /*
5266 * Returns the size of the data held by the content filters for this socket buffer
5267 */
5268 int32_t
5269 cfil_sock_data_pending(struct sockbuf *sb)
5270 {
5271 struct socket *so = sb->sb_so;
5272 uint64_t pending = 0;
5273
5274 if (IS_IP_DGRAM(so)) {
5275 return cfil_sock_udp_data_pending(sb, FALSE);
5276 }
5277
5278 if ((so->so_flags & SOF_CONTENT_FILTER) != 0 && so->so_cfil != NULL) {
5279 struct cfi_buf *cfi_buf;
5280
5281 socket_lock_assert_owned(so);
5282
5283 if ((sb->sb_flags & SB_RECV) == 0) {
5284 cfi_buf = &so->so_cfil->cfi_snd;
5285 } else {
5286 cfi_buf = &so->so_cfil->cfi_rcv;
5287 }
5288
5289 pending = cfi_buf->cfi_pending_last -
5290 cfi_buf->cfi_pending_first;
5291
5292 /*
5293 * If we are limited by the "chars of mbufs used" roughly
5294 * adjust so we won't overcommit
5295 */
5296 if (pending > (uint64_t)cfi_buf->cfi_pending_mbcnt) {
5297 pending = cfi_buf->cfi_pending_mbcnt;
5298 }
5299 }
5300
5301 VERIFY(pending < INT32_MAX);
5302
5303 return (int32_t)(pending);
5304 }
5305
5306 /*
5307 * Return the socket buffer space used by data being held by content filters
5308 * so processes won't clog the socket buffer
5309 */
5310 int32_t
5311 cfil_sock_data_space(struct sockbuf *sb)
5312 {
5313 struct socket *so = sb->sb_so;
5314 uint64_t pending = 0;
5315
5316 if (IS_IP_DGRAM(so)) {
5317 return cfil_sock_udp_data_pending(sb, TRUE);
5318 }
5319
5320 if ((so->so_flags & SOF_CONTENT_FILTER) != 0 && so->so_cfil != NULL &&
5321 so->so_snd.sb_cfil_thread != current_thread()) {
5322 struct cfi_buf *cfi_buf;
5323
5324 socket_lock_assert_owned(so);
5325
5326 if ((sb->sb_flags & SB_RECV) == 0) {
5327 cfi_buf = &so->so_cfil->cfi_snd;
5328 } else {
5329 cfi_buf = &so->so_cfil->cfi_rcv;
5330 }
5331
5332 pending = cfi_buf->cfi_pending_last -
5333 cfi_buf->cfi_pending_first;
5334
5335 /*
5336 * If we are limited by the "chars of mbufs used" count, adjust the
5337 * estimate so we won't overcommit
5338 */
5339 if ((uint64_t)cfi_buf->cfi_pending_mbcnt > pending) {
5340 pending = cfi_buf->cfi_pending_mbcnt;
5341 }
5342 }
5343
5344 VERIFY(pending < INT32_MAX);
5345
5346 return (int32_t)(pending);
5347 }
5348
5349 /*
5350 * A callback from the socket and protocol layer when data becomes
5351 * available in the socket buffer to give a chance for the content filter
5352 * to re-inject data that was held back
5353 */
5354 void
5355 cfil_sock_buf_update(struct sockbuf *sb)
5356 {
5357 int outgoing;
5358 int error;
5359 struct socket *so = sb->sb_so;
5360
5361 if (IS_IP_DGRAM(so)) {
5362 cfil_sock_udp_buf_update(sb);
5363 return;
5364 }
5365
5366 if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL) {
5367 return;
5368 }
5369
5370 if (!cfil_sbtrim) {
5371 return;
5372 }
5373
5374 socket_lock_assert_owned(so);
5375
5376 if ((sb->sb_flags & SB_RECV) == 0) {
5377 if ((so->so_cfil->cfi_flags & CFIF_RETRY_INJECT_OUT) == 0) {
5378 return;
5379 }
5380 outgoing = 1;
5381 OSIncrementAtomic(&cfil_stats.cfs_inject_q_out_retry);
5382 } else {
5383 if ((so->so_cfil->cfi_flags & CFIF_RETRY_INJECT_IN) == 0) {
5384 return;
5385 }
5386 outgoing = 0;
5387 OSIncrementAtomic(&cfil_stats.cfs_inject_q_in_retry);
5388 }
5389
5390 CFIL_LOG(LOG_NOTICE, "so %llx outgoing %d",
5391 (uint64_t)VM_KERNEL_ADDRPERM(so), outgoing);
5392
5393 error = cfil_acquire_sockbuf(so, so->so_cfil, outgoing);
5394 if (error == 0) {
5395 cfil_service_inject_queue(so, so->so_cfil, outgoing);
5396 }
5397 cfil_release_sockbuf(so, outgoing);
5398 }
5399
5400 int
5401 sysctl_cfil_filter_list(struct sysctl_oid *oidp, void *arg1, int arg2,
5402 struct sysctl_req *req)
5403 {
5404 #pragma unused(oidp, arg1, arg2)
5405 int error = 0;
5406 size_t len = 0;
5407 u_int32_t i;
5408
5409 /* Read only */
5410 if (req->newptr != USER_ADDR_NULL) {
5411 return EPERM;
5412 }
5413
5414 cfil_rw_lock_shared(&cfil_lck_rw);
5415
5416 for (i = 0; content_filters != NULL && i < MAX_CONTENT_FILTER; i++) {
5417 struct cfil_filter_stat filter_stat;
5418 struct content_filter *cfc = content_filters[i];
5419
5420 if (cfc == NULL) {
5421 continue;
5422 }
5423
5424 /* If just asking for the size */
5425 if (req->oldptr == USER_ADDR_NULL) {
5426 len += sizeof(struct cfil_filter_stat);
5427 continue;
5428 }
5429
5430 bzero(&filter_stat, sizeof(struct cfil_filter_stat));
5431 filter_stat.cfs_len = sizeof(struct cfil_filter_stat);
5432 filter_stat.cfs_filter_id = cfc->cf_kcunit;
5433 filter_stat.cfs_flags = cfc->cf_flags;
5434 filter_stat.cfs_sock_count = cfc->cf_sock_count;
5435 filter_stat.cfs_necp_control_unit = cfc->cf_necp_control_unit;
5436
5437 error = SYSCTL_OUT(req, &filter_stat,
5438 sizeof(struct cfil_filter_stat));
5439 if (error != 0) {
5440 break;
5441 }
5442 }
5443 /* If just asking for the size */
5444 if (req->oldptr == USER_ADDR_NULL) {
5445 req->oldidx = len;
5446 }
5447
5448 cfil_rw_unlock_shared(&cfil_lck_rw);
5449
5450 #if SHOW_DEBUG
5451 if (req->oldptr != USER_ADDR_NULL) {
5452 for (i = 1; content_filters != NULL && i <= MAX_CONTENT_FILTER; i++) {
5453 cfil_filter_show(i);
5454 }
5455 }
5456 #endif
5457
5458 return error;
5459 }
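
/*
 * User-space sketch (not compiled): how an agent could read the filter list
 * through the two-pass protocol implemented by sysctl_cfil_filter_list()
 * above -- first call with a NULL buffer to learn the size, then call again
 * with an allocated buffer.  The OID string "net.cfil.filter_list" is an
 * assumption about how this handler is registered; adjust it to the actual
 * registration if it differs.  struct cfil_filter_stat comes from the
 * content filter headers.
 */
#if 0
#include <sys/types.h>
#include <sys/sysctl.h>
#include <stdio.h>
#include <stdlib.h>

static void
dump_cfil_filter_list(void)
{
	size_t len = 0;
	void *buf;

	/* First pass: oldptr == NULL, the handler only reports the size. */
	if (sysctlbyname("net.cfil.filter_list", NULL, &len, NULL, 0) != 0 || len == 0) {
		return;
	}

	buf = malloc(len);
	if (buf == NULL) {
		return;
	}

	/* Second pass: the handler copies one cfil_filter_stat per active filter. */
	if (sysctlbyname("net.cfil.filter_list", buf, &len, NULL, 0) == 0) {
		printf("filter list: %zu bytes (%zu entries)\n",
		    len, len / sizeof(struct cfil_filter_stat));
	}
	free(buf);
}
#endif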
5460
5461 static int
5462 sysctl_cfil_sock_list(struct sysctl_oid *oidp, void *arg1, int arg2,
5463 struct sysctl_req *req)
5464 {
5465 #pragma unused(oidp, arg1, arg2)
5466 int error = 0;
5467 u_int32_t i;
5468 struct cfil_info *cfi;
5469
5470 /* Read only */
5471 if (req->newptr != USER_ADDR_NULL) {
5472 return EPERM;
5473 }
5474
5475 cfil_rw_lock_shared(&cfil_lck_rw);
5476
5477 /*
5478 * If just asking for the size
5479 */
5480 if (req->oldptr == USER_ADDR_NULL) {
5481 req->oldidx = cfil_sock_attached_count *
5482 sizeof(struct cfil_sock_stat);
5483 /* Bump the length in case new sockets get attached */
5484 req->oldidx += req->oldidx >> 3;
5485 goto done;
5486 }
5487
5488 TAILQ_FOREACH(cfi, &cfil_sock_head, cfi_link) {
5489 struct cfil_entry *entry;
5490 struct cfil_sock_stat stat;
5491 struct socket *so = cfi->cfi_so;
5492
5493 bzero(&stat, sizeof(struct cfil_sock_stat));
5494 stat.cfs_len = sizeof(struct cfil_sock_stat);
5495 stat.cfs_sock_id = cfi->cfi_sock_id;
5496 stat.cfs_flags = cfi->cfi_flags;
5497
5498 if (so != NULL) {
5499 stat.cfs_pid = so->last_pid;
5500 memcpy(stat.cfs_uuid, so->last_uuid,
5501 sizeof(uuid_t));
5502 if (so->so_flags & SOF_DELEGATED) {
5503 stat.cfs_e_pid = so->e_pid;
5504 memcpy(stat.cfs_e_uuid, so->e_uuid,
5505 sizeof(uuid_t));
5506 } else {
5507 stat.cfs_e_pid = so->last_pid;
5508 memcpy(stat.cfs_e_uuid, so->last_uuid,
5509 sizeof(uuid_t));
5510 }
5511
5512 stat.cfs_sock_family = so->so_proto->pr_domain->dom_family;
5513 stat.cfs_sock_type = so->so_proto->pr_type;
5514 stat.cfs_sock_protocol = so->so_proto->pr_protocol;
5515 }
5516
5517 stat.cfs_snd.cbs_pending_first =
5518 cfi->cfi_snd.cfi_pending_first;
5519 stat.cfs_snd.cbs_pending_last =
5520 cfi->cfi_snd.cfi_pending_last;
5521 stat.cfs_snd.cbs_inject_q_len =
5522 cfil_queue_len(&cfi->cfi_snd.cfi_inject_q);
5523 stat.cfs_snd.cbs_pass_offset =
5524 cfi->cfi_snd.cfi_pass_offset;
5525
5526 stat.cfs_rcv.cbs_pending_first =
5527 cfi->cfi_rcv.cfi_pending_first;
5528 stat.cfs_rcv.cbs_pending_last =
5529 cfi->cfi_rcv.cfi_pending_last;
5530 stat.cfs_rcv.cbs_inject_q_len =
5531 cfil_queue_len(&cfi->cfi_rcv.cfi_inject_q);
5532 stat.cfs_rcv.cbs_pass_offset =
5533 cfi->cfi_rcv.cfi_pass_offset;
5534
5535 for (i = 0; i < MAX_CONTENT_FILTER; i++) {
5536 struct cfil_entry_stat *estat;
5537 struct cfe_buf *ebuf;
5538 struct cfe_buf_stat *sbuf;
5539
5540 entry = &cfi->cfi_entries[i];
5541
5542 estat = &stat.ces_entries[i];
5543
5544 estat->ces_len = sizeof(struct cfil_entry_stat);
5545 estat->ces_filter_id = entry->cfe_filter ?
5546 entry->cfe_filter->cf_kcunit : 0;
5547 estat->ces_flags = entry->cfe_flags;
5548 estat->ces_necp_control_unit =
5549 entry->cfe_necp_control_unit;
5550
5551 estat->ces_last_event.tv_sec =
5552 (int64_t)entry->cfe_last_event.tv_sec;
5553 estat->ces_last_event.tv_usec =
5554 (int64_t)entry->cfe_last_event.tv_usec;
5555
5556 estat->ces_last_action.tv_sec =
5557 (int64_t)entry->cfe_last_action.tv_sec;
5558 estat->ces_last_action.tv_usec =
5559 (int64_t)entry->cfe_last_action.tv_usec;
5560
5561 ebuf = &entry->cfe_snd;
5562 sbuf = &estat->ces_snd;
5563 sbuf->cbs_pending_first =
5564 cfil_queue_offset_first(&ebuf->cfe_pending_q);
5565 sbuf->cbs_pending_last =
5566 cfil_queue_offset_last(&ebuf->cfe_pending_q);
5567 sbuf->cbs_ctl_first =
5568 cfil_queue_offset_first(&ebuf->cfe_ctl_q);
5569 sbuf->cbs_ctl_last =
5570 cfil_queue_offset_last(&ebuf->cfe_ctl_q);
5571 sbuf->cbs_pass_offset = ebuf->cfe_pass_offset;
5572 sbuf->cbs_peek_offset = ebuf->cfe_peek_offset;
5573 sbuf->cbs_peeked = ebuf->cfe_peeked;
5574
5575 ebuf = &entry->cfe_rcv;
5576 sbuf = &estat->ces_rcv;
5577 sbuf->cbs_pending_first =
5578 cfil_queue_offset_first(&ebuf->cfe_pending_q);
5579 sbuf->cbs_pending_last =
5580 cfil_queue_offset_last(&ebuf->cfe_pending_q);
5581 sbuf->cbs_ctl_first =
5582 cfil_queue_offset_first(&ebuf->cfe_ctl_q);
5583 sbuf->cbs_ctl_last =
5584 cfil_queue_offset_last(&ebuf->cfe_ctl_q);
5585 sbuf->cbs_pass_offset = ebuf->cfe_pass_offset;
5586 sbuf->cbs_peek_offset = ebuf->cfe_peek_offset;
5587 sbuf->cbs_peeked = ebuf->cfe_peeked;
5588 }
5589 error = SYSCTL_OUT(req, &stat,
5590 sizeof(struct cfil_sock_stat));
5591 if (error != 0) {
5592 break;
5593 }
5594 }
5595 done:
5596 cfil_rw_unlock_shared(&cfil_lck_rw);
5597
5598 #if SHOW_DEBUG
5599 if (req->oldptr != USER_ADDR_NULL) {
5600 cfil_info_show();
5601 }
5602 #endif
5603
5604 return error;
5605 }
5606
5607 /*
5608 * UDP Socket Support
5609 */
5610 static void
5611 cfil_hash_entry_log(int level, struct socket *so, struct cfil_hash_entry *entry, uint64_t sockId, const char* msg)
5612 {
5613 char local[MAX_IPv6_STR_LEN + 6];
5614 char remote[MAX_IPv6_STR_LEN + 6];
5615 const void *addr;
5616
5617 // No sock or no entry, no-op
5618 if (so == NULL || entry == NULL) {
5619 return;
5620 }
5621
5622 local[0] = remote[0] = 0x0;
5623
5624 switch (entry->cfentry_family) {
5625 case AF_INET6:
5626 addr = &entry->cfentry_laddr.addr6;
5627 inet_ntop(AF_INET6, addr, local, sizeof(local));
5628 addr = &entry->cfentry_faddr.addr6;
5629 inet_ntop(AF_INET6, addr, remote, sizeof(remote));
5630 break;
5631 case AF_INET:
5632 addr = &entry->cfentry_laddr.addr46.ia46_addr4.s_addr;
5633 inet_ntop(AF_INET, addr, local, sizeof(local));
5634 addr = &entry->cfentry_faddr.addr46.ia46_addr4.s_addr;
5635 inet_ntop(AF_INET, addr, remote, sizeof(remote));
5636 break;
5637 default:
5638 return;
5639 }
5640
5641 CFIL_LOG(level, "<%s>: <%s(%d) so %llx, entry %p, sockID %llu> lport %d fport %d laddr %s faddr %s",
5642 msg,
5643 IS_UDP(so) ? "UDP" : "proto", GET_SO_PROTO(so),
5644 (uint64_t)VM_KERNEL_ADDRPERM(so), entry, sockId,
5645 ntohs(entry->cfentry_lport), ntohs(entry->cfentry_fport), local, remote);
5646 }
5647
5648 static void
5649 cfil_inp_log(int level, struct socket *so, const char* msg)
5650 {
5651 struct inpcb *inp = NULL;
5652 char local[MAX_IPv6_STR_LEN + 6];
5653 char remote[MAX_IPv6_STR_LEN + 6];
5654 const void *addr;
5655
5656 if (so == NULL) {
5657 return;
5658 }
5659
5660 inp = sotoinpcb(so);
5661 if (inp == NULL) {
5662 return;
5663 }
5664
5665 local[0] = remote[0] = 0x0;
5666
5667 #if INET6
5668 if (inp->inp_vflag & INP_IPV6) {
5669 addr = &inp->in6p_laddr.s6_addr32;
5670 inet_ntop(AF_INET6, addr, local, sizeof(local));
5671 addr = &inp->in6p_faddr.s6_addr32;
5672 inet_ntop(AF_INET6, addr, remote, sizeof(remote));
5673 } else
5674 #endif /* INET6 */
5675 {
5676 addr = &inp->inp_laddr.s_addr;
5677 inet_ntop(AF_INET, addr, local, sizeof(local));
5678 addr = &inp->inp_faddr.s_addr;
5679 inet_ntop(AF_INET, addr, remote, sizeof(remote));
5680 }
5681
5682 if (so->so_cfil != NULL) {
5683 CFIL_LOG(level, "<%s>: <%s so %llx - flags 0x%x 0x%x, sockID %llu> lport %d fport %d laddr %s faddr %s",
5684 msg, IS_UDP(so) ? "UDP" : "TCP",
5685 (uint64_t)VM_KERNEL_ADDRPERM(so), inp->inp_flags, inp->inp_socket->so_flags, so->so_cfil->cfi_sock_id,
5686 ntohs(inp->inp_lport), ntohs(inp->inp_fport), local, remote);
5687 } else {
5688 CFIL_LOG(level, "<%s>: <%s so %llx - flags 0x%x 0x%x> lport %d fport %d laddr %s faddr %s",
5689 msg, IS_UDP(so) ? "UDP" : "TCP",
5690 (uint64_t)VM_KERNEL_ADDRPERM(so), inp->inp_flags, inp->inp_socket->so_flags,
5691 ntohs(inp->inp_lport), ntohs(inp->inp_fport), local, remote);
5692 }
5693 }
5694
5695 static void
5696 cfil_info_log(int level, struct cfil_info *cfil_info, const char* msg)
5697 {
5698 if (cfil_info == NULL) {
5699 return;
5700 }
5701
5702 if (cfil_info->cfi_hash_entry != NULL) {
5703 cfil_hash_entry_log(level, cfil_info->cfi_so, cfil_info->cfi_hash_entry, cfil_info->cfi_sock_id, msg);
5704 } else {
5705 cfil_inp_log(level, cfil_info->cfi_so, msg);
5706 }
5707 }
5708
5709 errno_t
5710 cfil_db_init(struct socket *so)
5711 {
5712 errno_t error = 0;
5713 struct cfil_db *db = NULL;
5714
5715 CFIL_LOG(LOG_INFO, "");
5716
5717 db = zalloc(cfil_db_zone);
5718 if (db == NULL) {
5719 error = ENOMEM;
5720 goto done;
5721 }
5722 bzero(db, sizeof(struct cfil_db));
5723 db->cfdb_so = so;
5724 db->cfdb_hashbase = hashinit(CFILHASHSIZE, M_CFIL, &db->cfdb_hashmask);
5725 if (db->cfdb_hashbase == NULL) {
5726 zfree(cfil_db_zone, db);
5727 db = NULL;
5728 error = ENOMEM;
5729 goto done;
5730 }
5731
5732 so->so_cfil_db = db;
5733
5734 done:
5735 return error;
5736 }
5737
5738 void
5739 cfil_db_free(struct socket *so)
5740 {
5741 struct cfil_hash_entry *entry = NULL;
5742 struct cfil_hash_entry *temp_entry = NULL;
5743 struct cfilhashhead *cfilhash = NULL;
5744 struct cfil_db *db = NULL;
5745
5746 CFIL_LOG(LOG_INFO, "");
5747
5748 if (so == NULL || so->so_cfil_db == NULL) {
5749 return;
5750 }
5751 db = so->so_cfil_db;
5752
5753 #if LIFECYCLE_DEBUG
5754 CFIL_LOG(LOG_ERR, "CFIL: LIFECYCLE: <so %llx, db %p> freeing db (count == %d)",
5755 (uint64_t)VM_KERNEL_ADDRPERM(so), db, db->cfdb_count);
5756 #endif
5757
5758 for (int i = 0; i < CFILHASHSIZE; i++) {
5759 cfilhash = &db->cfdb_hashbase[i];
5760 LIST_FOREACH_SAFE(entry, cfilhash, cfentry_link, temp_entry) {
5761 if (entry->cfentry_cfil != NULL) {
5762 #if LIFECYCLE_DEBUG
5763 cfil_info_log(LOG_ERR, entry->cfentry_cfil, "CFIL: LIFECYCLE: DB FREE CLEAN UP");
5764 #endif
5765 cfil_info_free(entry->cfentry_cfil);
5766 OSIncrementAtomic(&cfil_stats.cfs_sock_detached);
5767 entry->cfentry_cfil = NULL;
5768 }
5769
5770 cfil_db_delete_entry(db, entry);
5771 if (so->so_flags & SOF_CONTENT_FILTER) {
5772 if (db->cfdb_count == 0) {
5773 so->so_flags &= ~SOF_CONTENT_FILTER;
5774 }
5775 VERIFY(so->so_usecount > 0);
5776 so->so_usecount--;
5777 }
5778 }
5779 }
5780
5781 // Make sure all entries are cleaned up!
5782 VERIFY(db->cfdb_count == 0);
5783 #if LIFECYCLE_DEBUG
5784 CFIL_LOG(LOG_ERR, "CFIL: LIFECYCLE: so usecount %d", so->so_usecount);
5785 #endif
5786
5787 FREE(db->cfdb_hashbase, M_CFIL);
5788 zfree(cfil_db_zone, db);
5789 so->so_cfil_db = NULL;
5790 }
5791
5792 static bool
5793 fill_cfil_hash_entry_from_address(struct cfil_hash_entry *entry, bool isLocal, struct sockaddr *addr)
5794 {
5795 struct sockaddr_in *sin = NULL;
5796 struct sockaddr_in6 *sin6 = NULL;
5797
5798 if (entry == NULL || addr == NULL) {
5799 return FALSE;
5800 }
5801
5802 switch (addr->sa_family) {
5803 case AF_INET:
5804 sin = satosin(addr);
5805 if (sin->sin_len != sizeof(*sin)) {
5806 return FALSE;
5807 }
5808 if (isLocal == TRUE) {
5809 entry->cfentry_lport = sin->sin_port;
5810 entry->cfentry_laddr.addr46.ia46_addr4.s_addr = sin->sin_addr.s_addr;
5811 } else {
5812 entry->cfentry_fport = sin->sin_port;
5813 entry->cfentry_faddr.addr46.ia46_addr4.s_addr = sin->sin_addr.s_addr;
5814 }
5815 entry->cfentry_family = AF_INET;
5816 return TRUE;
5817 case AF_INET6:
5818 sin6 = satosin6(addr);
5819 if (sin6->sin6_len != sizeof(*sin6)) {
5820 return FALSE;
5821 }
5822 if (isLocal == TRUE) {
5823 entry->cfentry_lport = sin6->sin6_port;
5824 entry->cfentry_laddr.addr6 = sin6->sin6_addr;
5825 } else {
5826 entry->cfentry_fport = sin6->sin6_port;
5827 entry->cfentry_faddr.addr6 = sin6->sin6_addr;
5828 }
5829 entry->cfentry_family = AF_INET6;
5830 return TRUE;
5831 default:
5832 return FALSE;
5833 }
5834 }
5835
5836 static bool
5837 fill_cfil_hash_entry_from_inp(struct cfil_hash_entry *entry, bool isLocal, struct inpcb *inp)
5838 {
5839 if (entry == NULL || inp == NULL) {
5840 return FALSE;
5841 }
5842
5843 if (inp->inp_vflag & INP_IPV6) {
5844 if (isLocal == TRUE) {
5845 entry->cfentry_lport = inp->inp_lport;
5846 entry->cfentry_laddr.addr6 = inp->in6p_laddr;
5847 } else {
5848 entry->cfentry_fport = inp->inp_fport;
5849 entry->cfentry_faddr.addr6 = inp->in6p_faddr;
5850 }
5851 entry->cfentry_family = AF_INET6;
5852 return TRUE;
5853 } else if (inp->inp_vflag & INP_IPV4) {
5854 if (isLocal == TRUE) {
5855 entry->cfentry_lport = inp->inp_lport;
5856 entry->cfentry_laddr.addr46.ia46_addr4.s_addr = inp->inp_laddr.s_addr;
5857 } else {
5858 entry->cfentry_fport = inp->inp_fport;
5859 entry->cfentry_faddr.addr46.ia46_addr4.s_addr = inp->inp_faddr.s_addr;
5860 }
5861 entry->cfentry_family = AF_INET;
5862 return TRUE;
5863 }
5864 return FALSE;
5865 }
5866
5867 bool
5868 check_port(struct sockaddr *addr, u_short port)
5869 {
5870 struct sockaddr_in *sin = NULL;
5871 struct sockaddr_in6 *sin6 = NULL;
5872
5873 if (addr == NULL || port == 0) {
5874 return FALSE;
5875 }
5876
5877 switch (addr->sa_family) {
5878 case AF_INET:
5879 sin = satosin(addr);
5880 if (sin->sin_len != sizeof(*sin)) {
5881 return FALSE;
5882 }
5883 if (port == ntohs(sin->sin_port)) {
5884 return TRUE;
5885 }
5886 break;
5887 case AF_INET6:
5888 sin6 = satosin6(addr);
5889 if (sin6->sin6_len != sizeof(*sin6)) {
5890 return FALSE;
5891 }
5892 if (port == ntohs(sin6->sin6_port)) {
5893 return TRUE;
5894 }
5895 break;
5896 default:
5897 break;
5898 }
5899 return FALSE;
5900 }
5901
5902 struct cfil_hash_entry *
5903 cfil_db_lookup_entry_with_sockid(struct cfil_db *db, u_int64_t sock_id)
5904 {
5905 struct cfilhashhead *cfilhash = NULL;
5906 u_int32_t flowhash = (u_int32_t)(sock_id & 0x0ffffffff);
5907 struct cfil_hash_entry *nextentry;
5908
5909 if (db == NULL || db->cfdb_hashbase == NULL || sock_id == 0) {
5910 return NULL;
5911 }
5912
5913 flowhash &= db->cfdb_hashmask;
5914 cfilhash = &db->cfdb_hashbase[flowhash];
5915
5916 LIST_FOREACH(nextentry, cfilhash, cfentry_link) {
5917 if (nextentry->cfentry_cfil != NULL &&
5918 nextentry->cfentry_cfil->cfi_sock_id == sock_id) {
5919 CFIL_LOG(LOG_DEBUG, "CFIL: UDP <so %llx> matched <id %llu, hash %u>",
5920 (uint64_t)VM_KERNEL_ADDRPERM(db->cfdb_so), nextentry->cfentry_cfil->cfi_sock_id, flowhash);
5921 cfil_hash_entry_log(LOG_DEBUG, db->cfdb_so, nextentry, 0, "CFIL: UDP found entry");
5922 return nextentry;
5923 }
5924 }
5925
5926 CFIL_LOG(LOG_DEBUG, "CFIL: UDP <so %llx> NOT matched <id %llu, hash %u>",
5927 (uint64_t)VM_KERNEL_ADDRPERM(db->cfdb_so), sock_id, flowhash);
5928 return NULL;
5929 }
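
/*
 * Sketch (not compiled): the bucket selection performed by the lookup above.
 * The low 32 bits of a cfil sock id carry the flow hash, which is then masked
 * down to the bucket range.  "hashmask" is assumed to be the mask produced by
 * hashinit() in cfil_db_init().
 */
#if 0
static u_int32_t
cfil_sockid_to_bucket(u_int64_t sock_id, u_long hashmask)
{
	u_int32_t flowhash = (u_int32_t)(sock_id & 0x0ffffffff);

	return flowhash & (u_int32_t)hashmask;
}
#endif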
5930
5931 struct cfil_hash_entry *
5932 cfil_db_lookup_entry(struct cfil_db *db, struct sockaddr *local, struct sockaddr *remote, boolean_t remoteOnly)
5933 {
5934 struct cfil_hash_entry matchentry = { };
5935 struct cfil_hash_entry *nextentry = NULL;
5936 struct inpcb *inp = sotoinpcb(db->cfdb_so);
5937 u_int32_t hashkey_faddr = 0, hashkey_laddr = 0;
5938 u_int16_t hashkey_fport = 0, hashkey_lport = 0;
5939 int inp_hash_element = 0;
5940 struct cfilhashhead *cfilhash = NULL;
5941
5942 CFIL_LOG(LOG_INFO, "");
5943
5944 if (inp == NULL) {
5945 goto done;
5946 }
5947
5948 if (remoteOnly == false) {
5949 if (local != NULL) {
5950 fill_cfil_hash_entry_from_address(&matchentry, TRUE, local);
5951 } else {
5952 fill_cfil_hash_entry_from_inp(&matchentry, TRUE, inp);
5953 }
5954 }
5955 if (remote != NULL) {
5956 fill_cfil_hash_entry_from_address(&matchentry, FALSE, remote);
5957 } else {
5958 fill_cfil_hash_entry_from_inp(&matchentry, FALSE, inp);
5959 }
5960
5961 #if INET6
5962 if (inp->inp_vflag & INP_IPV6) {
5963 hashkey_faddr = matchentry.cfentry_faddr.addr6.s6_addr32[3];
5964 hashkey_laddr = (remoteOnly == false) ? matchentry.cfentry_laddr.addr6.s6_addr32[3] : 0;
5965 } else
5966 #endif /* INET6 */
5967 {
5968 hashkey_faddr = matchentry.cfentry_faddr.addr46.ia46_addr4.s_addr;
5969 hashkey_laddr = (remoteOnly == false) ? matchentry.cfentry_laddr.addr46.ia46_addr4.s_addr : 0;
5970 }
5971
5972 hashkey_fport = matchentry.cfentry_fport;
5973 hashkey_lport = (remoteOnly == false) ? matchentry.cfentry_lport : 0;
5974
5975 inp_hash_element = CFIL_HASH(hashkey_laddr, hashkey_faddr, hashkey_lport, hashkey_fport);
5976 inp_hash_element &= db->cfdb_hashmask;
5977
5978 cfilhash = &db->cfdb_hashbase[inp_hash_element];
5979
5980 LIST_FOREACH(nextentry, cfilhash, cfentry_link) {
5981 #if INET6
5982 if ((inp->inp_vflag & INP_IPV6) &&
5983 (remoteOnly || nextentry->cfentry_lport == matchentry.cfentry_lport) &&
5984 nextentry->cfentry_fport == matchentry.cfentry_fport &&
5985 (remoteOnly || IN6_ARE_ADDR_EQUAL(&nextentry->cfentry_laddr.addr6, &matchentry.cfentry_laddr.addr6)) &&
5986 IN6_ARE_ADDR_EQUAL(&nextentry->cfentry_faddr.addr6, &matchentry.cfentry_faddr.addr6)) {
5987 #if DATA_DEBUG
5988 cfil_hash_entry_log(LOG_DEBUG, db->cfdb_so, &matchentry, 0, "CFIL LOOKUP ENTRY: UDP V6 found entry");
5989 #endif
5990 return nextentry;
5991 } else
5992 #endif /* INET6 */
5993 if ((remoteOnly || nextentry->cfentry_lport == matchentry.cfentry_lport) &&
5994 nextentry->cfentry_fport == matchentry.cfentry_fport &&
5995 (remoteOnly || nextentry->cfentry_laddr.addr46.ia46_addr4.s_addr == matchentry.cfentry_laddr.addr46.ia46_addr4.s_addr) &&
5996 nextentry->cfentry_faddr.addr46.ia46_addr4.s_addr == matchentry.cfentry_faddr.addr46.ia46_addr4.s_addr) {
5997 #if DATA_DEBUG
5998 cfil_hash_entry_log(LOG_DEBUG, db->cfdb_so, &matchentry, 0, "CFIL LOOKUP ENTRY: UDP V4 found entry");
5999 #endif
6000 return nextentry;
6001 }
6002 }
6003
6004 done:
6005 #if DATA_DEBUG
6006 cfil_hash_entry_log(LOG_DEBUG, db->cfdb_so, &matchentry, 0, "CFIL LOOKUP ENTRY: UDP no entry found");
6007 #endif
6008 return NULL;
6009 }
6010
6011 void
6012 cfil_db_delete_entry(struct cfil_db *db, struct cfil_hash_entry *hash_entry)
6013 {
6014 if (hash_entry == NULL) {
6015 return;
6016 }
6017 if (db == NULL || db->cfdb_count == 0) {
6018 return;
6019 }
6020 db->cfdb_count--;
6021 if (db->cfdb_only_entry == hash_entry) {
6022 db->cfdb_only_entry = NULL;
6023 }
6024 LIST_REMOVE(hash_entry, cfentry_link);
6025 zfree(cfil_hash_entry_zone, hash_entry);
6026 }
6027
6028 struct cfil_hash_entry *
6029 cfil_db_add_entry(struct cfil_db *db, struct sockaddr *local, struct sockaddr *remote)
6030 {
6031 struct cfil_hash_entry *entry = NULL;
6032 struct inpcb *inp = sotoinpcb(db->cfdb_so);
6033 u_int32_t hashkey_faddr = 0, hashkey_laddr = 0;
6034 int inp_hash_element = 0;
6035 struct cfilhashhead *cfilhash = NULL;
6036
6037 CFIL_LOG(LOG_INFO, "");
6038
6039 if (inp == NULL) {
6040 goto done;
6041 }
6042
6043 entry = zalloc(cfil_hash_entry_zone);
6044 if (entry == NULL) {
6045 goto done;
6046 }
6047 bzero(entry, sizeof(struct cfil_hash_entry));
6048
6049 if (local != NULL) {
6050 fill_cfil_hash_entry_from_address(entry, TRUE, local);
6051 } else {
6052 fill_cfil_hash_entry_from_inp(entry, TRUE, inp);
6053 }
6054 if (remote != NULL) {
6055 fill_cfil_hash_entry_from_address(entry, FALSE, remote);
6056 } else {
6057 fill_cfil_hash_entry_from_inp(entry, FALSE, inp);
6058 }
6059 entry->cfentry_lastused = net_uptime();
6060
6061 #if INET6
6062 if (inp->inp_vflag & INP_IPV6) {
6063 hashkey_faddr = entry->cfentry_faddr.addr6.s6_addr32[3];
6064 hashkey_laddr = entry->cfentry_laddr.addr6.s6_addr32[3];
6065 } else
6066 #endif /* INET6 */
6067 {
6068 hashkey_faddr = entry->cfentry_faddr.addr46.ia46_addr4.s_addr;
6069 hashkey_laddr = entry->cfentry_laddr.addr46.ia46_addr4.s_addr;
6070 }
6071 entry->cfentry_flowhash = CFIL_HASH(hashkey_laddr, hashkey_faddr,
6072 entry->cfentry_lport, entry->cfentry_fport);
6073 inp_hash_element = entry->cfentry_flowhash & db->cfdb_hashmask;
6074
6075 cfilhash = &db->cfdb_hashbase[inp_hash_element];
6076
6077 LIST_INSERT_HEAD(cfilhash, entry, cfentry_link);
6078 db->cfdb_count++;
6079 db->cfdb_only_entry = entry;
6080 cfil_hash_entry_log(LOG_DEBUG, db->cfdb_so, entry, 0, "CFIL: cfil_db_add_entry: ADDED");
6081
6082 done:
6083 CFIL_LOG(LOG_DEBUG, "CFIL: UDP <so %llx> total count %d", (uint64_t)VM_KERNEL_ADDRPERM(db->cfdb_so), db->cfdb_count);
6084 return entry;
6085 }
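
/*
 * Sketch (not compiled): the keys cfil_db_add_entry() above feeds to
 * CFIL_HASH().  Only the last 32 bits of an IPv6 address participate in the
 * flow hash; an IPv4 address is used whole.  "entry" is assumed to be an
 * already populated struct cfil_hash_entry.
 */
#if 0
static void
cfil_hash_keys(const struct cfil_hash_entry *entry,
    u_int32_t *laddr_key, u_int32_t *faddr_key)
{
	if (entry->cfentry_family == AF_INET6) {
		*laddr_key = entry->cfentry_laddr.addr6.s6_addr32[3];
		*faddr_key = entry->cfentry_faddr.addr6.s6_addr32[3];
	} else {
		*laddr_key = entry->cfentry_laddr.addr46.ia46_addr4.s_addr;
		*faddr_key = entry->cfentry_faddr.addr46.ia46_addr4.s_addr;
	}
}
#endif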
6086
6087 void
6088 cfil_db_update_entry_local(struct cfil_db *db, struct cfil_hash_entry *entry, struct sockaddr *local)
6089 {
6090 struct inpcb *inp = sotoinpcb(db->cfdb_so);
6091
6092 CFIL_LOG(LOG_INFO, "");
6093
6094 if (inp == NULL || entry == NULL) {
6095 return;
6096 }
6097
6098 if (local != NULL) {
6099 fill_cfil_hash_entry_from_address(entry, TRUE, local);
6100 } else {
6101 fill_cfil_hash_entry_from_inp(entry, TRUE, inp);
6102 }
6103 cfil_hash_entry_log(LOG_DEBUG, db->cfdb_so, entry, 0, "CFIL: cfil_db_add_entry: local updated");
6104
6105 return;
6106 }
6107
6108 struct cfil_info *
6109 cfil_db_get_cfil_info(struct cfil_db *db, cfil_sock_id_t id)
6110 {
6111 struct cfil_hash_entry *hash_entry = NULL;
6112
6113 CFIL_LOG(LOG_INFO, "");
6114
6115 if (db == NULL || id == 0) {
6116 CFIL_LOG(LOG_DEBUG, "CFIL: UDP <so %llx> NULL DB <id %llu>",
6117 db ? (uint64_t)VM_KERNEL_ADDRPERM(db->cfdb_so) : 0, id);
6118 return NULL;
6119 }
6120
6121 // This is an optimization for a connected UDP socket, which has only one flow.
6122 // No need to do the hash lookup.
6123 if (db->cfdb_count == 1) {
6124 if (db->cfdb_only_entry && db->cfdb_only_entry->cfentry_cfil &&
6125 db->cfdb_only_entry->cfentry_cfil->cfi_sock_id == id) {
6126 return db->cfdb_only_entry->cfentry_cfil;
6127 }
6128 }
6129
6130 hash_entry = cfil_db_lookup_entry_with_sockid(db, id);
6131 return hash_entry != NULL ? hash_entry->cfentry_cfil : NULL;
6132 }
6133
6134 struct cfil_hash_entry *
6135 cfil_sock_udp_get_flow(struct socket *so, uint32_t filter_control_unit, bool outgoing, struct sockaddr *local, struct sockaddr *remote, int debug)
6136 {
6137 struct cfil_hash_entry *hash_entry = NULL;
6138
6139 errno_t error = 0;
6140 socket_lock_assert_owned(so);
6141
6142 // If new socket, allocate cfil db
6143 if (so->so_cfil_db == NULL) {
6144 if (cfil_db_init(so) != 0) {
6145 return NULL;
6146 }
6147 }
6148
6149 // See if flow already exists.
6150 hash_entry = cfil_db_lookup_entry(so->so_cfil_db, local, remote, false);
6151 if (hash_entry == NULL) {
6152 // No match with both local and remote, try match with remote only
6153 hash_entry = cfil_db_lookup_entry(so->so_cfil_db, local, remote, true);
6154 if (hash_entry != NULL) {
6155 // Simply update the local address into the original flow, keeping
6156 // its sockId and flow_hash unchanged.
6157 cfil_db_update_entry_local(so->so_cfil_db, hash_entry, local);
6158 }
6159 }
6160 if (hash_entry != NULL) {
6161 return hash_entry;
6162 }
6163
6164 hash_entry = cfil_db_add_entry(so->so_cfil_db, local, remote);
6165 if (hash_entry == NULL) {
6166 OSIncrementAtomic(&cfil_stats.cfs_sock_attach_no_mem);
6167 CFIL_LOG(LOG_ERR, "CFIL: UDP failed to add entry");
6168 return NULL;
6169 }
6170
6171 if (cfil_info_alloc(so, hash_entry) == NULL ||
6172 hash_entry->cfentry_cfil == NULL) {
6173 cfil_db_delete_entry(so->so_cfil_db, hash_entry);
6174 CFIL_LOG(LOG_ERR, "CFIL: UDP failed to alloc cfil_info");
6175 OSIncrementAtomic(&cfil_stats.cfs_sock_attach_no_mem);
6176 return NULL;
6177 }
6178 hash_entry->cfentry_cfil->cfi_dir = outgoing ? CFS_CONNECTION_DIR_OUT : CFS_CONNECTION_DIR_IN;
6179 hash_entry->cfentry_cfil->cfi_debug = debug;
6180
6181 #if LIFECYCLE_DEBUG
6182 cfil_info_log(LOG_ERR, hash_entry->cfentry_cfil, "CFIL: LIFECYCLE: ADDED");
6183 #endif
6184
6185 if (cfil_info_attach_unit(so, filter_control_unit, hash_entry->cfentry_cfil) == 0) {
6186 cfil_info_free(hash_entry->cfentry_cfil);
6187 cfil_db_delete_entry(so->so_cfil_db, hash_entry);
6188 CFIL_LOG(LOG_ERR, "CFIL: UDP cfil_info_attach_unit(%u) failed",
6189 filter_control_unit);
6190 OSIncrementAtomic(&cfil_stats.cfs_sock_attach_failed);
6191 return NULL;
6192 }
6193 CFIL_LOG(LOG_DEBUG, "CFIL: UDP <so %llx> filter_control_unit %u sockID %llu attached",
6194 (uint64_t)VM_KERNEL_ADDRPERM(so),
6195 filter_control_unit, hash_entry->cfentry_cfil->cfi_sock_id);
6196
6197 so->so_flags |= SOF_CONTENT_FILTER;
6198 OSIncrementAtomic(&cfil_stats.cfs_sock_attached);
6199
6200 /* Hold a reference on the socket for each flow */
6201 so->so_usecount++;
6202
6203 if (debug) {
6204 cfil_info_log(LOG_ERR, hash_entry->cfentry_cfil, "CFIL: LIFECYCLE: ADDED");
6205 }
6206
6207 error = cfil_dispatch_attach_event(so, hash_entry->cfentry_cfil, 0,
6208 outgoing ? CFS_CONNECTION_DIR_OUT : CFS_CONNECTION_DIR_IN);
6209 /* We can recover from flow control or out of memory errors */
6210 if (error != 0 && error != ENOBUFS && error != ENOMEM) {
6211 return NULL;
6212 }
6213
6214 CFIL_INFO_VERIFY(hash_entry->cfentry_cfil);
6215 return hash_entry;
6216 }
6217
6218 errno_t
6219 cfil_sock_udp_handle_data(bool outgoing, struct socket *so,
6220 struct sockaddr *local, struct sockaddr *remote,
6221 struct mbuf *data, struct mbuf *control, uint32_t flags)
6222 {
6223 #pragma unused(outgoing, so, local, remote, data, control, flags)
6224 errno_t error = 0;
6225 uint32_t filter_control_unit;
6226 struct cfil_hash_entry *hash_entry = NULL;
6227 struct cfil_info *cfil_info = NULL;
6228 int debug = 0;
6229
6230 socket_lock_assert_owned(so);
6231
6232 if (cfil_active_count == 0) {
6233 CFIL_LOG(LOG_DEBUG, "CFIL: UDP no active filter");
6234 OSIncrementAtomic(&cfil_stats.cfs_sock_attach_in_vain);
6235 return error;
6236 }
6237
6238 // Socket has been blessed (marked to skip content filtering)
6239 if ((so->so_flags1 & SOF1_CONTENT_FILTER_SKIP) != 0) {
6240 return error;
6241 }
6242
6243 filter_control_unit = necp_socket_get_content_filter_control_unit(so);
6244 if (filter_control_unit == 0) {
6245 CFIL_LOG(LOG_DEBUG, "CFIL: UDP failed to get control unit");
6246 return error;
6247 }
6248
6249 if (filter_control_unit == NECP_FILTER_UNIT_NO_FILTER) {
6250 return error;
6251 }
6252
6253 if ((filter_control_unit & NECP_MASK_USERSPACE_ONLY) != 0) {
6254 CFIL_LOG(LOG_DEBUG, "CFIL: UDP user space only");
6255 OSIncrementAtomic(&cfil_stats.cfs_sock_userspace_only);
6256 return error;
6257 }
6258
6259 hash_entry = cfil_sock_udp_get_flow(so, filter_control_unit, outgoing, local, remote, debug);
6260 if (hash_entry == NULL || hash_entry->cfentry_cfil == NULL) {
6261 CFIL_LOG(LOG_ERR, "CFIL: Falied to create UDP flow");
6262 return EPIPE;
6263 }
6264 // Update last used timestamp, this drives the flow idle timeout
6265 hash_entry->cfentry_lastused = net_uptime();
6266 cfil_info = hash_entry->cfentry_cfil;
6267
6268 if (cfil_info->cfi_flags & CFIF_DROP) {
6269 #if DATA_DEBUG
6270 cfil_hash_entry_log(LOG_DEBUG, so, hash_entry, 0, "CFIL: UDP DROP");
6271 #endif
6272 return EPIPE;
6273 }
6274 if (control != NULL) {
6275 OSIncrementAtomic(&cfil_stats.cfs_data_in_control);
6276 }
6277 if (data->m_type == MT_OOBDATA) {
6278 CFIL_LOG(LOG_ERR, "so %llx MSG_OOB",
6279 (uint64_t)VM_KERNEL_ADDRPERM(so));
6280 OSIncrementAtomic(&cfil_stats.cfs_data_in_oob);
6281 }
6282
6283 error = cfil_data_common(so, cfil_info, outgoing, remote, data, control, flags);
6284
6285 return error;
6286 }
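
/*
 * Summary of the checks above: a datagram is only diverted to a content
 * filter when every earlier test falls through.
 *
 *   - no active filter (cfil_active_count == 0)          -> pass through
 *   - socket flagged SOF1_CONTENT_FILTER_SKIP            -> pass through
 *   - NECP returns no filter control unit (0)            -> pass through
 *   - control unit is NECP_FILTER_UNIT_NO_FILTER         -> pass through
 *   - control unit is user-space only                    -> pass through
 *   - otherwise a flow is looked up or created and the
 *     data is run through cfil_data_common()
 */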
6287
6288 /*
6289 * Go through all UDP flows for the specified socket and return TRUE if
6290 * any flow is still attached. If need_wait is TRUE, wait on the first
6291 * attached flow.
6292 */
6293 static int
6294 cfil_filters_udp_attached(struct socket *so, bool need_wait)
6295 {
6296 struct timespec ts;
6297 lck_mtx_t *mutex_held;
6298 struct cfilhashhead *cfilhash = NULL;
6299 struct cfil_db *db = NULL;
6300 struct cfil_hash_entry *hash_entry = NULL;
6301 struct cfil_hash_entry *temp_hash_entry = NULL;
6302 struct cfil_info *cfil_info = NULL;
6303 struct cfil_entry *entry = NULL;
6304 errno_t error = 0;
6305 int kcunit;
6306 int attached = 0;
6307 uint64_t sock_flow_id = 0;
6308
6309 socket_lock_assert_owned(so);
6310
6311 if ((so->so_flags & SOF_CONTENT_FILTER) != 0 && so->so_cfil_db != NULL) {
6312 if (so->so_proto->pr_getlock != NULL) {
6313 mutex_held = (*so->so_proto->pr_getlock)(so, PR_F_WILLUNLOCK);
6314 } else {
6315 mutex_held = so->so_proto->pr_domain->dom_mtx;
6316 }
6317 LCK_MTX_ASSERT(mutex_held, LCK_MTX_ASSERT_OWNED);
6318
6319 db = so->so_cfil_db;
6320
6321 for (int i = 0; i < CFILHASHSIZE; i++) {
6322 cfilhash = &db->cfdb_hashbase[i];
6323
6324 LIST_FOREACH_SAFE(hash_entry, cfilhash, cfentry_link, temp_hash_entry) {
6325 if (hash_entry->cfentry_cfil != NULL) {
6326 cfil_info = hash_entry->cfentry_cfil;
6327 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
6328 entry = &cfil_info->cfi_entries[kcunit - 1];
6329
6330 /* Are we attached to the filter? */
6331 if (entry->cfe_filter == NULL) {
6332 continue;
6333 }
6334
6335 if ((entry->cfe_flags & CFEF_SENT_SOCK_ATTACHED) == 0) {
6336 continue;
6337 }
6338 if ((entry->cfe_flags & CFEF_CFIL_DETACHED) != 0) {
6339 continue;
6340 }
6341
6342 attached = 1;
6343
6344 if (need_wait == TRUE) {
6345 #if LIFECYCLE_DEBUG
6346 cfil_info_log(LOG_ERR, cfil_info, "CFIL: LIFECYCLE: WAIT FOR FLOW TO FINISH");
6347 #endif
6348
6349 ts.tv_sec = cfil_close_wait_timeout / 1000;
6350 ts.tv_nsec = (cfil_close_wait_timeout % 1000) *
6351 NSEC_PER_USEC * 1000;
6352
6353 OSIncrementAtomic(&cfil_stats.cfs_close_wait);
6354 cfil_info->cfi_flags |= CFIF_CLOSE_WAIT;
6355 sock_flow_id = cfil_info->cfi_sock_id;
6356
6357 error = msleep((caddr_t)cfil_info, mutex_held,
6358 PSOCK | PCATCH, "cfil_filters_udp_attached", &ts);
6359
6360 // Woke up from sleep, validate if cfil_info is still valid
6361 if (so->so_cfil_db == NULL ||
6362 (cfil_info != cfil_db_get_cfil_info(so->so_cfil_db, sock_flow_id))) {
6363 // cfil_info is not valid, do not continue
6364 goto done;
6365 }
6366
6367 cfil_info->cfi_flags &= ~CFIF_CLOSE_WAIT;
6368
6369 #if LIFECYCLE_DEBUG
6370 cfil_info_log(LOG_ERR, cfil_info, "CFIL: LIFECYCLE: WAIT FOR FLOW DONE");
6371 #endif
6372
6373 /*
6374 * Force close in case of timeout
6375 */
6376 if (error != 0) {
6377 OSIncrementAtomic(&cfil_stats.cfs_close_wait_timeout);
6378 #if LIFECYCLE_DEBUG
6379 cfil_info_log(LOG_ERR, cfil_info, "CFIL: LIFECYCLE: WAIT FOR FLOW TIMED OUT, FORCE DETACH");
6380 #endif
6381 entry->cfe_flags |= CFEF_CFIL_DETACHED;
6382 }
6383 }
6384 goto done;
6385 }
6386 }
6387 }
6388 }
6389 }
6390
6391 done:
6392 return attached;
6393 }
6394
6395 int32_t
6396 cfil_sock_udp_data_pending(struct sockbuf *sb, bool check_thread)
6397 {
6398 struct socket *so = sb->sb_so;
6399 struct cfi_buf *cfi_buf;
6400 uint64_t pending = 0;
6401 uint64_t total_pending = 0;
6402 struct cfilhashhead *cfilhash = NULL;
6403 struct cfil_db *db = NULL;
6404 struct cfil_hash_entry *hash_entry = NULL;
6405 struct cfil_hash_entry *temp_hash_entry = NULL;
6406
6407 socket_lock_assert_owned(so);
6408
6409 if ((so->so_flags & SOF_CONTENT_FILTER) != 0 && so->so_cfil_db != NULL &&
6410 (check_thread == FALSE || so->so_snd.sb_cfil_thread != current_thread())) {
6411 db = so->so_cfil_db;
6412
6413 for (int i = 0; i < CFILHASHSIZE; i++) {
6414 cfilhash = &db->cfdb_hashbase[i];
6415
6416 LIST_FOREACH_SAFE(hash_entry, cfilhash, cfentry_link, temp_hash_entry) {
6417 if (hash_entry->cfentry_cfil != NULL) {
6418 if ((sb->sb_flags & SB_RECV) == 0) {
6419 cfi_buf = &hash_entry->cfentry_cfil->cfi_snd;
6420 } else {
6421 cfi_buf = &hash_entry->cfentry_cfil->cfi_rcv;
6422 }
6423
6424 pending = cfi_buf->cfi_pending_last - cfi_buf->cfi_pending_first;
6425 /*
6426 * If we are limited by the "chars of mbufs used" count, adjust the
6427 * estimate so we won't overcommit
6428 */
6429 if ((uint64_t)cfi_buf->cfi_pending_mbcnt > pending) {
6430 pending = cfi_buf->cfi_pending_mbcnt;
6431 }
6432
6433 total_pending += pending;
6434 }
6435 }
6436 }
6437
6438 VERIFY(total_pending < INT32_MAX);
6439 #if DATA_DEBUG
6440 CFIL_LOG(LOG_DEBUG, "CFIL: <so %llx> total pending %llu <check_thread %d>",
6441 (uint64_t)VM_KERNEL_ADDRPERM(so),
6442 total_pending, check_thread);
6443 #endif
6444 }
6445
6446 return (int32_t)(total_pending);
6447 }
6448
6449 int
6450 cfil_sock_udp_notify_shutdown(struct socket *so, int how, int drop_flag, int shut_flag)
6451 {
6452 struct cfil_info *cfil_info = NULL;
6453 struct cfilhashhead *cfilhash = NULL;
6454 struct cfil_db *db = NULL;
6455 struct cfil_hash_entry *hash_entry = NULL;
6456 struct cfil_hash_entry *temp_hash_entry = NULL;
6457 errno_t error = 0;
6458 int done_count = 0;
6459 int kcunit;
6460
6461 socket_lock_assert_owned(so);
6462
6463 if ((so->so_flags & SOF_CONTENT_FILTER) != 0 && so->so_cfil_db != NULL) {
6464 db = so->so_cfil_db;
6465
6466 for (int i = 0; i < CFILHASHSIZE; i++) {
6467 cfilhash = &db->cfdb_hashbase[i];
6468
6469 LIST_FOREACH_SAFE(hash_entry, cfilhash, cfentry_link, temp_hash_entry) {
6470 if (hash_entry->cfentry_cfil != NULL) {
6471 cfil_info = hash_entry->cfentry_cfil;
6472
6473 // This flow is marked as DROP
6474 if (cfil_info->cfi_flags & drop_flag) {
6475 done_count++;
6476 continue;
6477 }
6478
6479 // This flow has been shut already, skip
6480 if (cfil_info->cfi_flags & shut_flag) {
6481 continue;
6482 }
6483 // Mark flow as shut
6484 cfil_info->cfi_flags |= shut_flag;
6485 done_count++;
6486
6487 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
6488 /* Disconnect incoming side */
6489 if (how != SHUT_WR) {
6490 error = cfil_dispatch_disconnect_event(so, cfil_info, kcunit, 0);
6491 }
6492 /* Disconnect outgoing side */
6493 if (how != SHUT_RD) {
6494 error = cfil_dispatch_disconnect_event(so, cfil_info, kcunit, 1);
6495 }
6496 }
6497 }
6498 }
6499 }
6500 }
6501
6502 if (done_count == 0) {
6503 error = ENOTCONN;
6504 }
6505 return error;
6506 }
6507
6508 int
6509 cfil_sock_udp_shutdown(struct socket *so, int *how)
6510 {
6511 int error = 0;
6512
6513 if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || (so->so_cfil_db == NULL)) {
6514 goto done;
6515 }
6516
6517 socket_lock_assert_owned(so);
6518
6519 CFIL_LOG(LOG_INFO, "so %llx how %d",
6520 (uint64_t)VM_KERNEL_ADDRPERM(so), *how);
6521
6522 /*
6523 * Check the state of the socket before the content filter
6524 */
6525 if (*how != SHUT_WR && (so->so_state & SS_CANTRCVMORE) != 0) {
6526 /* read already shut down */
6527 error = ENOTCONN;
6528 goto done;
6529 }
6530 if (*how != SHUT_RD && (so->so_state & SS_CANTSENDMORE) != 0) {
6531 /* write already shut down */
6532 error = ENOTCONN;
6533 goto done;
6534 }
6535
6536 /*
6537 * shutdown read: SHUT_RD or SHUT_RDWR
6538 */
6539 if (*how != SHUT_WR) {
6540 error = cfil_sock_udp_notify_shutdown(so, SHUT_RD, CFIF_DROP, CFIF_SHUT_RD);
6541 if (error != 0) {
6542 goto done;
6543 }
6544 }
6545 /*
6546 * shutdown write: SHUT_WR or SHUT_RDWR
6547 */
6548 if (*how != SHUT_RD) {
6549 error = cfil_sock_udp_notify_shutdown(so, SHUT_WR, CFIF_DROP, CFIF_SHUT_WR);
6550 if (error != 0) {
6551 goto done;
6552 }
6553
6554 /*
6555 * When outgoing data is pending, we delay the shutdown at the
6556 * protocol level until the content filters give the final
6557 * verdict on the pending data.
6558 */
6559 if (cfil_sock_data_pending(&so->so_snd) != 0) {
6560 /*
6561 * When shutting down the read and write sides at once
6562 * we can proceed to the final shutdown of the read
6563 * side. Otherwise, we just return.
6564 */
6565 if (*how == SHUT_WR) {
6566 error = EJUSTRETURN;
6567 } else if (*how == SHUT_RDWR) {
6568 *how = SHUT_RD;
6569 }
6570 }
6571 }
6572 done:
6573 return error;
6574 }
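
/*
 * Summary of how cfil_sock_udp_shutdown() above defers the protocol-level
 * shutdown while filtered outgoing data is still pending (assuming neither
 * side was already shut down):
 *
 *   *how on entry   pending send data   outcome
 *   -------------   -----------------   -----------------------------------
 *   SHUT_RD         (not checked)       0, read side shuts down normally
 *   SHUT_WR         yes                 EJUSTRETURN, caller returns early
 *   SHUT_RDWR       yes                 0, *how downgraded to SHUT_RD
 *   SHUT_WR/RDWR    no                  0, shutdown proceeds normally
 */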
6575
6576 void
6577 cfil_sock_udp_close_wait(struct socket *so)
6578 {
6579 socket_lock_assert_owned(so);
6580
6581 while (cfil_filters_udp_attached(so, FALSE)) {
6582 /*
6583 * Notify the filters we are going away so they can detach
6584 */
6585 cfil_sock_udp_notify_shutdown(so, SHUT_RDWR, 0, 0);
6586
6587 /*
6588 * Make sure we still need to wait after the filters are notified
6589 * of the disconnection
6590 */
6591 if (cfil_filters_udp_attached(so, TRUE) == 0) {
6592 break;
6593 }
6594 }
6595 }
6596
6597 void
6598 cfil_sock_udp_is_closed(struct socket *so)
6599 {
6600 struct cfil_info *cfil_info = NULL;
6601 struct cfilhashhead *cfilhash = NULL;
6602 struct cfil_db *db = NULL;
6603 struct cfil_hash_entry *hash_entry = NULL;
6604 struct cfil_hash_entry *temp_hash_entry = NULL;
6605 errno_t error = 0;
6606 int kcunit;
6607
6608 socket_lock_assert_owned(so);
6609
6610 if ((so->so_flags & SOF_CONTENT_FILTER) != 0 && so->so_cfil_db != NULL) {
6611 db = so->so_cfil_db;
6612
6613 for (int i = 0; i < CFILHASHSIZE; i++) {
6614 cfilhash = &db->cfdb_hashbase[i];
6615
6616 LIST_FOREACH_SAFE(hash_entry, cfilhash, cfentry_link, temp_hash_entry) {
6617 if (hash_entry->cfentry_cfil != NULL) {
6618 cfil_info = hash_entry->cfentry_cfil;
6619
6620 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
6621 /* Let the filters know of the closing */
6622 error = cfil_dispatch_closed_event(so, cfil_info, kcunit);
6623 }
6624
6625 /* Last chance to push passed data out */
6626 error = cfil_acquire_sockbuf(so, cfil_info, 1);
6627 if (error == 0) {
6628 cfil_service_inject_queue(so, cfil_info, 1);
6629 }
6630 cfil_release_sockbuf(so, 1);
6631
6632 cfil_info->cfi_flags |= CFIF_SOCK_CLOSED;
6633
6634 /* Pending data needs to go */
6635 cfil_flush_queues(so, cfil_info);
6636
6637 CFIL_INFO_VERIFY(cfil_info);
6638 }
6639 }
6640 }
6641 }
6642 }
6643
6644 void
6645 cfil_sock_udp_buf_update(struct sockbuf *sb)
6646 {
6647 struct cfil_info *cfil_info = NULL;
6648 struct cfilhashhead *cfilhash = NULL;
6649 struct cfil_db *db = NULL;
6650 struct cfil_hash_entry *hash_entry = NULL;
6651 struct cfil_hash_entry *temp_hash_entry = NULL;
6652 errno_t error = 0;
6653 int outgoing;
6654 struct socket *so = sb->sb_so;
6655
6656 socket_lock_assert_owned(so);
6657
6658 if ((so->so_flags & SOF_CONTENT_FILTER) != 0 && so->so_cfil_db != NULL) {
6659 if (!cfil_sbtrim) {
6660 return;
6661 }
6662
6663 db = so->so_cfil_db;
6664
6665 for (int i = 0; i < CFILHASHSIZE; i++) {
6666 cfilhash = &db->cfdb_hashbase[i];
6667
6668 LIST_FOREACH_SAFE(hash_entry, cfilhash, cfentry_link, temp_hash_entry) {
6669 if (hash_entry->cfentry_cfil != NULL) {
6670 cfil_info = hash_entry->cfentry_cfil;
6671
6672 if ((sb->sb_flags & SB_RECV) == 0) {
6673 if ((cfil_info->cfi_flags & CFIF_RETRY_INJECT_OUT) == 0) {
6674 return;
6675 }
6676 outgoing = 1;
6677 OSIncrementAtomic(&cfil_stats.cfs_inject_q_out_retry);
6678 } else {
6679 if ((cfil_info->cfi_flags & CFIF_RETRY_INJECT_IN) == 0) {
6680 return;
6681 }
6682 outgoing = 0;
6683 OSIncrementAtomic(&cfil_stats.cfs_inject_q_in_retry);
6684 }
6685
6686 CFIL_LOG(LOG_NOTICE, "so %llx outgoing %d",
6687 (uint64_t)VM_KERNEL_ADDRPERM(so), outgoing);
6688
6689 error = cfil_acquire_sockbuf(so, cfil_info, outgoing);
6690 if (error == 0) {
6691 cfil_service_inject_queue(so, cfil_info, outgoing);
6692 }
6693 cfil_release_sockbuf(so, outgoing);
6694 }
6695 }
6696 }
6697 }
6698 }
6699
6700 void
6701 cfil_filter_show(u_int32_t kcunit)
6702 {
6703 struct content_filter *cfc = NULL;
6704 struct cfil_entry *entry;
6705 int count = 0;
6706
6707 if (content_filters == NULL) {
6708 return;
6709 }
6710 if (kcunit > MAX_CONTENT_FILTER) {
6711 return;
6712 }
6713
6714 cfil_rw_lock_shared(&cfil_lck_rw);
6715
6716 if (content_filters[kcunit - 1] == NULL) {
6717 cfil_rw_unlock_shared(&cfil_lck_rw);
6718 return;
6719 }
6720 cfc = content_filters[kcunit - 1];
6721
6722 CFIL_LOG(LOG_ERR, "CFIL: FILTER SHOW: Filter <unit %d, entry count %d> flags <%lx>:",
6723 kcunit, cfc->cf_sock_count, (unsigned long)cfc->cf_flags);
6724 if (cfc->cf_flags & CFF_DETACHING) {
6725 CFIL_LOG(LOG_ERR, "CFIL: FILTER SHOW: - DETACHING");
6726 }
6727 if (cfc->cf_flags & CFF_ACTIVE) {
6728 CFIL_LOG(LOG_ERR, "CFIL: FILTER SHOW: - ACTIVE");
6729 }
6730 if (cfc->cf_flags & CFF_FLOW_CONTROLLED) {
6731 CFIL_LOG(LOG_ERR, "CFIL: FILTER SHOW: - FLOW CONTROLLED");
6732 }
6733
6734 TAILQ_FOREACH(entry, &cfc->cf_sock_entries, cfe_link) {
6735 if (entry->cfe_cfil_info && entry->cfe_cfil_info->cfi_so) {
6736 struct cfil_info *cfil_info = entry->cfe_cfil_info;
6737
6738 count++;
6739
6740 if (entry->cfe_flags & CFEF_CFIL_DETACHED) {
6741 cfil_info_log(LOG_ERR, cfil_info, "CFIL: FILTER SHOW: - DETACHED");
6742 } else {
6743 cfil_info_log(LOG_ERR, cfil_info, "CFIL: FILTER SHOW: - ATTACHED");
6744 }
6745 }
6746 }
6747
6748 CFIL_LOG(LOG_ERR, "CFIL: FILTER SHOW: Filter - total entries shown: %d", count);
6749
6750 cfil_rw_unlock_shared(&cfil_lck_rw);
6751 }
6752
6753 void
6754 cfil_info_show(void)
6755 {
6756 struct cfil_info *cfil_info;
6757 int count = 0;
6758
6759 cfil_rw_lock_shared(&cfil_lck_rw);
6760
6761 CFIL_LOG(LOG_ERR, "CFIL: INFO SHOW: count %d", cfil_sock_attached_count);
6762
6763 TAILQ_FOREACH(cfil_info, &cfil_sock_head, cfi_link) {
6764 count++;
6765
6766 cfil_info_log(LOG_ERR, cfil_info, "CFIL: INFO SHOW");
6767
6768 if (cfil_info->cfi_flags & CFIF_DROP) {
6769 CFIL_LOG(LOG_ERR, "CFIL: INFO FLAG - DROP");
6770 }
6771 if (cfil_info->cfi_flags & CFIF_CLOSE_WAIT) {
6772 CFIL_LOG(LOG_ERR, "CFIL: INFO FLAG - CLOSE_WAIT");
6773 }
6774 if (cfil_info->cfi_flags & CFIF_SOCK_CLOSED) {
6775 CFIL_LOG(LOG_ERR, "CFIL: INFO FLAG - SOCK_CLOSED");
6776 }
6777 if (cfil_info->cfi_flags & CFIF_RETRY_INJECT_IN) {
6778 CFIL_LOG(LOG_ERR, "CFIL: INFO FLAG - RETRY_INJECT_IN");
6779 }
6780 if (cfil_info->cfi_flags & CFIF_RETRY_INJECT_OUT) {
6781 CFIL_LOG(LOG_ERR, "CFIL: INFO FLAG - RETRY_INJECT_OUT");
6782 }
6783 if (cfil_info->cfi_flags & CFIF_SHUT_WR) {
6784 CFIL_LOG(LOG_ERR, "CFIL: INFO FLAG - SHUT_WR");
6785 }
6786 if (cfil_info->cfi_flags & CFIF_SHUT_RD) {
6787 CFIL_LOG(LOG_ERR, "CFIL: INFO FLAG - SHUT_RD");
6788 }
6789 }
6790
6791 CFIL_LOG(LOG_ERR, "CFIL: INFO SHOW: total cfil_info shown: %d", count);
6792
6793 cfil_rw_unlock_shared(&cfil_lck_rw);
6794 }
6795
6796 bool
6797 cfil_info_idle_timed_out(struct cfil_info *cfil_info, int timeout, u_int32_t current_time)
6798 {
6799 if (cfil_info && cfil_info->cfi_hash_entry &&
6800 (current_time - cfil_info->cfi_hash_entry->cfentry_lastused >= (u_int32_t)timeout)) {
6801 #if GC_DEBUG
6802 cfil_info_log(LOG_ERR, cfil_info, "CFIL: flow IDLE timeout expired");
6803 #endif
6804 return true;
6805 }
6806 return false;
6807 }
6808
6809 bool
6810 cfil_info_action_timed_out(struct cfil_info *cfil_info, int timeout)
6811 {
6812 struct cfil_entry *entry;
6813 struct timeval current_tv;
6814 struct timeval diff_time;
6815
6816 if (cfil_info == NULL) {
6817 return false;
6818 }
6819
6820 /*
6821 * If we have queued up more data than the pass offset and we haven't received
6822 * an action from user space for a while (the user space filter might have crashed),
6823 * return action timed out.
6824 */
6825 if (cfil_info->cfi_snd.cfi_pending_last > cfil_info->cfi_snd.cfi_pass_offset ||
6826 cfil_info->cfi_rcv.cfi_pending_last > cfil_info->cfi_rcv.cfi_pass_offset) {
6827 microuptime(&current_tv);
6828
6829 for (int kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
6830 entry = &cfil_info->cfi_entries[kcunit - 1];
6831
6832 if (entry->cfe_filter == NULL) {
6833 continue;
6834 }
6835
6836 if (cfil_info->cfi_snd.cfi_pending_last > entry->cfe_snd.cfe_pass_offset ||
6837 cfil_info->cfi_rcv.cfi_pending_last > entry->cfe_rcv.cfe_pass_offset) {
6838 // haven't gotten an action from this filter, check timeout
6839 timersub(&current_tv, &entry->cfe_last_action, &diff_time);
6840 if (diff_time.tv_sec >= timeout) {
6841 #if GC_DEBUG
6842 cfil_info_log(LOG_ERR, cfil_info, "CFIL: flow ACTION timeout expired");
6843 #endif
6844 return true;
6845 }
6846 }
6847 }
6848 }
6849 return false;
6850 }
6851
6852 bool
6853 cfil_info_buffer_threshold_exceeded(struct cfil_info *cfil_info)
6854 {
6855 if (cfil_info == NULL) {
6856 return false;
6857 }
6858
6859 /*
6860 * Clean up flow if it exceeded queue thresholds
6861 */
6862 if (cfil_info->cfi_snd.cfi_tail_drop_cnt ||
6863 cfil_info->cfi_rcv.cfi_tail_drop_cnt) {
6864 #if GC_DEBUG
6865 CFIL_LOG(LOG_ERR, "CFIL: queue threshold exceeded: mbuf max <count: %d bytes: %d> tail drop count <OUT: %d IN: %d>",
6866 cfil_udp_gc_mbuf_num_max,
6867 cfil_udp_gc_mbuf_cnt_max,
6868 cfil_info->cfi_snd.cfi_tail_drop_cnt,
6869 cfil_info->cfi_rcv.cfi_tail_drop_cnt);
6870 cfil_info_log(LOG_ERR, cfil_info, "CFIL: queue threshold exceeded");
6871 #endif
6872 return true;
6873 }
6874
6875 return false;
6876 }
6877
6878 static void
6879 cfil_udp_gc_thread_sleep(bool forever)
6880 {
6881 if (forever) {
6882 (void) assert_wait((event_t) &cfil_sock_udp_attached_count,
6883 THREAD_INTERRUPTIBLE);
6884 } else {
6885 uint64_t deadline = 0;
6886 nanoseconds_to_absolutetime(UDP_FLOW_GC_RUN_INTERVAL_NSEC, &deadline);
6887 clock_absolutetime_interval_to_deadline(deadline, &deadline);
6888
6889 (void) assert_wait_deadline(&cfil_sock_udp_attached_count,
6890 THREAD_INTERRUPTIBLE, deadline);
6891 }
6892 }
6893
6894 static void
6895 cfil_udp_gc_thread_func(void *v, wait_result_t w)
6896 {
6897 #pragma unused(v, w)
6898
6899 ASSERT(cfil_udp_gc_thread == current_thread());
6900 thread_set_thread_name(current_thread(), "CFIL_UDP_GC");
6901
6902 // Kick off gc shortly
6903 cfil_udp_gc_thread_sleep(false);
6904 thread_block_parameter((thread_continue_t) cfil_info_udp_expire, NULL);
6905 /* NOTREACHED */
6906 }
6907
6908 static void
6909 cfil_info_udp_expire(void *v, wait_result_t w)
6910 {
6911 #pragma unused(v, w)
6912
6913 static uint64_t expired_array[UDP_FLOW_GC_MAX_COUNT];
6914 static uint32_t expired_count = 0;
6915
6916 struct cfil_info *cfil_info;
6917 struct cfil_hash_entry *hash_entry;
6918 struct cfil_db *db;
6919 struct socket *so;
6920 u_int64_t current_time = 0;
6921
6922 current_time = net_uptime();
6923
6924 // Get all expired UDP flow ids
6925 cfil_rw_lock_shared(&cfil_lck_rw);
6926
6927 if (cfil_sock_udp_attached_count == 0) {
6928 cfil_rw_unlock_shared(&cfil_lck_rw);
6929 goto go_sleep;
6930 }
6931
6932 TAILQ_FOREACH(cfil_info, &cfil_sock_head, cfi_link) {
6933 if (expired_count >= UDP_FLOW_GC_MAX_COUNT) {
6934 break;
6935 }
6936
6937 if (IS_IP_DGRAM(cfil_info->cfi_so)) {
6938 if (cfil_info_idle_timed_out(cfil_info, UDP_FLOW_GC_IDLE_TO, current_time) ||
6939 cfil_info_action_timed_out(cfil_info, UDP_FLOW_GC_ACTION_TO) ||
6940 cfil_info_buffer_threshold_exceeded(cfil_info)) {
6941 expired_array[expired_count] = cfil_info->cfi_sock_id;
6942 expired_count++;
6943 }
6944 }
6945 }
6946 cfil_rw_unlock_shared(&cfil_lck_rw);
6947
6948 if (expired_count == 0) {
6949 goto go_sleep;
6950 }
6951
6952 for (uint32_t i = 0; i < expired_count; i++) {
6953 // Search for socket (UDP only and lock so)
6954 so = cfil_socket_from_sock_id(expired_array[i], true);
6955 if (so == NULL) {
6956 continue;
6957 }
6958
6959 cfil_info = cfil_db_get_cfil_info(so->so_cfil_db, expired_array[i]);
6960 if (cfil_info == NULL) {
6961 goto unlock;
6962 }
6963
6964 db = so->so_cfil_db;
6965 hash_entry = cfil_info->cfi_hash_entry;
6966
6967 if (db == NULL || hash_entry == NULL) {
6968 goto unlock;
6969 }
6970
6971 #if GC_DEBUG || LIFECYCLE_DEBUG
6972 cfil_info_log(LOG_ERR, cfil_info, "CFIL: LIFECYCLE: GC CLEAN UP");
6973 #endif
6974
6975 cfil_db_delete_entry(db, hash_entry);
6976 cfil_info_free(cfil_info);
6977 OSIncrementAtomic(&cfil_stats.cfs_sock_detached);
6978
6979 if (so->so_flags & SOF_CONTENT_FILTER) {
6980 if (db->cfdb_count == 0) {
6981 so->so_flags &= ~SOF_CONTENT_FILTER;
6982 }
6983 VERIFY(so->so_usecount > 0);
6984 so->so_usecount--;
6985 }
6986 unlock:
6987 socket_unlock(so, 1);
6988 }
6989
6990 #if GC_DEBUG
6991 CFIL_LOG(LOG_ERR, "CFIL: UDP flow idle timeout check: expired %d idle flows", expired_count);
6992 #endif
6993 expired_count = 0;
6994
6995 go_sleep:
6996
6997 // Sleep forever (until woken up) if there are no more UDP flows to clean
6998 cfil_rw_lock_shared(&cfil_lck_rw);
6999 cfil_udp_gc_thread_sleep(cfil_sock_udp_attached_count == 0 ? true : false);
7000 cfil_rw_unlock_shared(&cfil_lck_rw);
7001 thread_block_parameter((thread_continue_t)cfil_info_udp_expire, NULL);
7002 /* NOTREACHED */
7003 }
7004
7005 struct m_tag *
7006 cfil_dgram_save_socket_state(struct cfil_info *cfil_info, struct mbuf *m)
7007 {
7008 struct m_tag *tag = NULL;
7009 struct cfil_tag *ctag = NULL;
7010 struct cfil_hash_entry *hash_entry = NULL;
7011 struct inpcb *inp = NULL;
7012
7013 if (cfil_info == NULL || cfil_info->cfi_so == NULL ||
7014 cfil_info->cfi_hash_entry == NULL || m == NULL || !(m->m_flags & M_PKTHDR)) {
7015 return NULL;
7016 }
7017
7018 inp = sotoinpcb(cfil_info->cfi_so);
7019
7020 /* Allocate a tag */
7021 tag = m_tag_create(KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_CFIL_UDP,
7022 sizeof(struct cfil_tag), M_DONTWAIT, m);
7023
7024 if (tag) {
7025 ctag = (struct cfil_tag*)(tag + 1);
7026 ctag->cfil_so_state_change_cnt = cfil_info->cfi_so->so_state_change_cnt;
7027 ctag->cfil_so_options = cfil_info->cfi_so->so_options;
7028 ctag->cfil_inp_flags = inp ? inp->inp_flags : 0;
7029
7030 hash_entry = cfil_info->cfi_hash_entry;
7031 if (hash_entry->cfentry_family == AF_INET6) {
7032 fill_ip6_sockaddr_4_6(&ctag->cfil_faddr,
7033 &hash_entry->cfentry_faddr.addr6,
7034 hash_entry->cfentry_fport);
7035 } else if (hash_entry->cfentry_family == AF_INET) {
7036 fill_ip_sockaddr_4_6(&ctag->cfil_faddr,
7037 hash_entry->cfentry_faddr.addr46.ia46_addr4,
7038 hash_entry->cfentry_fport);
7039 }
7040 m_tag_prepend(m, tag);
7041 return tag;
7042 }
7043 return NULL;
7044 }
7045
7046 struct m_tag *
7047 cfil_dgram_get_socket_state(struct mbuf *m, uint32_t *state_change_cnt, short *options,
7048 struct sockaddr **faddr, int *inp_flags)
7049 {
7050 struct m_tag *tag = NULL;
7051 struct cfil_tag *ctag = NULL;
7052
7053 tag = m_tag_locate(m, KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_CFIL_UDP, NULL);
7054 if (tag) {
7055 ctag = (struct cfil_tag *)(tag + 1);
7056 if (state_change_cnt) {
7057 *state_change_cnt = ctag->cfil_so_state_change_cnt;
7058 }
7059 if (options) {
7060 *options = ctag->cfil_so_options;
7061 }
7062 if (faddr) {
7063 *faddr = (struct sockaddr *) &ctag->cfil_faddr;
7064 }
7065 if (inp_flags) {
7066 *inp_flags = ctag->cfil_inp_flags;
7067 }
7068
7069 /*
7070 * Unlink tag and hand it over to caller.
7071 * Note that the caller is responsible for freeing it.
7072 */
7073 m_tag_unlink(m, tag);
7074 return tag;
7075 }
7076 return NULL;
7077 }
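
/*
 * Caller-side sketch (not compiled): consuming the state saved by
 * cfil_dgram_save_socket_state().  The tag returned by
 * cfil_dgram_get_socket_state() has already been unlinked from the mbuf, so
 * the caller must free it with m_tag_free() once the values are no longer
 * needed.  The surrounding output-path function is hypothetical.
 */
#if 0
static void
example_consume_cfil_state(struct mbuf *m)
{
	uint32_t state_change_cnt = 0;
	short so_options = 0;
	int inp_flags = 0;
	struct sockaddr *faddr = NULL;
	struct m_tag *tag;

	tag = cfil_dgram_get_socket_state(m, &state_change_cnt, &so_options,
	    &faddr, &inp_flags);
	if (tag != NULL) {
		/* ... use state_change_cnt, so_options, faddr, inp_flags ... */
		m_tag_free(tag);    /* tag is unlinked, caller owns it */
	}
}
#endif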
7078
7079 boolean_t
7080 cfil_dgram_peek_socket_state(struct mbuf *m, int *inp_flags)
7081 {
7082 struct m_tag *tag = NULL;
7083 struct cfil_tag *ctag = NULL;
7084
7085 tag = m_tag_locate(m, KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_CFIL_UDP, NULL);
7086 if (tag) {
7087 ctag = (struct cfil_tag *)(tag + 1);
7088 if (inp_flags) {
7089 *inp_flags = ctag->cfil_inp_flags;
7090 }
7091 return true;
7092 }
7093 return false;
7094 }
7095
7096 static int
7097 cfil_dispatch_stats_event_locked(int kcunit, struct cfil_stats_report_buffer *buffer, uint32_t stats_count)
7098 {
7099 struct content_filter *cfc = NULL;
7100 errno_t error = 0;
7101 size_t msgsize = 0;
7102
7103 if (buffer == NULL || stats_count == 0) {
7104 return error;
7105 }
7106
7107 if (content_filters == NULL || kcunit > MAX_CONTENT_FILTER) {
7108 return error;
7109 }
7110
7111 cfc = content_filters[kcunit - 1];
7112 if (cfc == NULL) {
7113 return error;
7114 }
7115
7116 /* Would be wasteful to try */
7117 if (cfc->cf_flags & CFF_FLOW_CONTROLLED) {
7118 error = ENOBUFS;
7119 goto done;
7120 }
7121
7122 msgsize = sizeof(struct cfil_msg_stats_report) + (sizeof(struct cfil_msg_sock_stats) * stats_count);
7123 buffer->msghdr.cfm_len = msgsize;
7124 buffer->msghdr.cfm_version = 1;
7125 buffer->msghdr.cfm_type = CFM_TYPE_EVENT;
7126 buffer->msghdr.cfm_op = CFM_OP_STATS;
7127 buffer->msghdr.cfm_sock_id = 0;
7128 buffer->count = stats_count;
7129
7130 #if STATS_DEBUG
7131 CFIL_LOG(LOG_ERR, "STATS (kcunit %d): msg size %lu - %lu %lu %lu",
7132 kcunit,
7133 (unsigned long)msgsize,
7134 (unsigned long)sizeof(struct cfil_msg_stats_report),
7135 (unsigned long)sizeof(struct cfil_msg_sock_stats),
7136 (unsigned long)stats_count);
7137 #endif
7138
7139 error = ctl_enqueuedata(cfc->cf_kcref, cfc->cf_kcunit,
7140 buffer,
7141 msgsize,
7142 CTL_DATA_EOR);
7143 if (error != 0) {
7144 CFIL_LOG(LOG_ERR, "ctl_enqueuedata() failed: %d", error);
7145 goto done;
7146 }
7147 OSIncrementAtomic(&cfil_stats.cfs_stats_event_ok);
7148
7149 #if STATS_DEBUG
7150 CFIL_LOG(LOG_ERR, "CFIL: STATS REPORT: send msg to %d", kcunit);
7151 #endif
7152
7153 done:
7154
7155 if (error == ENOBUFS) {
7156 OSIncrementAtomic(
7157 &cfil_stats.cfs_stats_event_flow_control);
7158
7159 if (!cfil_rw_lock_shared_to_exclusive(&cfil_lck_rw)) {
7160 cfil_rw_lock_exclusive(&cfil_lck_rw);
7161 }
7162
7163 cfc->cf_flags |= CFF_FLOW_CONTROLLED;
7164
7165 cfil_rw_unlock_exclusive(&cfil_lck_rw);
7166 } else if (error != 0) {
7167 OSIncrementAtomic(&cfil_stats.cfs_stats_event_fail);
7168 }
7169
7170 return error;
7171 }
7172
7173 static void
7174 cfil_stats_report_thread_sleep(bool forever)
7175 {
7176 #if STATS_DEBUG
7177 CFIL_LOG(LOG_ERR, "CFIL: STATS COLLECTION SLEEP");
7178 #endif
7179
7180 if (forever) {
7181 (void) assert_wait((event_t) &cfil_sock_attached_stats_count,
7182 THREAD_INTERRUPTIBLE);
7183 } else {
7184 uint64_t deadline = 0;
7185 nanoseconds_to_absolutetime(CFIL_STATS_REPORT_RUN_INTERVAL_NSEC, &deadline);
7186 clock_absolutetime_interval_to_deadline(deadline, &deadline);
7187
7188 (void) assert_wait_deadline(&cfil_sock_attached_stats_count,
7189 THREAD_INTERRUPTIBLE, deadline);
7190 }
7191 }
7192
7193 static void
7194 cfil_stats_report_thread_func(void *v, wait_result_t w)
7195 {
7196 #pragma unused(v, w)
7197
7198 ASSERT(cfil_stats_report_thread == current_thread());
7199 thread_set_thread_name(current_thread(), "CFIL_STATS_REPORT");
7200
7201 // Kick off the first stats report shortly
7202 cfil_stats_report_thread_sleep(false);
7203 thread_block_parameter((thread_continue_t) cfil_stats_report, NULL);
7204 /* NOTREACHED */
7205 }
7206
7207 static bool
7208 cfil_stats_collect_flow_stats_for_filter(int kcunit,
7209 struct cfil_info *cfil_info,
7210 struct cfil_entry *entry,
7211 struct timeval current_tv)
7212 {
7213 struct cfil_stats_report_buffer *buffer = NULL;
7214 struct cfil_msg_sock_stats *flow_array = NULL;
7215 struct cfil_msg_sock_stats *stats = NULL;
7216 struct inpcb *inp = NULL;
7217 struct timeval diff_time;
7218 uint64_t diff_time_usecs;
7219 int index = 0;
7220
7221 if (entry->cfe_stats_report_frequency == 0) {
7222 return false;
7223 }
7224
7225 buffer = global_cfil_stats_report_buffers[kcunit - 1];
7226 if (buffer == NULL) {
7227 #if STATS_DEBUG
7228 CFIL_LOG(LOG_ERR, "CFIL: STATS: no buffer");
7229 #endif
7230 return false;
7231 }
7232
7233 timersub(&current_tv, &entry->cfe_stats_report_ts, &diff_time);
7234 diff_time_usecs = (diff_time.tv_sec * USEC_PER_SEC) + diff_time.tv_usec;
7235
7236 #if STATS_DEBUG
7237 CFIL_LOG(LOG_ERR, "CFIL: STATS REPORT - elapsed time - ts %llu %llu cur ts %llu %llu diff %llu %llu (usecs %llu) @freq %llu usecs sockID %llu",
7238 (unsigned long long)entry->cfe_stats_report_ts.tv_sec,
7239 (unsigned long long)entry->cfe_stats_report_ts.tv_usec,
7240 (unsigned long long)current_tv.tv_sec,
7241 (unsigned long long)current_tv.tv_usec,
7242 (unsigned long long)diff_time.tv_sec,
7243 (unsigned long long)diff_time.tv_usec,
7244 (unsigned long long)diff_time_usecs,
7245 (unsigned long long)((entry->cfe_stats_report_frequency * NSEC_PER_MSEC) / NSEC_PER_USEC),
7246 cfil_info->cfi_sock_id);
7247 #endif
7248
7249 // Compare elapsed time in usecs
7250 if (diff_time_usecs >= (entry->cfe_stats_report_frequency * NSEC_PER_MSEC) / NSEC_PER_USEC) {
7251 #if STATS_DEBUG
7252 CFIL_LOG(LOG_ERR, "CFIL: STATS REPORT - in %llu reported %llu",
7253 cfil_info->cfi_byte_inbound_count,
7254 entry->cfe_byte_inbound_count_reported);
7255 CFIL_LOG(LOG_ERR, "CFIL: STATS REPORT - out %llu reported %llu",
7256 cfil_info->cfi_byte_outbound_count,
7257 entry->cfe_byte_outbound_count_reported);
7258 #endif
7259 // Check if the flow has new bytes that have not yet been reported
7260 if (entry->cfe_byte_inbound_count_reported < cfil_info->cfi_byte_inbound_count ||
7261 entry->cfe_byte_outbound_count_reported < cfil_info->cfi_byte_outbound_count) {
7262 flow_array = (struct cfil_msg_sock_stats *)&buffer->stats;
7263 index = global_cfil_stats_counts[kcunit - 1];
7264
7265 stats = &flow_array[index];
7266 stats->cfs_sock_id = cfil_info->cfi_sock_id;
7267 stats->cfs_byte_inbound_count = cfil_info->cfi_byte_inbound_count;
7268 stats->cfs_byte_outbound_count = cfil_info->cfi_byte_outbound_count;
7269
7270 if (entry->cfe_laddr_sent == false) {
7271 /* Cache the attach-time local address if we do not have it yet */
7272 if (cfil_info->cfi_so_attach_laddr.sa.sa_len == 0) {
7273 inp = cfil_info->cfi_so ? sotoinpcb(cfil_info->cfi_so) : NULL;
7274 if (inp != NULL) {
7275 boolean_t outgoing = (cfil_info->cfi_dir == CFS_CONNECTION_DIR_OUT);
7276 union sockaddr_in_4_6 *src = outgoing ? &cfil_info->cfi_so_attach_laddr : NULL;
7277 union sockaddr_in_4_6 *dst = outgoing ? NULL : &cfil_info->cfi_so_attach_laddr;
7278 cfil_fill_event_msg_addresses(cfil_info->cfi_hash_entry, inp,
7279 src, dst, !IS_INP_V6(inp), outgoing);
7280 }
7281 }
7282
7283 if (cfil_info->cfi_so_attach_laddr.sa.sa_len != 0) {
7284 stats->cfs_laddr.sin6 = cfil_info->cfi_so_attach_laddr.sin6;
7285 entry->cfe_laddr_sent = true;
7286 }
7287 }
7288
7289 global_cfil_stats_counts[kcunit - 1]++;
7290
7291 entry->cfe_stats_report_ts = current_tv;
7292 entry->cfe_byte_inbound_count_reported = cfil_info->cfi_byte_inbound_count;
7293 entry->cfe_byte_outbound_count_reported = cfil_info->cfi_byte_outbound_count;
7294 #if STATS_DEBUG
7295 cfil_info_log(LOG_ERR, cfil_info, "CFIL: LIFECYCLE: STATS COLLECTED");
7296 #endif
7297 CFI_ADD_TIME_LOG(cfil_info, &current_tv, &cfil_info->cfi_first_event, CFM_OP_STATS);
7298 return true;
7299 }
7300 }
7301 return false;
7302 }
7303
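/*
 * Continuation of the stats report thread: walk the flows with attached
 * stats in batches of CFIL_STATS_REPORT_MAX_COUNT, fill the per-filter
 * report buffers, dispatch one CFM_OP_STATS event per filter and then go
 * back to sleep.
 */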
7304 static void
7305 cfil_stats_report(void *v, wait_result_t w)
7306 {
7307 #pragma unused(v, w)
7308
7309 struct cfil_info *cfil_info = NULL;
7310 struct cfil_entry *entry = NULL;
7311 struct timeval current_tv;
7312 uint32_t flow_count = 0;
7313 uint64_t saved_next_sock_id = 0; // Sock id to resume reporting from on the next pass
7314 bool flow_reported = false;
7315
7316 #if STATS_DEBUG
7317 CFIL_LOG(LOG_ERR, "CFIL: STATS COLLECTION RUNNING");
7318 #endif
7319
7320 do {
7321 // Collect the sock ids of all flows that have new stats
7322 cfil_rw_lock_shared(&cfil_lck_rw);
7323
7324 if (cfil_sock_attached_stats_count == 0) {
7325 #if STATS_DEBUG
7326 CFIL_LOG(LOG_ERR, "CFIL: STATS: no flow");
7327 #endif
7328 cfil_rw_unlock_shared(&cfil_lck_rw);
7329 goto go_sleep;
7330 }
7331
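		// Reset the per-filter report buffers and flow counts before collecting this pass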
7332 for (int kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
7333 if (global_cfil_stats_report_buffers[kcunit - 1] != NULL) {
7334 memset(global_cfil_stats_report_buffers[kcunit - 1], 0, sizeof(struct cfil_stats_report_buffer));
7335 }
7336 global_cfil_stats_counts[kcunit - 1] = 0;
7337 }
7338
7339 microuptime(&current_tv);
7340 flow_count = 0;
7341
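		// Walk the flows with attached stats; if a previous pass stopped early, skip ahead to where it left off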
7342 TAILQ_FOREACH(cfil_info, &cfil_sock_head_stats, cfi_link_stats) {
7343 if (saved_next_sock_id != 0 &&
7344 saved_next_sock_id == cfil_info->cfi_sock_id) {
7345 // This is where we left off previously; start accumulating from here
7346 saved_next_sock_id = 0;
7347 }
7348
7349 if (saved_next_sock_id == 0) {
7350 if (flow_count >= CFIL_STATS_REPORT_MAX_COUNT) {
7351 // Examine a fixed number of flows each round. Remember the current flow
7352 // so we can start from here on the next pass
7353 saved_next_sock_id = cfil_info->cfi_sock_id;
7354 break;
7355 }
7356
7357 flow_reported = false;
7358 for (int kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
7359 entry = &cfil_info->cfi_entries[kcunit - 1];
7360 if (entry->cfe_filter == NULL) {
7361 #if STATS_DEBUG
7362 CFIL_LOG(LOG_NOTICE, "CFIL: STATS REPORT - so %llx no filter",
7363 cfil_info->cfi_so ? (uint64_t)VM_KERNEL_ADDRPERM(cfil_info->cfi_so) : 0);
7364 #endif
7365 continue;
7366 }
7367
7368 if ((entry->cfe_stats_report_frequency > 0) &&
7369 cfil_stats_collect_flow_stats_for_filter(kcunit, cfil_info, entry, current_tv) == true) {
7370 flow_reported = true;
7371 }
7372 }
7373 if (flow_reported == true) {
7374 flow_count++;
7375 }
7376 }
7377 }
7378
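		// Send one stats event per filter that accumulated flow records during this pass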
7379 if (flow_count > 0) {
7380 #if STATS_DEBUG
7381 CFIL_LOG(LOG_ERR, "CFIL: STATS reporting for %d flows", flow_count);
7382 #endif
7383 for (int kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
7384 if (global_cfil_stats_report_buffers[kcunit - 1] != NULL &&
7385 global_cfil_stats_counts[kcunit - 1] > 0) {
7386 cfil_dispatch_stats_event_locked(kcunit,
7387 global_cfil_stats_report_buffers[kcunit - 1],
7388 global_cfil_stats_counts[kcunit - 1]);
7389 }
7390 }
7391 } else {
7392 cfil_rw_unlock_shared(&cfil_lck_rw);
7393 goto go_sleep;
7394 }
7395
7396 cfil_rw_unlock_shared(&cfil_lck_rw);
7397
7398 // Loop again if we haven't finished the whole cfil_info list
7399 } while (saved_next_sock_id != 0);
7400
7401 go_sleep:
7402
7403 // Sleep forever (until woken up) if there are no more flows to report
7404 cfil_rw_lock_shared(&cfil_lck_rw);
7405 cfil_stats_report_thread_sleep(cfil_sock_attached_stats_count == 0);
7406 cfil_rw_unlock_shared(&cfil_lck_rw);
7407 thread_block_parameter((thread_continue_t) cfil_stats_report, NULL);
7408 /* NOTREACHED */
7409 }