1 /*
2 * Copyright (c) 2013-2020 Apple Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. Please obtain a copy of the License at
10 * http://www.opensource.apple.com/apsl/ and read it before using this
11 * file.
12 *
13 * The Original Code and all software distributed under the License are
14 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
18 * Please see the License for the specific language governing rights and
19 * limitations under the License.
20 *
21 * @APPLE_LICENSE_HEADER_END@
22 */
23
24 /*
25 * THEORY OF OPERATION
26 *
27 * The socket content filter subsystem provides a way for user space agents to
28 * make filtering decisions based on the content of the data being sent and
29 * received by INET/INET6 sockets.
30 *
31 * A content filter user space agent gets a copy of the data, and the data is
32 * also kept in a kernel buffer until the user space agent makes a pass or drop
33 * decision. This unidirectional flow of content avoids unnecessary data copies
34 * back to the kernel.
35 *
36 * A user space filter agent opens a kernel control socket with the name
37 * CONTENT_FILTER_CONTROL_NAME to attach to the socket content filter subsystem.
38 * When connected, a "struct content_filter" is created and set as the
39 * "unitinfo" of the corresponding kernel control socket instance.
40 *
41 * The socket content filter subsystem exchanges messages with the user space
42 * filter agent until an ultimate pass or drop decision is made by the
43 * user space filter agent.
44 *
45 * It should be noted that messages about many INET/INET6 sockets can be multiplexed
46 * over a single kernel control socket.
47 *
48 * Notes:
49 * - The current implementation supports all INET/INET6 sockets (TCP,
50 *   UDP, ICMP, etc.).
51 * - The current implementation supports up to two simultaneous content filters
52 * for iOS devices and eight simultaneous content filters for OSX.
53 *
54 *
55 * NECP FILTER CONTROL UNIT
56 *
57 * A user space filter agent uses the Network Extension Control Policy (NECP)
58 * database to specify which INET/INET6 sockets need to be filtered. The NECP
59 * criteria may be based on a variety of properties like user ID or proc UUID.
60 *
61 * The NECP "filter control unit" is used by the socket content filter subsystem
62 * to deliver the relevant INET/INET6 content information to the appropriate
63 * user space filter agent via its kernel control socket instance.
64 * This works as follows:
65 *
66 * 1) The user space filter agent specifies an NECP filter control unit when
67 * it adds its filtering rules to the NECP database.
68 *
69 * 2) The user space filter agent also sets its NECP filter control unit on the
70 * content filter kernel control socket via the socket option
71 * CFIL_OPT_NECP_CONTROL_UNIT.
72 *
73 * 3) The NECP database is consulted to find out if a given INET/INET6 socket
74 * needs to be subjected to content filtering and returns the corresponding
75 * NECP filter control unit -- the NECP filter control unit is actually
76 * stored in the INET/INET6 socket structure so the NECP lookup is really simple.
77 *
78 * 4) The NECP filter control unit is then used to find the corresponding
79 * kernel control socket instance.
80 *
81 * Note: NECP currently supports a single filter control unit per INET/INET6 socket
82 * but this restriction may soon be lifted.
83 *
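 * For illustration only, here is a hedged sketch of how a user space agent
 * might perform steps 1) and 2) with the standard kernel control socket
 * interfaces from <sys/kern_control.h> (privilege checks and error handling
 * omitted; the chosen necp_control_unit is an example value and must match
 * the unit used in the agent's NECP filtering rules):
 *
 *	int fd = socket(PF_SYSTEM, SOCK_DGRAM, SYSPROTO_CONTROL);
 *	struct ctl_info info = { 0 };
 *	strlcpy(info.ctl_name, CONTENT_FILTER_CONTROL_NAME, sizeof(info.ctl_name));
 *	ioctl(fd, CTLIOCGINFO, &info);          // resolve control name to ctl_id
 *	struct sockaddr_ctl sc = { 0 };
 *	sc.sc_len = sizeof(sc);
 *	sc.sc_family = AF_SYSTEM;
 *	sc.ss_sysaddr = AF_SYS_CONTROL;
 *	sc.sc_id = info.ctl_id;
 *	sc.sc_unit = 0;                         // unit selection elided in this sketch
 *	connect(fd, (struct sockaddr *)&sc, sizeof(sc));
 *	uint32_t necp_control_unit = 1;         // must match the agent's NECP rule
 *	setsockopt(fd, SYSPROTO_CONTROL, CFIL_OPT_NECP_CONTROL_UNIT,
 *	    &necp_control_unit, sizeof(necp_control_unit));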
84 *
85 * THE MESSAGING PROTOCOL
86 *
87 * The socket content filter subsystem and a user space filter agent
88 * communicate over the kernel control socket via an asynchronous
89 * messaging protocol (this is not a request-response protocol).
90 * The socket content filter subsystem sends event messages to the user
91 * space filter agent about the INET/INET6 sockets it is interested in filtering.
92 * The user space filter agent sends action messages to either allow
93 * data to pass or to disallow the data flow (and drop the connection).
94 *
95 * All messages over a content filter kernel control socket share the same
96 * common header of type "struct cfil_msg_hdr". The message type tells if
97 * it's an event message "CFM_TYPE_EVENT" or an action message "CFM_TYPE_ACTION".
98 * The message header field "cfm_sock_id" identifies a given INET/INET6 flow.
99 * For TCP, flows are per-socket. For UDP and other datagram protocols, there
100 * could be multiple flows per socket.
101 *
102 * Note the message header length field may be padded for alignment and can
103 * be larger than the actual content of the message.
104 * The field "cfm_op" describes the kind of event or action.
105 *
106 * Here are the kinds of content filter events:
107 * - CFM_OP_SOCKET_ATTACHED: a new INET/INET6 socket is being filtered
108 * - CFM_OP_SOCKET_CLOSED: An INET/INET6 socket is closed
109 * - CFM_OP_DATA_OUT: A span of data is being sent on an INET/INET6 socket
110 * - CFM_OP_DATA_IN: A span of data is being received on an INET/INET6 socket
111 *
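 * A hedged sketch of the event-receiving side of a user space agent (not
 * part of this file; buffer sizing, partial reads and error handling are
 * omitted):
 *
 *	uint64_t buf[1024];
 *	ssize_t n = recv(fd, buf, sizeof(buf), 0);
 *	struct cfil_msg_hdr *hdr = (struct cfil_msg_hdr *)buf;
 *	if (n >= (ssize_t)sizeof(*hdr) && hdr->cfm_type == CFM_TYPE_EVENT) {
 *		switch (hdr->cfm_op) {
 *		case CFM_OP_SOCKET_ATTACHED:
 *			// remember hdr->cfm_sock_id for this new flow
 *			break;
 *		case CFM_OP_DATA_OUT:
 *		case CFM_OP_DATA_IN:
 *			// inspect the data span, then send a CFM_OP_DATA_UPDATE
 *			break;
 *		case CFM_OP_SOCKET_CLOSED:
 *			// forget the state kept for hdr->cfm_sock_id
 *			break;
 *		}
 *	}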
112 *
113 * EVENT MESSAGES
114 *
115 * The CFM_OP_DATA_OUT and CFM_OP_DATA_IN event messages contain a span of
116 * data that is being sent or received. The position of this span of data
117 * in the data flow is described by a set of start and end offsets. These
118 * are absolute 64-bit offsets. The first byte sent (or received) starts
119 * at offset 0 and ends at offset 1. The length of the content data
120 * is given by the difference between the end offset and the start offset.
121 *
122 * After a CFM_OP_SOCKET_ATTACHED is delivered, CFM_OP_DATA_OUT and
123 * CFM_OP_DATA_IN events are not delivered until a CFM_OP_DATA_UPDATE
124 * action message is sent by the user space filter agent.
125 *
126 * Note: absolute 64-bit offsets should be large enough for the foreseeable
127 * future. A 64-bit counter will wrap after 468 years at 10 Gbit/sec:
128 * 2^64 / ((10E9 / 8) * 60 * 60 * 24 * 365.25) = 467.63
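 *
 * Spelled out as a quick sanity check (illustrative only):
 *
 *	double bytes_per_year = (10e9 / 8) * 60 * 60 * 24 * 365.25;
 *	double years_to_wrap = 18446744073709551616.0 / bytes_per_year;	// 2^64, ~467.6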
129 *
130 * There are two kinds of primary content filter actions:
131 * - CFM_OP_DATA_UPDATE: to update pass or peek offsets for each direction.
132 * - CFM_OP_DROP: to shut down the socket and disallow further data flow
133 *
134 * There is also an action to mark a given client flow as already filtered
135 * at a higher level, CFM_OP_BLESS_CLIENT.
136 *
137 *
138 * ACTION MESSAGES
139 *
140 * The CFM_OP_DATA_UPDATE action messages let the user space filter
141 * agent allow data to flow up to the specified pass offset -- there
142 * is a pass offset for outgoing data and a pass offset for incoming data.
143 * When a new INET/INET6 socket is attached to the content filter and a flow is
144 * created, each pass offset is initially set to 0 so no data is allowed to pass by
145 * default. When the pass offset is set to CFM_MAX_OFFSET via a CFM_OP_DATA_UPDATE
146 * then the data flow becomes unrestricted.
147 *
148 * Note that pass offsets can only be incremented. A CFM_OP_DATA_UPDATE message
149 * with a pass offset smaller than the pass offset of a previous
150 * CFM_OP_DATA_UPDATE message is silently ignored.
151 *
152 * A user space filter agent also uses CFM_OP_DATA_UPDATE action messages
153 * to tell the kernel how much data it wants to see by using the peek offsets.
154 * Just like pass offsets, there is a peek offset for each direction.
155 * When a new INET/INET6 flow is created, each peek offset is initially set to 0
156 * so no CFM_OP_DATA_OUT and CFM_OP_DATA_IN event messages are dispatched by default
157 * until a CFM_OP_DATA_UPDATE action message with a greater than 0 peek offset is sent
158 * by the user space filter agent. When the peek offset is set to CFM_MAX_OFFSET via
159 * a CFM_OP_DATA_UPDATE then the flow of update data events becomes unrestricted.
160 *
161 * Note that peek offsets cannot be smaller than the corresponding pass offset.
162 * Also, a peek offset cannot be smaller than the corresponding end offset
163 * of the last CFM_OP_DATA_OUT/CFM_OP_DATA_IN message dispatched. An attempt
164 * to set a peek offset that is too small is silently ignored.
165 *
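 * A hedged sketch of how an agent might fully open both directions for a
 * flow it has decided to allow (assuming the action message layout from
 * <net/content_filter.h>; error handling omitted):
 *
 *	struct cfil_msg_action action = { 0 };
 *	action.cfa_msghdr.cfm_len = sizeof(action);
 *	action.cfa_msghdr.cfm_version = CFM_VERSION_CURRENT;
 *	action.cfa_msghdr.cfm_type = CFM_TYPE_ACTION;
 *	action.cfa_msghdr.cfm_op = CFM_OP_DATA_UPDATE;
 *	action.cfa_msghdr.cfm_sock_id = flow_sock_id;	// from a prior event message
 *	action.cfa_out_pass_offset = CFM_MAX_OFFSET;	// unrestricted outgoing data
 *	action.cfa_out_peek_offset = CFM_MAX_OFFSET;
 *	action.cfa_in_pass_offset = CFM_MAX_OFFSET;	// unrestricted incoming data
 *	action.cfa_in_peek_offset = CFM_MAX_OFFSET;
 *	send(fd, &action, sizeof(action), 0);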
166 *
167 * PER FLOW "struct cfil_info"
168 *
169 * As soon as an INET/INET6 socket gets attached to a content filter, a
170 * "struct cfil_info" is created to hold the content filtering state for this
171 * socket. For UDP and other datagram protocols, as soon as traffic is seen for
172 * each new flow identified by its 4-tuple of source address/port and destination
173 * address/port, a "struct cfil_info" is created. Each datagram socket may
174 * have multiple flows maintained in a hash table of "struct cfil_info" entries.
175 *
176 * The content filtering state is made of the following information
177 * for each direction:
178 * - The current pass offset;
179 * - The first and last offsets of the data pending, waiting for a filtering
180 * decision;
181 * - The inject queue for data that passed the filters and that needs
182 * to be re-injected;
183 * - A content filter specific state in a set of "struct cfil_entry"
184 *
185 *
186 * CONTENT FILTER STATE "struct cfil_entry"
187 *
188 * The "struct cfil_entry" maintains the information most relevant to the
189 * message handling over a kernel control socket with a user space filter agent.
190 *
191 * The "struct cfil_entry" holds the NECP filter control unit that corresponds
192 * to its kernel control socket unit and also has a pointer
193 * to the corresponding "struct content_filter".
194 *
195 * For each direction, "struct cfil_entry" maintains the following information:
196 * - The pass offset
197 * - The peek offset
198 * - The offset of the last data peeked at by the filter
199 * - A queue of data that's waiting to be delivered to the user space filter
200 * agent on the kernel control socket
201 * - A queue of data for which event messages have been sent on the kernel
202 * control socket and are pending for a filtering decision.
203 *
204 *
205 * CONTENT FILTER QUEUES
206 *
207 * Data that is being filtered is steered away from the INET/INET6 socket buffer
208 * and instead will sit in one of three content filter queues until the data
209 * can be re-injected into the INET/INET6 socket buffer.
210 *
211 * A content filter queue is represented by "struct cfil_queue" that contains
212 * a list of mbufs and the start and end offset of the data span of
213 * the list of mbufs.
214 *
215 * The data moves into the three content filter queues according to this
216 * sequence:
217 * a) The "cfe_ctl_q" of "struct cfil_entry"
218 * b) The "cfe_pending_q" of "struct cfil_entry"
219 * c) The "cfi_inject_q" of "struct cfil_info"
220 *
221 * Note: The sequence (a),(b) may be repeated several times if there is more
222 * than one content filter attached to the INET/INET6 socket.
223 *
224 * The "cfe_ctl_q" queue holds data that cannot be delivered to the
225 * kernel control socket for two reasons:
226 * - The peek offset is less than the end offset of the mbuf data
227 * - The kernel control socket is flow controlled
228 *
229 * The "cfe_pending_q" queue holds data for which CFM_OP_DATA_OUT or
230 * CFM_OP_DATA_IN messages have been successfully dispatched to the kernel control
231 * socket and that is waiting for a pass action message from the user space
232 * filter agent. An mbuf's full length must be allowed to pass before it is
233 * removed from the cfe_pending_q.
234 *
235 * The "cfi_inject_q" queue holds data that has been fully allowed to pass
236 * by the user space filter agent and that needs to be re-injected into the
237 * INET/INET6 socket.
238 *
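 * Putting it together, a span of filtered data moves roughly like this,
 * with one (a)/(b) hop per attached content filter:
 *
 *	socket buffer -> cfe_ctl_q -> cfe_pending_q -> cfi_inject_q -> socket buffer
 *	                 (not yet      (sent to the     (fully passed,
 *	                  deliverable)  agent, awaiting   awaiting
 *	                                a verdict)        re-injection)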
239 *
240 * IMPACT ON FLOW CONTROL
241 *
242 * An essential aspect of the content filter subsystem is to minimize the
243 * impact on flow control of the INET/INET6 sockets being filtered.
244 *
245 * The processing overhead of content filtering may have an effect on
246 * flow control by adding noticeable delays and cannot be eliminated --
247 * care must be taken by the user space filter agent to minimize the
248 * processing delays.
249 *
250 * The amount of data being filtered is kept in buffers while waiting for
251 * a decision by the user space filter agent. This amount of data pending
252 * needs to be subtracted from the amount of data available in the
253 * corresponding INET/INET6 socket buffer. This is done by modifying
254 * sbspace() and tcp_sbspace() to account for the amount of data pending
255 * in the content filter.
256 *
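 * Conceptually, the adjustment looks like this (a simplified sketch, not
 * the exact sbspace() code):
 *
 *	int64_t space = imin(sb->sb_hiwat - sb->sb_cc, sb->sb_mbmax - sb->sb_mbcnt);
 *	if (sb->sb_so->so_flags & SOF_CONTENT_FILTER) {
 *		space -= cfil_sock_data_pending(sb);	// bytes still held by cfil
 *	}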
257 *
258 * LOCKING STRATEGY
259 *
260 * The global state of content filter subsystem is protected by a single
261 * read-write lock "cfil_lck_rw". The data flow can be done with the
262 * cfil read-write lock held as shared so it can be re-entered from multiple
263 * threads.
264 *
265 * The per INET/INET6 socket content filter state -- "struct cfil_info" -- is
266 * protected by the socket lock.
267 *
268 * An INET/INET6 socket lock cannot be taken while the cfil read-write lock
269 * is held. That's why we have some sequences where we drop the cfil read-write
270 * lock before taking the INET/INET6 socket lock.
271 *
272 * It is also important to lock the INET/INET6 socket buffer while the content
273 * filter is modifying the amount of pending data. Otherwise the calculations
274 * in sbspace() and tcp_sbspace() could be wrong.
275 *
276 * The "cfil_lck_rw" protects "struct content_filter" and also the fields
277 * "cfe_link" and "cfe_filter" of "struct cfil_entry".
278 *
279 * Actually "cfe_link" and "cfe_filter" are protected both by
280 * "cfil_lck_rw" and the socket lock: they may be modified only when
281 * "cfil_lck_rw" is exclusive and the socket is locked.
282 *
283 * To read the other fields of "struct content_filter" we have to take
284 * "cfil_lck_rw" in shared mode.
285 *
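 * A sketch of the resulting pattern (simplified; see the call sites below
 * for the real sequences):
 *
 *	cfil_rw_lock_shared(&cfil_lck_rw);
 *	// ... read what is needed from "struct content_filter" ...
 *	cfil_rw_unlock_shared(&cfil_lck_rw);
 *	socket_lock(so, 1);		// only after the cfil lock is dropped
 *	// ... update the per-socket "struct cfil_info" ...
 *	socket_unlock(so, 1);
 *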
286 * DATAGRAM SPECIFICS:
287 *
288 * The socket content filter supports all INET/INET6 protocols. However
289 * the treatments for TCP sockets and for datagram (UDP, ICMP, etc) sockets
290 * are slightly different.
291 *
292 * Each datagram socket may have multiple flows. Each flow is identified
293 * by the flow's source address/port and destination address/port tuple
294 * and is represented as a "struct cfil_info" entry. For each socket,
295 * a hash table is used to maintain the collection of flows under that socket.
296 *
297 * Each datagram flow is uniquely identified by its "struct cfil_info" cfi_sock_id.
298 * The highest 32 bits of the cfi_sock_id contain the socket's so_gencnt. This portion
299 * of the cfi_sock_id is used to locate the socket during socket lookup. The lowest 32 bits
300 * of the cfi_sock_id contain a hash of the flow's 4-tuple. This portion of the cfi_sock_id
301 * is used as the hash value for the flow hash table lookup within the parent socket.
302 *
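 * For illustration, the cfi_sock_id is composed and taken apart like this
 * (see the CFI_MASK_*/CFI_SHIFT_* definitions further down in this file):
 *
 *	cfil_sock_id_t id = ((cfil_sock_id_t)so->so_gencnt << CFI_SHIFT_GENCNT) |
 *	    (fhash & CFI_MASK_FLOWHASH);	// fhash: hash of the flow 4-tuple
 *	u_int32_t gencnt = (u_int32_t)(id >> CFI_SHIFT_GENCNT);
 *	u_int32_t flowhash = (u_int32_t)(id & CFI_MASK_FLOWHASH);
 *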
303 * Since datagram sockets may not be connected, flow states may not be maintained in the
304 * socket structures and thus have to be saved for each packet. These saved states will be
305 * used for both outgoing and incoming reinjections. For outgoing packets, destination
306 * address/port as well as the current socket states will be saved. During reinjection,
307 * these saved states will be used instead. For incoming packets, control and address
308 * mbufs will be chained to the data. During reinjection, the whole chain will be queued
309 * onto the incoming socket buffer.
310 *
311 * LIMITATIONS
312 *
313 * - Supports all INET/INET6 sockets, such as TCP, UDP, ICMP, etc
314 *
315 * - Does not support TCP unordered messages
316 */
317
318 /*
319 * TO DO LIST
320 *
321 * Deal with OOB
322 *
323 */
324
325 #include <sys/types.h>
326 #include <sys/kern_control.h>
327 #include <sys/queue.h>
328 #include <sys/domain.h>
329 #include <sys/protosw.h>
330 #include <sys/syslog.h>
331 #include <sys/systm.h>
332 #include <sys/param.h>
333 #include <sys/mbuf.h>
334
335 #include <kern/locks.h>
336 #include <kern/zalloc.h>
337 #include <kern/debug.h>
338
339 #include <net/content_filter.h>
340 #include <net/content_filter_crypto.h>
341
342 #define _IP_VHL
343 #include <netinet/ip.h>
344 #include <netinet/in_pcb.h>
345 #include <netinet/tcp.h>
346 #include <netinet/tcp_var.h>
347 #include <netinet/udp.h>
348 #include <netinet/udp_var.h>
349
350 #include <string.h>
351 #include <libkern/libkern.h>
352 #include <kern/sched_prim.h>
353 #include <kern/task.h>
354 #include <mach/task_info.h>
355
356 #if !TARGET_OS_OSX && !defined(XNU_TARGET_OS_OSX)
357 #define MAX_CONTENT_FILTER 2
358 #else
359 #define MAX_CONTENT_FILTER 8
360 #endif
361
362 extern struct inpcbinfo ripcbinfo;
363 struct cfil_entry;
364
365 /*
366 * The structure content_filter represents a user space content filter
367 * It's created and associated with a kernel control socket instance
368 */
369 struct content_filter {
370 kern_ctl_ref cf_kcref;
371 u_int32_t cf_kcunit;
372 u_int32_t cf_flags;
373
374 uint32_t cf_necp_control_unit;
375
376 uint32_t cf_sock_count;
377 TAILQ_HEAD(, cfil_entry) cf_sock_entries;
378
379 cfil_crypto_state_t cf_crypto_state;
380 };
381
382 #define CFF_ACTIVE 0x01
383 #define CFF_DETACHING 0x02
384 #define CFF_FLOW_CONTROLLED 0x04
385
386 struct content_filter **content_filters = NULL;
387 uint32_t cfil_active_count = 0; /* Number of active content filters */
388 uint32_t cfil_sock_attached_count = 0; /* Number of socket attachments */
389 uint32_t cfil_sock_udp_attached_count = 0; /* Number of UDP socket attachments */
390 uint32_t cfil_sock_attached_stats_count = 0; /* Number of sockets requested periodic stats report */
391 uint32_t cfil_close_wait_timeout = 1000; /* in milliseconds */
392
393 static kern_ctl_ref cfil_kctlref = NULL;
394
395 static lck_grp_attr_t *cfil_lck_grp_attr = NULL;
396 static lck_attr_t *cfil_lck_attr = NULL;
397 static lck_grp_t *cfil_lck_grp = NULL;
398 decl_lck_rw_data(static, cfil_lck_rw);
399
400 #define CFIL_RW_LCK_MAX 8
401
402 int cfil_rw_nxt_lck = 0;
403 void* cfil_rw_lock_history[CFIL_RW_LCK_MAX];
404
405 int cfil_rw_nxt_unlck = 0;
406 void* cfil_rw_unlock_history[CFIL_RW_LCK_MAX];
407
408 static ZONE_DECLARE(content_filter_zone, "content_filter",
409 sizeof(struct content_filter), ZC_NONE);
410
411 MBUFQ_HEAD(cfil_mqhead);
412
413 struct cfil_queue {
414 uint64_t q_start; /* offset of first byte in queue */
415 uint64_t q_end; /* offset of last byte in queue */
416 struct cfil_mqhead q_mq;
417 };
418
419 /*
420 * struct cfil_entry
421 *
422 * There is one entry per content filter
423 */
424 struct cfil_entry {
425 TAILQ_ENTRY(cfil_entry) cfe_link;
426 SLIST_ENTRY(cfil_entry) cfe_order_link;
427 struct content_filter *cfe_filter;
428
429 struct cfil_info *cfe_cfil_info;
430 uint32_t cfe_flags;
431 uint32_t cfe_necp_control_unit;
432 struct timeval cfe_last_event; /* To user space */
433 struct timeval cfe_last_action; /* From user space */
434 uint64_t cfe_byte_inbound_count_reported; /* stats already been reported */
435 uint64_t cfe_byte_outbound_count_reported; /* stats already been reported */
436 struct timeval cfe_stats_report_ts; /* Timestamp for last stats report */
437 uint32_t cfe_stats_report_frequency; /* Interval for stats report in msecs */
438 boolean_t cfe_laddr_sent;
439
440 struct cfe_buf {
441 /*
442 * cfe_pending_q holds data that has been delivered to
443 * the filter and for which we are waiting for an action
444 */
445 struct cfil_queue cfe_pending_q;
446 /*
447 * This queue is for data that has not been delivered to
448 * the content filter (new data, pass/peek offsets, or flow control)
449 */
450 struct cfil_queue cfe_ctl_q;
451
452 uint64_t cfe_pass_offset;
453 uint64_t cfe_peek_offset;
454 uint64_t cfe_peeked;
455 } cfe_snd, cfe_rcv;
456 };
457
458 #define CFEF_CFIL_ATTACHED 0x0001 /* was attached to filter */
459 #define CFEF_SENT_SOCK_ATTACHED 0x0002 /* sock attach event was sent */
460 #define CFEF_DATA_START 0x0004 /* can send data event */
461 #define CFEF_FLOW_CONTROLLED 0x0008 /* wait for flow control lift */
462 #define CFEF_SENT_DISCONNECT_IN 0x0010 /* event was sent */
463 #define CFEF_SENT_DISCONNECT_OUT 0x0020 /* event was sent */
464 #define CFEF_SENT_SOCK_CLOSED 0x0040 /* closed event was sent */
465 #define CFEF_CFIL_DETACHED 0x0080 /* filter was detached */
466
467
468 #define CFI_ADD_TIME_LOG(cfil, t1, t0, op) \
469 struct timeval64 _tdiff; \
470 if ((cfil)->cfi_op_list_ctr < CFI_MAX_TIME_LOG_ENTRY) { \
471 timersub(t1, t0, &_tdiff); \
472 (cfil)->cfi_op_time[(cfil)->cfi_op_list_ctr] = (uint32_t)(_tdiff.tv_sec * 1000 + _tdiff.tv_usec / 1000);\
473 (cfil)->cfi_op_list[(cfil)->cfi_op_list_ctr] = (unsigned char)op; \
474 (cfil)->cfi_op_list_ctr ++; \
475 }
476
477 struct cfil_hash_entry;
478
479 /*
480 * struct cfil_info
481 *
482 * There is a struct cfil_info per socket
483 */
484 struct cfil_info {
485 TAILQ_ENTRY(cfil_info) cfi_link;
486 TAILQ_ENTRY(cfil_info) cfi_link_stats;
487 struct socket *cfi_so;
488 uint64_t cfi_flags;
489 uint64_t cfi_sock_id;
490 struct timeval64 cfi_first_event;
491 uint32_t cfi_op_list_ctr;
492 uint32_t cfi_op_time[CFI_MAX_TIME_LOG_ENTRY]; /* time interval in microseconds since first event */
493 unsigned char cfi_op_list[CFI_MAX_TIME_LOG_ENTRY];
494 union sockaddr_in_4_6 cfi_so_attach_faddr; /* faddr at the time of attach */
495 union sockaddr_in_4_6 cfi_so_attach_laddr; /* laddr at the time of attach */
496
497 int cfi_dir;
498 uint64_t cfi_byte_inbound_count;
499 uint64_t cfi_byte_outbound_count;
500
501 boolean_t cfi_isSignatureLatest; /* Indicates if signature covers latest flow attributes */
502 u_int32_t cfi_filter_control_unit;
503 u_int32_t cfi_debug;
504 struct cfi_buf {
505 /*
506 * cfi_pending_first and cfi_pending_last describe the total
507 * amount of data outstanding for all the filters on
508 * this socket and data in the flow queue
509 * cfi_pending_mbcnt counts in sballoc() "chars of mbufs used"
510 */
511 uint64_t cfi_pending_first;
512 uint64_t cfi_pending_last;
513 uint32_t cfi_pending_mbcnt;
514 uint32_t cfi_pending_mbnum;
515 uint32_t cfi_tail_drop_cnt;
516 /*
517 * cfi_pass_offset is the minimum of all the filters
518 */
519 uint64_t cfi_pass_offset;
520 /*
521 * cfi_inject_q holds data that needs to be re-injected
522 * into the socket after filtering and that can
523 * be queued because of flow control
524 */
525 struct cfil_queue cfi_inject_q;
526 } cfi_snd, cfi_rcv;
527
528 struct cfil_entry cfi_entries[MAX_CONTENT_FILTER];
529 struct cfil_hash_entry *cfi_hash_entry;
530 SLIST_HEAD(, cfil_entry) cfi_ordered_entries;
531 os_refcnt_t cfi_ref_count;
532 } __attribute__((aligned(8)));
533
534 #define CFIF_DROP 0x0001 /* drop action applied */
535 #define CFIF_CLOSE_WAIT 0x0002 /* waiting for filter to close */
536 #define CFIF_SOCK_CLOSED 0x0004 /* socket is closed */
537 #define CFIF_RETRY_INJECT_IN 0x0010 /* inject in failed */
538 #define CFIF_RETRY_INJECT_OUT 0x0020 /* inject out failed */
539 #define CFIF_SHUT_WR 0x0040 /* shutdown write */
540 #define CFIF_SHUT_RD 0x0080 /* shutdown read */
541 #define CFIF_SOCKET_CONNECTED 0x0100 /* socket is connected */
542 #define CFIF_INITIAL_VERDICT 0x0200 /* received initial verdict */
543
544 #define CFI_MASK_GENCNT 0xFFFFFFFF00000000 /* upper 32 bits */
545 #define CFI_SHIFT_GENCNT 32
546 #define CFI_MASK_FLOWHASH 0x00000000FFFFFFFF /* lower 32 bits */
547 #define CFI_SHIFT_FLOWHASH 0
548
549 #define CFI_ENTRY_KCUNIT(i, e) ((uint32_t)(((e) - &((i)->cfi_entries[0])) + 1))
550
551 static ZONE_DECLARE(cfil_info_zone, "cfil_info",
552 sizeof(struct cfil_info), ZC_NONE);
553
554 TAILQ_HEAD(cfil_sock_head, cfil_info) cfil_sock_head;
555 TAILQ_HEAD(cfil_sock_head_stats, cfil_info) cfil_sock_head_stats;
556
557 #define CFIL_QUEUE_VERIFY(x) if (cfil_debug) cfil_queue_verify(x)
558 #define CFIL_INFO_VERIFY(x) if (cfil_debug) cfil_info_verify(x)
559
560 /*
561 * UDP Socket Support
562 */
563 LIST_HEAD(cfilhashhead, cfil_hash_entry);
564 #define CFILHASHSIZE 16
565 #define CFIL_HASH(laddr, faddr, lport, fport) ((faddr) ^ ((laddr) >> 16) ^ (fport) ^ (lport))
566
567 #define IS_INET(so) (so && so->so_proto && so->so_proto->pr_domain && (so->so_proto->pr_domain->dom_family == AF_INET || so->so_proto->pr_domain->dom_family == AF_INET6))
568 #define IS_TCP(so) (so && so->so_proto && so->so_proto->pr_type == SOCK_STREAM && so->so_proto->pr_protocol == IPPROTO_TCP)
569 #define IS_UDP(so) (so && so->so_proto && so->so_proto->pr_type == SOCK_DGRAM && so->so_proto->pr_protocol == IPPROTO_UDP)
570 #define IS_ICMP(so) (so && so->so_proto && (so->so_proto->pr_type == SOCK_RAW || so->so_proto->pr_type == SOCK_DGRAM) && \
571 (so->so_proto->pr_protocol == IPPROTO_ICMP || so->so_proto->pr_protocol == IPPROTO_ICMPV6))
572 #define IS_RAW(so) (so && so->so_proto && so->so_proto->pr_type == SOCK_RAW && so->so_proto->pr_protocol == IPPROTO_RAW)
573
574 #if !TARGET_OS_OSX && !defined(XNU_TARGET_OS_OSX)
575 #define IS_IP_DGRAM(so) (IS_INET(so) && IS_UDP(so))
576 #else
577 #define IS_IP_DGRAM(so) (IS_INET(so) && !IS_TCP(so))
578 #endif
579
580 #define OPTIONAL_IP_HEADER(so) (!IS_TCP(so) && !IS_UDP(so))
581 #define GET_SO_PROTO(so) ((so && so->so_proto) ? so->so_proto->pr_protocol : IPPROTO_MAX)
582 #define IS_INP_V6(inp) (inp && (inp->inp_vflag & INP_IPV6))
583
584 #define UNCONNECTED(inp) (inp && (((inp->inp_vflag & INP_IPV4) && (inp->inp_faddr.s_addr == INADDR_ANY)) || \
585 ((inp->inp_vflag & INP_IPV6) && IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr))))
586 #define IS_INP_V6(inp) (inp && (inp->inp_vflag & INP_IPV6))
587 #define IS_ENTRY_ATTACHED(cfil_info, kcunit) (cfil_info != NULL && (kcunit <= MAX_CONTENT_FILTER) && \
588 cfil_info->cfi_entries[kcunit - 1].cfe_filter != NULL)
589 #define IS_DNS(local, remote) (check_port(local, 53) || check_port(remote, 53) || check_port(local, 5353) || check_port(remote, 5353))
590 #define IS_INITIAL_TFO_DATA(so) (so && (so->so_flags1 & SOF1_PRECONNECT_DATA) && (so->so_state & SS_ISCONNECTING))
591 #define NULLADDRESS(addr) ((addr.sa.sa_len == 0) || \
592 (addr.sa.sa_family == AF_INET && addr.sin.sin_addr.s_addr == 0) || \
593 (addr.sa.sa_family == AF_INET6 && IN6_IS_ADDR_UNSPECIFIED(&addr.sin6.sin6_addr)))
594 #define LOCAL_ADDRESS_NEEDS_UPDATE(entry) \
595 ((entry->cfentry_family == AF_INET && entry->cfentry_laddr.addr46.ia46_addr4.s_addr == 0) || \
596 entry->cfentry_family == AF_INET6 && IN6_IS_ADDR_UNSPECIFIED(&entry->cfentry_laddr.addr6))
597 #define LOCAL_PORT_NEEDS_UPDATE(entry, so) (entry->cfentry_lport == 0 && IS_UDP(so))
598
599 #define SKIP_FILTER_FOR_TCP_SOCKET(so) \
600 (so == NULL || so->so_proto == NULL || so->so_proto->pr_domain == NULL || \
601 (so->so_proto->pr_domain->dom_family != PF_INET && so->so_proto->pr_domain->dom_family != PF_INET6) || \
602 so->so_proto->pr_type != SOCK_STREAM || \
603 so->so_proto->pr_protocol != IPPROTO_TCP || \
604 (so->so_flags & SOF_MP_SUBFLOW) != 0 || \
605 (so->so_flags1 & SOF1_CONTENT_FILTER_SKIP) != 0)
606
607 os_refgrp_decl(static, cfil_refgrp, "CFILRefGroup", NULL);
608
609 #define CFIL_INFO_FREE(cfil_info) \
610 if (cfil_info && (os_ref_release(&cfil_info->cfi_ref_count) == 0)) { \
611 cfil_info_free(cfil_info); \
612 }
613
614 /*
615 * Periodic Statistics Report:
616 */
617 static struct thread *cfil_stats_report_thread;
618 #define CFIL_STATS_REPORT_INTERVAL_MIN_MSEC 500 // Highest report frequency
619 #define CFIL_STATS_REPORT_RUN_INTERVAL_NSEC (CFIL_STATS_REPORT_INTERVAL_MIN_MSEC * NSEC_PER_MSEC)
620 #define CFIL_STATS_REPORT_MAX_COUNT 50 // Max stats to be reported per run
621
622 /* This buffer must have same layout as struct cfil_msg_stats_report */
623 struct cfil_stats_report_buffer {
624 struct cfil_msg_hdr msghdr;
625 uint32_t count;
626 struct cfil_msg_sock_stats stats[CFIL_STATS_REPORT_MAX_COUNT];
627 };
628 static struct cfil_stats_report_buffer *global_cfil_stats_report_buffers[MAX_CONTENT_FILTER];
629 static uint32_t global_cfil_stats_counts[MAX_CONTENT_FILTER];
630
631 /*
632 * UDP Garbage Collection:
633 */
634 static struct thread *cfil_udp_gc_thread;
635 #define UDP_FLOW_GC_IDLE_TO 30 // Flow Idle Timeout in seconds
636 #define UDP_FLOW_GC_ACTION_TO 10 // Flow Action Timeout (no action from user space) in seconds
637 #define UDP_FLOW_GC_MAX_COUNT 100 // Max UDP flows to be handled per run
638 #define UDP_FLOW_GC_RUN_INTERVAL_NSEC (10 * NSEC_PER_SEC) // GC wakes up every 10 seconds
639
640 /*
641 * UDP flow queue thresholds
642 */
643 #define UDP_FLOW_GC_MBUF_CNT_MAX (2 << MBSHIFT) // Max mbuf byte count in flow queue (2MB)
644 #define UDP_FLOW_GC_MBUF_NUM_MAX (UDP_FLOW_GC_MBUF_CNT_MAX >> MCLSHIFT) // Max mbuf count in flow queue (1K)
645 #define UDP_FLOW_GC_MBUF_SHIFT 5 // Shift to get 1/32 of platform limits
646 /*
647 * UDP flow queue threshold globals:
648 */
649 static unsigned int cfil_udp_gc_mbuf_num_max = UDP_FLOW_GC_MBUF_NUM_MAX;
650 static unsigned int cfil_udp_gc_mbuf_cnt_max = UDP_FLOW_GC_MBUF_CNT_MAX;
651
652 /*
653 * struct cfil_hash_entry
654 *
655 * Hash entry for cfil_info
656 */
657 struct cfil_hash_entry {
658 LIST_ENTRY(cfil_hash_entry) cfentry_link;
659 struct cfil_info *cfentry_cfil;
660 u_short cfentry_fport;
661 u_short cfentry_lport;
662 sa_family_t cfentry_family;
663 u_int32_t cfentry_flowhash;
664 u_int64_t cfentry_lastused;
665 union {
666 /* foreign host table entry */
667 struct in_addr_4in6 addr46;
668 struct in6_addr addr6;
669 } cfentry_faddr;
670 union {
671 /* local host table entry */
672 struct in_addr_4in6 addr46;
673 struct in6_addr addr6;
674 } cfentry_laddr;
675 uint8_t cfentry_laddr_updated: 1;
676 uint8_t cfentry_lport_updated: 1;
677 uint8_t cfentry_reserved: 6;
678 };
679
680 /*
681 * struct cfil_db
682 *
683 * For each UDP socket, this is a hash table maintaining all cfil_info structs
684 * keyed by the flow 4-tuples <lport,fport,laddr,faddr>.
685 */
686 struct cfil_db {
687 struct socket *cfdb_so;
688 uint32_t cfdb_count; /* Number of total content filters */
689 struct cfilhashhead *cfdb_hashbase;
690 u_long cfdb_hashmask;
691 struct cfil_hash_entry *cfdb_only_entry; /* Optimization for connected UDP */
692 };
693
694 /*
695 * CFIL specific mbuf tag:
696 * Save state of socket at the point of data entry into cfil.
697 * Use saved state for reinjection at protocol layer.
698 */
699 struct cfil_tag {
700 union sockaddr_in_4_6 cfil_faddr;
701 uint32_t cfil_so_state_change_cnt;
702 uint32_t cfil_so_options;
703 int cfil_inp_flags;
704 };
705
706 static ZONE_DECLARE(cfil_hash_entry_zone, "cfil_entry_hash",
707 sizeof(struct cfil_hash_entry), ZC_NONE);
708
709 static ZONE_DECLARE(cfil_db_zone, "cfil_db",
710 sizeof(struct cfil_db), ZC_NONE);
711
712 /*
713 * Statistics
714 */
715
716 struct cfil_stats cfil_stats;
717
718 /*
719 * For troubleshooting
720 */
721 int cfil_log_level = LOG_ERR;
722 int cfil_debug = 1;
723
724 // Debug controls added for selective debugging.
725 // Disabled for production. If enabled,
726 // these will have a performance impact
727 #define LIFECYCLE_DEBUG 0
728 #define VERDICT_DEBUG 0
729 #define DATA_DEBUG 0
730 #define SHOW_DEBUG 0
731 #define GC_DEBUG 0
732 #define STATS_DEBUG 0
733
734 /*
735 * Sysctls for logs and statistics
736 */
737 static int sysctl_cfil_filter_list(struct sysctl_oid *, void *, int,
738 struct sysctl_req *);
739 static int sysctl_cfil_sock_list(struct sysctl_oid *, void *, int,
740 struct sysctl_req *);
741
742 SYSCTL_NODE(_net, OID_AUTO, cfil, CTLFLAG_RW | CTLFLAG_LOCKED, 0, "cfil");
743
744 SYSCTL_INT(_net_cfil, OID_AUTO, log, CTLFLAG_RW | CTLFLAG_LOCKED,
745 &cfil_log_level, 0, "");
746
747 SYSCTL_INT(_net_cfil, OID_AUTO, debug, CTLFLAG_RW | CTLFLAG_LOCKED,
748 &cfil_debug, 0, "");
749
750 SYSCTL_UINT(_net_cfil, OID_AUTO, sock_attached_count, CTLFLAG_RD | CTLFLAG_LOCKED,
751 &cfil_sock_attached_count, 0, "");
752
753 SYSCTL_UINT(_net_cfil, OID_AUTO, active_count, CTLFLAG_RD | CTLFLAG_LOCKED,
754 &cfil_active_count, 0, "");
755
756 SYSCTL_UINT(_net_cfil, OID_AUTO, close_wait_timeout, CTLFLAG_RW | CTLFLAG_LOCKED,
757 &cfil_close_wait_timeout, 0, "");
758
759 static int cfil_sbtrim = 1;
760 SYSCTL_UINT(_net_cfil, OID_AUTO, sbtrim, CTLFLAG_RW | CTLFLAG_LOCKED,
761 &cfil_sbtrim, 0, "");
762
763 SYSCTL_PROC(_net_cfil, OID_AUTO, filter_list, CTLFLAG_RD | CTLFLAG_LOCKED,
764 0, 0, sysctl_cfil_filter_list, "S,cfil_filter_stat", "");
765
766 SYSCTL_PROC(_net_cfil, OID_AUTO, sock_list, CTLFLAG_RD | CTLFLAG_LOCKED,
767 0, 0, sysctl_cfil_sock_list, "S,cfil_sock_stat", "");
768
769 SYSCTL_STRUCT(_net_cfil, OID_AUTO, stats, CTLFLAG_RD | CTLFLAG_LOCKED,
770 &cfil_stats, cfil_stats, "");
771
772 /*
773 * Forward declaration to appease the compiler
774 */
775 static int cfil_action_data_pass(struct socket *, struct cfil_info *, uint32_t, int,
776 uint64_t, uint64_t);
777 static int cfil_action_drop(struct socket *, struct cfil_info *, uint32_t);
778 static int cfil_action_bless_client(uint32_t, struct cfil_msg_hdr *);
779 static int cfil_action_set_crypto_key(uint32_t, struct cfil_msg_hdr *);
780 static int cfil_dispatch_closed_event(struct socket *, struct cfil_info *, int);
781 static int cfil_data_common(struct socket *, struct cfil_info *, int, struct sockaddr *,
782 struct mbuf *, struct mbuf *, uint32_t);
783 static int cfil_data_filter(struct socket *, struct cfil_info *, uint32_t, int,
784 struct mbuf *, uint32_t);
785 static void fill_ip_sockaddr_4_6(union sockaddr_in_4_6 *,
786 struct in_addr, u_int16_t);
787 static void fill_ip6_sockaddr_4_6(union sockaddr_in_4_6 *,
788 struct in6_addr *, u_int16_t);
789
790 static int cfil_dispatch_attach_event(struct socket *, struct cfil_info *, uint32_t, int);
791 static void cfil_info_free(struct cfil_info *);
792 static struct cfil_info * cfil_info_alloc(struct socket *, struct cfil_hash_entry *);
793 static int cfil_info_attach_unit(struct socket *, uint32_t, struct cfil_info *);
794 static struct socket * cfil_socket_from_sock_id(cfil_sock_id_t, bool);
795 static struct socket * cfil_socket_from_client_uuid(uuid_t, bool *);
796 static int cfil_service_pending_queue(struct socket *, struct cfil_info *, uint32_t, int);
797 static int cfil_data_service_ctl_q(struct socket *, struct cfil_info *, uint32_t, int);
798 static void cfil_info_verify(struct cfil_info *);
799 static int cfil_update_data_offsets(struct socket *, struct cfil_info *, uint32_t, int,
800 uint64_t, uint64_t);
801 static int cfil_acquire_sockbuf(struct socket *, struct cfil_info *, int);
802 static void cfil_release_sockbuf(struct socket *, int);
803 static int cfil_filters_attached(struct socket *);
804
805 static void cfil_rw_lock_exclusive(lck_rw_t *);
806 static void cfil_rw_unlock_exclusive(lck_rw_t *);
807 static void cfil_rw_lock_shared(lck_rw_t *);
808 static void cfil_rw_unlock_shared(lck_rw_t *);
809 static boolean_t cfil_rw_lock_shared_to_exclusive(lck_rw_t *);
810 static void cfil_rw_lock_exclusive_to_shared(lck_rw_t *);
811
812 static unsigned int cfil_data_length(struct mbuf *, int *, int *);
813 static errno_t cfil_db_init(struct socket *);
814 static void cfil_db_free(struct socket *so);
815 struct cfil_hash_entry *cfil_db_lookup_entry(struct cfil_db *, struct sockaddr *, struct sockaddr *, boolean_t);
816 struct cfil_hash_entry *cfil_db_lookup_entry_internal(struct cfil_db *, struct sockaddr *, struct sockaddr *, boolean_t, boolean_t);
817 struct cfil_hash_entry *cfil_db_lookup_entry_with_sockid(struct cfil_db *, u_int64_t);
818 struct cfil_hash_entry *cfil_db_add_entry(struct cfil_db *, struct sockaddr *, struct sockaddr *);
819 void cfil_db_update_entry_local(struct cfil_db *, struct cfil_hash_entry *, struct sockaddr *, struct mbuf *);
820 void cfil_db_delete_entry(struct cfil_db *, struct cfil_hash_entry *);
821 struct cfil_hash_entry *cfil_sock_udp_get_flow(struct socket *, uint32_t, bool, struct sockaddr *, struct sockaddr *, struct mbuf *, int);
822 struct cfil_info *cfil_db_get_cfil_info(struct cfil_db *, cfil_sock_id_t);
823 static errno_t cfil_sock_udp_handle_data(bool, struct socket *, struct sockaddr *, struct sockaddr *,
824 struct mbuf *, struct mbuf *, uint32_t);
825 static int cfil_sock_udp_get_address_from_control(sa_family_t, struct mbuf *, uint8_t **);
826 static int32_t cfil_sock_udp_data_pending(struct sockbuf *, bool);
827 static void cfil_sock_udp_is_closed(struct socket *);
828 static int cfil_sock_udp_notify_shutdown(struct socket *, int, int, int);
829 static int cfil_sock_udp_shutdown(struct socket *, int *);
830 static void cfil_sock_udp_close_wait(struct socket *);
831 static void cfil_sock_udp_buf_update(struct sockbuf *);
832 static int cfil_filters_udp_attached(struct socket *, bool);
833 static void cfil_get_flow_address_v6(struct cfil_hash_entry *, struct inpcb *,
834 struct in6_addr **, struct in6_addr **,
835 u_int16_t *, u_int16_t *);
836 static void cfil_get_flow_address(struct cfil_hash_entry *, struct inpcb *,
837 struct in_addr *, struct in_addr *,
838 u_int16_t *, u_int16_t *);
839 static void cfil_info_log(int, struct cfil_info *, const char *);
840 void cfil_filter_show(u_int32_t);
841 void cfil_info_show(void);
842 bool cfil_info_idle_timed_out(struct cfil_info *, int, u_int64_t);
843 bool cfil_info_action_timed_out(struct cfil_info *, int);
844 bool cfil_info_buffer_threshold_exceeded(struct cfil_info *);
845 struct m_tag *cfil_dgram_save_socket_state(struct cfil_info *, struct mbuf *);
846 boolean_t cfil_dgram_peek_socket_state(struct mbuf *m, int *inp_flags);
847 static void cfil_udp_gc_thread_func(void *, wait_result_t);
848 static void cfil_info_udp_expire(void *, wait_result_t);
849 static bool fill_cfil_hash_entry_from_address(struct cfil_hash_entry *, bool, struct sockaddr *, bool);
850 static void cfil_sock_received_verdict(struct socket *so);
851 static void cfil_fill_event_msg_addresses(struct cfil_hash_entry *, struct inpcb *,
852 union sockaddr_in_4_6 *, union sockaddr_in_4_6 *,
853 boolean_t, boolean_t);
854 static void cfil_stats_report_thread_func(void *, wait_result_t);
855 static void cfil_stats_report(void *v, wait_result_t w);
856
857 bool check_port(struct sockaddr *, u_short);
858
859 /*
860 * Content filter global read write lock
861 */
862
863 static void
864 cfil_rw_lock_exclusive(lck_rw_t *lck)
865 {
866 void *lr_saved;
867
868 lr_saved = __builtin_return_address(0);
869
870 lck_rw_lock_exclusive(lck);
871
872 cfil_rw_lock_history[cfil_rw_nxt_lck] = lr_saved;
873 cfil_rw_nxt_lck = (cfil_rw_nxt_lck + 1) % CFIL_RW_LCK_MAX;
874 }
875
876 static void
877 cfil_rw_unlock_exclusive(lck_rw_t *lck)
878 {
879 void *lr_saved;
880
881 lr_saved = __builtin_return_address(0);
882
883 lck_rw_unlock_exclusive(lck);
884
885 cfil_rw_unlock_history[cfil_rw_nxt_unlck] = lr_saved;
886 cfil_rw_nxt_unlck = (cfil_rw_nxt_unlck + 1) % CFIL_RW_LCK_MAX;
887 }
888
889 static void
890 cfil_rw_lock_shared(lck_rw_t *lck)
891 {
892 void *lr_saved;
893
894 lr_saved = __builtin_return_address(0);
895
896 lck_rw_lock_shared(lck);
897
898 cfil_rw_lock_history[cfil_rw_nxt_lck] = lr_saved;
899 cfil_rw_nxt_lck = (cfil_rw_nxt_lck + 1) % CFIL_RW_LCK_MAX;
900 }
901
902 static void
903 cfil_rw_unlock_shared(lck_rw_t *lck)
904 {
905 void *lr_saved;
906
907 lr_saved = __builtin_return_address(0);
908
909 lck_rw_unlock_shared(lck);
910
911 cfil_rw_unlock_history[cfil_rw_nxt_unlck] = lr_saved;
912 cfil_rw_nxt_unlck = (cfil_rw_nxt_unlck + 1) % CFIL_RW_LCK_MAX;
913 }
914
915 static boolean_t
916 cfil_rw_lock_shared_to_exclusive(lck_rw_t *lck)
917 {
918 void *lr_saved;
919 boolean_t upgraded;
920
921 lr_saved = __builtin_return_address(0);
922
923 upgraded = lck_rw_lock_shared_to_exclusive(lck);
924 if (upgraded) {
925 cfil_rw_unlock_history[cfil_rw_nxt_unlck] = lr_saved;
926 cfil_rw_nxt_unlck = (cfil_rw_nxt_unlck + 1) % CFIL_RW_LCK_MAX;
927 }
928 return upgraded;
929 }
930
931 static void
932 cfil_rw_lock_exclusive_to_shared(lck_rw_t *lck)
933 {
934 void *lr_saved;
935
936 lr_saved = __builtin_return_address(0);
937
938 lck_rw_lock_exclusive_to_shared(lck);
939
940 cfil_rw_lock_history[cfil_rw_nxt_lck] = lr_saved;
941 cfil_rw_nxt_lck = (cfil_rw_nxt_lck + 1) % CFIL_RW_LCK_MAX;
942 }
943
944 static void
945 cfil_rw_lock_assert_held(lck_rw_t *lck, int exclusive)
946 {
947 #if !MACH_ASSERT
948 #pragma unused(lck, exclusive)
949 #endif
950 LCK_RW_ASSERT(lck,
951 exclusive ? LCK_RW_ASSERT_EXCLUSIVE : LCK_RW_ASSERT_HELD);
952 }
953
954 /*
955 * Return the number of bytes in the mbuf chain using the same
956 * method as m_length() or sballoc()
957 *
958 * Returns data len - starting from PKT start
959 * - retmbcnt - optional param to get total mbuf bytes in chain
960 * - retmbnum - optional param to get number of mbufs in chain
961 */
962 static unsigned int
963 cfil_data_length(struct mbuf *m, int *retmbcnt, int *retmbnum)
964 {
965 struct mbuf *m0;
966 unsigned int pktlen = 0;
967 int mbcnt;
968 int mbnum;
969
970 // Locate the start of data
971 for (m0 = m; m0 != NULL; m0 = m0->m_next) {
972 if (m0->m_flags & M_PKTHDR) {
973 break;
974 }
975 }
976 if (m0 == NULL) {
977 CFIL_LOG(LOG_ERR, "cfil_data_length: no M_PKTHDR");
978 return 0;
979 }
980 m = m0;
981
982 if (retmbcnt == NULL && retmbnum == NULL) {
983 return m_length(m);
984 }
985
986 pktlen = 0;
987 mbcnt = 0;
988 mbnum = 0;
989 for (m0 = m; m0 != NULL; m0 = m0->m_next) {
990 pktlen += m0->m_len;
991 mbnum++;
992 mbcnt += MSIZE;
993 if (m0->m_flags & M_EXT) {
994 mbcnt += m0->m_ext.ext_size;
995 }
996 }
997 if (retmbcnt) {
998 *retmbcnt = mbcnt;
999 }
1000 if (retmbnum) {
1001 *retmbnum = mbnum;
1002 }
1003 return pktlen;
1004 }
1005
1006 static struct mbuf *
1007 cfil_data_start(struct mbuf *m)
1008 {
1009 struct mbuf *m0;
1010
1011 // Locate the start of data
1012 for (m0 = m; m0 != NULL; m0 = m0->m_next) {
1013 if (m0->m_flags & M_PKTHDR) {
1014 break;
1015 }
1016 }
1017 return m0;
1018 }
1019
1020 /*
1021 * Common mbuf queue utilities
1022 */
1023
1024 static inline void
1025 cfil_queue_init(struct cfil_queue *cfq)
1026 {
1027 cfq->q_start = 0;
1028 cfq->q_end = 0;
1029 MBUFQ_INIT(&cfq->q_mq);
1030 }
1031
1032 static inline uint64_t
1033 cfil_queue_drain(struct cfil_queue *cfq)
1034 {
1035 uint64_t drained = cfq->q_start - cfq->q_end;
1036 cfq->q_start = 0;
1037 cfq->q_end = 0;
1038 MBUFQ_DRAIN(&cfq->q_mq);
1039
1040 return drained;
1041 }
1042
1043 /* Return 1 when empty, 0 otherwise */
1044 static inline int
1045 cfil_queue_empty(struct cfil_queue *cfq)
1046 {
1047 return MBUFQ_EMPTY(&cfq->q_mq);
1048 }
1049
1050 static inline uint64_t
1051 cfil_queue_offset_first(struct cfil_queue *cfq)
1052 {
1053 return cfq->q_start;
1054 }
1055
1056 static inline uint64_t
1057 cfil_queue_offset_last(struct cfil_queue *cfq)
1058 {
1059 return cfq->q_end;
1060 }
1061
1062 static inline uint64_t
1063 cfil_queue_len(struct cfil_queue *cfq)
1064 {
1065 return cfq->q_end - cfq->q_start;
1066 }
1067
1068 /*
1069 * Routines to verify some fundamental assumptions
1070 */
1071
1072 static void
1073 cfil_queue_verify(struct cfil_queue *cfq)
1074 {
1075 mbuf_t chain;
1076 mbuf_t m;
1077 mbuf_t n;
1078 uint64_t queuesize = 0;
1079
1080 /* Verify offsets are ordered */
1081 VERIFY(cfq->q_start <= cfq->q_end);
1082
1083 /*
1084 * When the queue is empty the offsets are equal, otherwise the offsets
1085 * are different
1086 */
1087 VERIFY((MBUFQ_EMPTY(&cfq->q_mq) && cfq->q_start == cfq->q_end) ||
1088 (!MBUFQ_EMPTY(&cfq->q_mq) &&
1089 cfq->q_start != cfq->q_end));
1090
1091 MBUFQ_FOREACH(chain, &cfq->q_mq) {
1092 size_t chainsize = 0;
1093 m = chain;
1094 unsigned int mlen = cfil_data_length(m, NULL, NULL);
1095 // skip the addr and control stuff if present
1096 m = cfil_data_start(m);
1097
1098 if (m == NULL ||
1099 m == (void *)M_TAG_FREE_PATTERN ||
1100 m->m_next == (void *)M_TAG_FREE_PATTERN ||
1101 m->m_nextpkt == (void *)M_TAG_FREE_PATTERN) {
1102 panic("%s - mq %p is free at %p", __func__,
1103 &cfq->q_mq, m);
1104 }
1105 for (n = m; n != NULL; n = n->m_next) {
1106 if (n->m_type != MT_DATA &&
1107 n->m_type != MT_HEADER &&
1108 n->m_type != MT_OOBDATA) {
1109 panic("%s - %p unsupported type %u", __func__,
1110 n, n->m_type);
1111 }
1112 chainsize += n->m_len;
1113 }
1114 if (mlen != chainsize) {
1115 panic("%s - %p m_length() %u != chainsize %lu",
1116 __func__, m, mlen, chainsize);
1117 }
1118 queuesize += chainsize;
1119 }
1120 if (queuesize != cfq->q_end - cfq->q_start) {
1121 panic("%s - %p queuesize %llu != offsetdiffs %llu", __func__,
1122 m, queuesize, cfq->q_end - cfq->q_start);
1123 }
1124 }
1125
1126 static void
1127 cfil_queue_enqueue(struct cfil_queue *cfq, mbuf_t m, size_t len)
1128 {
1129 CFIL_QUEUE_VERIFY(cfq);
1130
1131 MBUFQ_ENQUEUE(&cfq->q_mq, m);
1132 cfq->q_end += len;
1133
1134 CFIL_QUEUE_VERIFY(cfq);
1135 }
1136
1137 static void
1138 cfil_queue_remove(struct cfil_queue *cfq, mbuf_t m, size_t len)
1139 {
1140 CFIL_QUEUE_VERIFY(cfq);
1141
1142 VERIFY(cfil_data_length(m, NULL, NULL) == len);
1143
1144 MBUFQ_REMOVE(&cfq->q_mq, m);
1145 MBUFQ_NEXT(m) = NULL;
1146 cfq->q_start += len;
1147
1148 CFIL_QUEUE_VERIFY(cfq);
1149 }
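
/*
 * Offset bookkeeping of the cfil_queue_enqueue()/cfil_queue_remove() helpers
 * above, illustrated on an initially empty queue (enqueue advances q_end,
 * remove advances q_start):
 *
 *	cfil_queue_enqueue(q, m1, 100);	// q_start = 0,   q_end = 100, len = 100
 *	cfil_queue_enqueue(q, m2, 50);	// q_start = 0,   q_end = 150, len = 150
 *	cfil_queue_remove(q, m1, 100);	// q_start = 100, q_end = 150, len = 50
 */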
1150
1151 static mbuf_t
1152 cfil_queue_first(struct cfil_queue *cfq)
1153 {
1154 return MBUFQ_FIRST(&cfq->q_mq);
1155 }
1156
1157 static mbuf_t
1158 cfil_queue_next(struct cfil_queue *cfq, mbuf_t m)
1159 {
1160 #pragma unused(cfq)
1161 return MBUFQ_NEXT(m);
1162 }
1163
1164 static void
1165 cfil_entry_buf_verify(struct cfe_buf *cfe_buf)
1166 {
1167 CFIL_QUEUE_VERIFY(&cfe_buf->cfe_ctl_q);
1168 CFIL_QUEUE_VERIFY(&cfe_buf->cfe_pending_q);
1169
1170 /* Verify the queues are ordered so that pending is before ctl */
1171 VERIFY(cfe_buf->cfe_ctl_q.q_start >= cfe_buf->cfe_pending_q.q_end);
1172
1173 /* The peek offset cannot be less than the pass offset */
1174 VERIFY(cfe_buf->cfe_peek_offset >= cfe_buf->cfe_pass_offset);
1175
1176 /* Make sure we've updated the offset we peeked at */
1177 VERIFY(cfe_buf->cfe_ctl_q.q_start <= cfe_buf->cfe_peeked);
1178 }
1179
1180 static void
1181 cfil_entry_verify(struct cfil_entry *entry)
1182 {
1183 cfil_entry_buf_verify(&entry->cfe_snd);
1184 cfil_entry_buf_verify(&entry->cfe_rcv);
1185 }
1186
1187 static void
1188 cfil_info_buf_verify(struct cfi_buf *cfi_buf)
1189 {
1190 CFIL_QUEUE_VERIFY(&cfi_buf->cfi_inject_q);
1191
1192 VERIFY(cfi_buf->cfi_pending_first <= cfi_buf->cfi_pending_last);
1193 }
1194
1195 static void
1196 cfil_info_verify(struct cfil_info *cfil_info)
1197 {
1198 int i;
1199
1200 if (cfil_info == NULL) {
1201 return;
1202 }
1203
1204 cfil_info_buf_verify(&cfil_info->cfi_snd);
1205 cfil_info_buf_verify(&cfil_info->cfi_rcv);
1206
1207 for (i = 0; i < MAX_CONTENT_FILTER; i++) {
1208 cfil_entry_verify(&cfil_info->cfi_entries[i]);
1209 }
1210 }
1211
1212 static void
1213 verify_content_filter(struct content_filter *cfc)
1214 {
1215 struct cfil_entry *entry;
1216 uint32_t count = 0;
1217
1218 VERIFY(cfc->cf_sock_count >= 0);
1219
1220 TAILQ_FOREACH(entry, &cfc->cf_sock_entries, cfe_link) {
1221 count++;
1222 VERIFY(cfc == entry->cfe_filter);
1223 }
1224 VERIFY(count == cfc->cf_sock_count);
1225 }
1226
1227 /*
1228 * Kernel control socket callbacks
1229 */
1230 static errno_t
1231 cfil_ctl_connect(kern_ctl_ref kctlref, struct sockaddr_ctl *sac,
1232 void **unitinfo)
1233 {
1234 errno_t error = 0;
1235 struct content_filter *cfc = NULL;
1236
1237 CFIL_LOG(LOG_NOTICE, "");
1238
1239 cfc = zalloc(content_filter_zone);
1240 if (cfc == NULL) {
1241 CFIL_LOG(LOG_ERR, "zalloc failed");
1242 error = ENOMEM;
1243 goto done;
1244 }
1245 bzero(cfc, sizeof(struct content_filter));
1246
1247 cfil_rw_lock_exclusive(&cfil_lck_rw);
1248 if (content_filters == NULL) {
1249 struct content_filter **tmp;
1250
1251 cfil_rw_unlock_exclusive(&cfil_lck_rw);
1252
1253 MALLOC(tmp,
1254 struct content_filter **,
1255 MAX_CONTENT_FILTER * sizeof(struct content_filter *),
1256 M_TEMP,
1257 M_WAITOK | M_ZERO);
1258
1259 cfil_rw_lock_exclusive(&cfil_lck_rw);
1260
1261 if (tmp == NULL && content_filters == NULL) {
1262 error = ENOMEM;
1263 cfil_rw_unlock_exclusive(&cfil_lck_rw);
1264 goto done;
1265 }
1266 /* Another thread may have won the race */
1267 if (content_filters != NULL) {
1268 FREE(tmp, M_TEMP);
1269 } else {
1270 content_filters = tmp;
1271 }
1272 }
1273
1274 if (sac->sc_unit == 0 || sac->sc_unit > MAX_CONTENT_FILTER) {
1275 CFIL_LOG(LOG_ERR, "bad sc_unit %u", sac->sc_unit);
1276 error = EINVAL;
1277 } else if (content_filters[sac->sc_unit - 1] != NULL) {
1278 CFIL_LOG(LOG_ERR, "sc_unit %u in use", sac->sc_unit);
1279 error = EADDRINUSE;
1280 } else {
1281 /*
1282 * kernel control socket kcunit numbers start at 1
1283 */
1284 content_filters[sac->sc_unit - 1] = cfc;
1285
1286 cfc->cf_kcref = kctlref;
1287 cfc->cf_kcunit = sac->sc_unit;
1288 TAILQ_INIT(&cfc->cf_sock_entries);
1289
1290 *unitinfo = cfc;
1291 cfil_active_count++;
1292
1293 // Allocate periodic stats buffer for this filter
1294 if (global_cfil_stats_report_buffers[cfc->cf_kcunit - 1] == NULL) {
1295 cfil_rw_unlock_exclusive(&cfil_lck_rw);
1296
1297 struct cfil_stats_report_buffer *buf;
1298
1299 MALLOC(buf,
1300 struct cfil_stats_report_buffer *,
1301 sizeof(struct cfil_stats_report_buffer),
1302 M_TEMP,
1303 M_WAITOK | M_ZERO);
1304
1305 cfil_rw_lock_exclusive(&cfil_lck_rw);
1306
1307 if (buf == NULL) {
1308 error = ENOMEM;
1309 cfil_rw_unlock_exclusive(&cfil_lck_rw);
1310 goto done;
1311 }
1312
1313 /* Another thread may have won the race */
1314 if (global_cfil_stats_report_buffers[cfc->cf_kcunit - 1] != NULL) {
1315 FREE(buf, M_TEMP);
1316 } else {
1317 global_cfil_stats_report_buffers[cfc->cf_kcunit - 1] = buf;
1318 }
1319 }
1320 }
1321 cfil_rw_unlock_exclusive(&cfil_lck_rw);
1322 done:
1323 if (error != 0 && cfc != NULL) {
1324 zfree(content_filter_zone, cfc);
1325 }
1326
1327 if (error == 0) {
1328 OSIncrementAtomic(&cfil_stats.cfs_ctl_connect_ok);
1329 } else {
1330 OSIncrementAtomic(&cfil_stats.cfs_ctl_connect_fail);
1331 }
1332
1333 CFIL_LOG(LOG_INFO, "return %d cfil_active_count %u kcunit %u",
1334 error, cfil_active_count, sac->sc_unit);
1335
1336 return error;
1337 }
1338
1339 static errno_t
1340 cfil_ctl_disconnect(kern_ctl_ref kctlref, u_int32_t kcunit, void *unitinfo)
1341 {
1342 #pragma unused(kctlref)
1343 errno_t error = 0;
1344 struct content_filter *cfc;
1345 struct cfil_entry *entry;
1346 uint64_t sock_flow_id = 0;
1347
1348 CFIL_LOG(LOG_NOTICE, "");
1349
1350 if (content_filters == NULL) {
1351 CFIL_LOG(LOG_ERR, "no content filter");
1352 error = EINVAL;
1353 goto done;
1354 }
1355 if (kcunit > MAX_CONTENT_FILTER) {
1356 CFIL_LOG(LOG_ERR, "kcunit %u > MAX_CONTENT_FILTER (%d)",
1357 kcunit, MAX_CONTENT_FILTER);
1358 error = EINVAL;
1359 goto done;
1360 }
1361
1362 cfc = (struct content_filter *)unitinfo;
1363 if (cfc == NULL) {
1364 goto done;
1365 }
1366
1367 cfil_rw_lock_exclusive(&cfil_lck_rw);
1368 if (content_filters[kcunit - 1] != cfc || cfc->cf_kcunit != kcunit) {
1369 CFIL_LOG(LOG_ERR, "bad unit info %u)",
1370 kcunit);
1371 cfil_rw_unlock_exclusive(&cfil_lck_rw);
1372 goto done;
1373 }
1374 cfc->cf_flags |= CFF_DETACHING;
1375 /*
1376 * Remove all sockets from the filter
1377 */
1378 while ((entry = TAILQ_FIRST(&cfc->cf_sock_entries)) != NULL) {
1379 cfil_rw_lock_assert_held(&cfil_lck_rw, 1);
1380
1381 verify_content_filter(cfc);
1382 /*
1383 * Accept all outstanding data by pushing to next filter
1384 * or back to socket
1385 *
1386 * TBD: Actually we should make sure all data has been pushed
1387 * back to socket
1388 */
1389 if (entry->cfe_cfil_info && entry->cfe_cfil_info->cfi_so) {
1390 struct cfil_info *cfil_info = entry->cfe_cfil_info;
1391 struct socket *so = cfil_info->cfi_so;
1392 sock_flow_id = cfil_info->cfi_sock_id;
1393
1394 /* Need to let data flow immediately */
1395 entry->cfe_flags |= CFEF_SENT_SOCK_ATTACHED |
1396 CFEF_DATA_START;
1397
1398 /*
1399 * Respect locking hierarchy
1400 */
1401 cfil_rw_unlock_exclusive(&cfil_lck_rw);
1402
1403 socket_lock(so, 1);
1404
1405 /*
1406 * When cfe_filter is NULL the filter is detached
1407 * and the entry has been removed from cf_sock_entries
1408 */
1409 if ((so->so_cfil == NULL && so->so_cfil_db == NULL) || entry->cfe_filter == NULL) {
1410 cfil_rw_lock_exclusive(&cfil_lck_rw);
1411 goto release;
1412 }
1413
1414 (void) cfil_action_data_pass(so, cfil_info, kcunit, 1,
1415 CFM_MAX_OFFSET,
1416 CFM_MAX_OFFSET);
1417
1418 (void) cfil_action_data_pass(so, cfil_info, kcunit, 0,
1419 CFM_MAX_OFFSET,
1420 CFM_MAX_OFFSET);
1421
1422 cfil_rw_lock_exclusive(&cfil_lck_rw);
1423
1424 /*
1425 * Check again to make sure the cfil_info is still valid
1426 * as the socket may have been unlocked when calling
1427 * cfil_acquire_sockbuf()
1428 */
1429 if (entry->cfe_filter == NULL ||
1430 (so->so_cfil == NULL && cfil_db_get_cfil_info(so->so_cfil_db, sock_flow_id) == NULL)) {
1431 goto release;
1432 }
1433
1434 /* The filter is now detached */
1435 entry->cfe_flags |= CFEF_CFIL_DETACHED;
1436 #if LIFECYCLE_DEBUG
1437 cfil_info_log(LOG_DEBUG, cfil_info, "CFIL: LIFECYCLE: - FILTER DISCONNECTED");
1438 #endif
1439 CFIL_LOG(LOG_NOTICE, "so %llx detached %u",
1440 (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit);
1441 if ((cfil_info->cfi_flags & CFIF_CLOSE_WAIT) &&
1442 cfil_filters_attached(so) == 0) {
1443 CFIL_LOG(LOG_NOTICE, "so %llx waking",
1444 (uint64_t)VM_KERNEL_ADDRPERM(so));
1445 wakeup((caddr_t)cfil_info);
1446 }
1447
1448 /*
1449 * Remove the filter entry from the content filter
1450 * but leave the rest of the state intact as the queues
1451 * may not be empty yet
1452 */
1453 entry->cfe_filter = NULL;
1454 entry->cfe_necp_control_unit = 0;
1455
1456 TAILQ_REMOVE(&cfc->cf_sock_entries, entry, cfe_link);
1457 cfc->cf_sock_count--;
1458 release:
1459 socket_unlock(so, 1);
1460 }
1461 }
1462 verify_content_filter(cfc);
1463
1464 /* Free the stats buffer for this filter */
1465 if (global_cfil_stats_report_buffers[cfc->cf_kcunit - 1] != NULL) {
1466 FREE(global_cfil_stats_report_buffers[cfc->cf_kcunit - 1], M_TEMP);
1467 global_cfil_stats_report_buffers[cfc->cf_kcunit - 1] = NULL;
1468 }
1469 VERIFY(cfc->cf_sock_count == 0);
1470
1471 /*
1472 * Make filter inactive
1473 */
1474 content_filters[kcunit - 1] = NULL;
1475 cfil_active_count--;
1476 cfil_rw_unlock_exclusive(&cfil_lck_rw);
1477
1478 if (cfc->cf_crypto_state != NULL) {
1479 cfil_crypto_cleanup_state(cfc->cf_crypto_state);
1480 cfc->cf_crypto_state = NULL;
1481 }
1482
1483 zfree(content_filter_zone, cfc);
1484 done:
1485 if (error == 0) {
1486 OSIncrementAtomic(&cfil_stats.cfs_ctl_disconnect_ok);
1487 } else {
1488 OSIncrementAtomic(&cfil_stats.cfs_ctl_disconnect_fail);
1489 }
1490
1491 CFIL_LOG(LOG_INFO, "return %d cfil_active_count %u kcunit %u",
1492 error, cfil_active_count, kcunit);
1493
1494 return error;
1495 }
1496
1497 /*
1498 * cfil_acquire_sockbuf()
1499 *
1500 * Prevent any other thread from acquiring the sockbuf
1501 * We use sb_cfil_thread as a semaphore to prevent other threads from
1502 * messing with the sockbuf -- see sblock()
1503 * Note: We do not set SB_LOCK here because the thread may check or modify
1504 * SB_LOCK several times until it calls cfil_release_sockbuf() -- currently
1505 * sblock(), sbunlock() or sodefunct()
1506 */
1507 static int
1508 cfil_acquire_sockbuf(struct socket *so, struct cfil_info *cfil_info, int outgoing)
1509 {
1510 thread_t tp = current_thread();
1511 struct sockbuf *sb = outgoing ? &so->so_snd : &so->so_rcv;
1512 lck_mtx_t *mutex_held;
1513 int error = 0;
1514
1515 /*
1516 * Wait until no thread is holding the sockbuf and other content
1517 * filter threads have released the sockbuf
1518 */
1519 while ((sb->sb_flags & SB_LOCK) ||
1520 (sb->sb_cfil_thread != NULL && sb->sb_cfil_thread != tp)) {
1521 if (so->so_proto->pr_getlock != NULL) {
1522 mutex_held = (*so->so_proto->pr_getlock)(so, PR_F_WILLUNLOCK);
1523 } else {
1524 mutex_held = so->so_proto->pr_domain->dom_mtx;
1525 }
1526
1527 LCK_MTX_ASSERT(mutex_held, LCK_MTX_ASSERT_OWNED);
1528
1529 sb->sb_wantlock++;
1530 VERIFY(sb->sb_wantlock != 0);
1531
1532 msleep(&sb->sb_flags, mutex_held, PSOCK, "cfil_acquire_sockbuf",
1533 NULL);
1534
1535 VERIFY(sb->sb_wantlock != 0);
1536 sb->sb_wantlock--;
1537 }
1538 /*
1539 * Use reference count for repetitive calls on same thread
1540 */
1541 if (sb->sb_cfil_refs == 0) {
1542 VERIFY(sb->sb_cfil_thread == NULL);
1543 VERIFY((sb->sb_flags & SB_LOCK) == 0);
1544
1545 sb->sb_cfil_thread = tp;
1546 sb->sb_flags |= SB_LOCK;
1547 }
1548 sb->sb_cfil_refs++;
1549
1550 /* We acquire the socket buffer when we need to cleanup */
1551 if (cfil_info == NULL) {
1552 CFIL_LOG(LOG_ERR, "so %llx cfil detached",
1553 (uint64_t)VM_KERNEL_ADDRPERM(so));
1554 error = 0;
1555 } else if (cfil_info->cfi_flags & CFIF_DROP) {
1556 CFIL_LOG(LOG_ERR, "so %llx drop set",
1557 (uint64_t)VM_KERNEL_ADDRPERM(so));
1558 error = EPIPE;
1559 }
1560
1561 return error;
1562 }
1563
1564 static void
1565 cfil_release_sockbuf(struct socket *so, int outgoing)
1566 {
1567 struct sockbuf *sb = outgoing ? &so->so_snd : &so->so_rcv;
1568 thread_t tp = current_thread();
1569
1570 socket_lock_assert_owned(so);
1571
1572 if (sb->sb_cfil_thread != NULL && sb->sb_cfil_thread != tp) {
1573 panic("%s sb_cfil_thread %p not current %p", __func__,
1574 sb->sb_cfil_thread, tp);
1575 }
1576 /*
1577 * Don't panic if we are defunct because SB_LOCK has
1578 * been cleared by sodefunct()
1579 */
1580 if (!(so->so_flags & SOF_DEFUNCT) && !(sb->sb_flags & SB_LOCK)) {
1581 panic("%s SB_LOCK not set on %p", __func__,
1582 sb);
1583 }
1584 /*
1585 * We can unlock when the thread unwinds to the last reference
1586 */
1587 sb->sb_cfil_refs--;
1588 if (sb->sb_cfil_refs == 0) {
1589 sb->sb_cfil_thread = NULL;
1590 sb->sb_flags &= ~SB_LOCK;
1591
1592 if (sb->sb_wantlock > 0) {
1593 wakeup(&sb->sb_flags);
1594 }
1595 }
1596 }
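
/*
 * Illustrative sketch: the expected pairing of cfil_acquire_sockbuf() and
 * cfil_release_sockbuf() around servicing one direction of a socket,
 * assuming the caller already holds the socket lock. The helper name is
 * hypothetical; the call sequence mirrors the one used by cfil_ctl_rcvd()
 * further below.
 */
#if 0
static void
cfil_sockbuf_usage_sketch(struct socket *so, struct cfil_info *cfil_info,
    uint32_t kcunit, int outgoing)
{
	int error;

	socket_lock_assert_owned(so);

	/* May msleep() until no other thread owns this sockbuf */
	error = cfil_acquire_sockbuf(so, cfil_info, outgoing);
	if (error == 0) {
		/* Safe to service the control queue for this direction */
		error = cfil_data_service_ctl_q(so, cfil_info, kcunit, outgoing);
	}
	/* Always release: drops sb_cfil_refs and wakes up any waiters */
	cfil_release_sockbuf(so, outgoing);
}
#endif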
1597
1598 cfil_sock_id_t
1599 cfil_sock_id_from_socket(struct socket *so)
1600 {
1601 if ((so->so_flags & SOF_CONTENT_FILTER) && so->so_cfil) {
1602 return so->so_cfil->cfi_sock_id;
1603 } else {
1604 return CFIL_SOCK_ID_NONE;
1605 }
1606 }
1607
1608 static bool
1609 cfil_socket_safe_lock(struct inpcb *inp)
1610 {
1611 if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) != WNT_STOPUSING) {
1612 socket_lock(inp->inp_socket, 1);
1613 if (in_pcb_checkstate(inp, WNT_RELEASE, 1) != WNT_STOPUSING) {
1614 return true;
1615 }
1616 socket_unlock(inp->inp_socket, 1);
1617 }
1618 return false;
1619 }
1620
1621 static struct socket *
1622 cfil_socket_from_sock_id(cfil_sock_id_t cfil_sock_id, bool udp_only)
1623 {
1624 struct socket *so = NULL;
1625 u_int64_t gencnt = cfil_sock_id >> 32;
1626 u_int32_t flowhash = (u_int32_t)(cfil_sock_id & 0x0ffffffff);
1627 struct inpcb *inp = NULL;
1628 struct inpcbinfo *pcbinfo = NULL;
1629
1630 #if VERDICT_DEBUG
1631 CFIL_LOG(LOG_ERR, "CFIL: VERDICT: search for socket: id %llu gencnt %llx flowhash %x", cfil_sock_id, gencnt, flowhash);
1632 #endif
1633
1634 if (udp_only) {
1635 goto find_udp;
1636 }
1637
1638 pcbinfo = &tcbinfo;
1639 lck_rw_lock_shared(pcbinfo->ipi_lock);
1640 LIST_FOREACH(inp, pcbinfo->ipi_listhead, inp_list) {
1641 if (inp->inp_state != INPCB_STATE_DEAD &&
1642 inp->inp_socket != NULL &&
1643 inp->inp_flowhash == flowhash &&
1644 (inp->inp_socket->so_gencnt & 0x0ffffffff) == gencnt &&
1645 inp->inp_socket->so_cfil != NULL) {
1646 if (cfil_socket_safe_lock(inp)) {
1647 so = inp->inp_socket;
1648 }
1649 break;
1650 }
1651 }
1652 lck_rw_done(pcbinfo->ipi_lock);
1653 if (so != NULL) {
1654 goto done;
1655 }
1656
1657 find_udp:
1658
1659 pcbinfo = &udbinfo;
1660 lck_rw_lock_shared(pcbinfo->ipi_lock);
1661 LIST_FOREACH(inp, pcbinfo->ipi_listhead, inp_list) {
1662 if (inp->inp_state != INPCB_STATE_DEAD &&
1663 inp->inp_socket != NULL &&
1664 inp->inp_socket->so_cfil_db != NULL &&
1665 (inp->inp_socket->so_gencnt & 0x0ffffffff) == gencnt) {
1666 if (cfil_socket_safe_lock(inp)) {
1667 so = inp->inp_socket;
1668 }
1669 break;
1670 }
1671 }
1672 lck_rw_done(pcbinfo->ipi_lock);
1673
1674 pcbinfo = &ripcbinfo;
1675 lck_rw_lock_shared(pcbinfo->ipi_lock);
1676 LIST_FOREACH(inp, pcbinfo->ipi_listhead, inp_list) {
1677 if (inp->inp_state != INPCB_STATE_DEAD &&
1678 inp->inp_socket != NULL &&
1679 inp->inp_socket->so_cfil_db != NULL &&
1680 (inp->inp_socket->so_gencnt & 0x0ffffffff) == gencnt) {
1681 if (cfil_socket_safe_lock(inp)) {
1682 so = inp->inp_socket;
1683 }
1684 break;
1685 }
1686 }
1687 lck_rw_done(pcbinfo->ipi_lock);
1688
1689 done:
1690 if (so == NULL) {
1691 OSIncrementAtomic(&cfil_stats.cfs_sock_id_not_found);
1692 CFIL_LOG(LOG_DEBUG,
1693 "no socket for sock_id %llx gencnt %llx flowhash %x",
1694 cfil_sock_id, gencnt, flowhash);
1695 }
1696
1697 return so;
1698 }
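
/*
 * Illustrative sketch: how a cfil_sock_id_t is packed and unpacked. The
 * upper 32 bits carry the low half of so_gencnt and the lower 32 bits
 * carry the flow hash, which is exactly what the lookups above compare
 * against (and what cfil_info_alloc() composes below). The helper names
 * are hypothetical.
 */
#if 0
static inline cfil_sock_id_t
cfil_sock_id_pack_sketch(u_int64_t so_gencnt, u_int32_t flowhash)
{
	return (so_gencnt << 32) | flowhash;
}

static inline void
cfil_sock_id_unpack_sketch(cfil_sock_id_t sock_id,
    u_int64_t *gencnt, u_int32_t *flowhash)
{
	*gencnt = sock_id >> 32;                        /* matched against so_gencnt & 0x0ffffffff */
	*flowhash = (u_int32_t)(sock_id & 0x0ffffffff); /* matched against inp_flowhash */
}
#endif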
1699
1700 static struct socket *
1701 cfil_socket_from_client_uuid(uuid_t necp_client_uuid, bool *cfil_attached)
1702 {
1703 struct socket *so = NULL;
1704 struct inpcb *inp = NULL;
1705 struct inpcbinfo *pcbinfo = &tcbinfo;
1706
1707 lck_rw_lock_shared(pcbinfo->ipi_lock);
1708 LIST_FOREACH(inp, pcbinfo->ipi_listhead, inp_list) {
1709 if (inp->inp_state != INPCB_STATE_DEAD &&
1710 inp->inp_socket != NULL &&
1711 uuid_compare(inp->necp_client_uuid, necp_client_uuid) == 0) {
1712 *cfil_attached = (inp->inp_socket->so_cfil != NULL);
1713 if (cfil_socket_safe_lock(inp)) {
1714 so = inp->inp_socket;
1715 }
1716 break;
1717 }
1718 }
1719 lck_rw_done(pcbinfo->ipi_lock);
1720 if (so != NULL) {
1721 goto done;
1722 }
1723
1724 pcbinfo = &udbinfo;
1725 lck_rw_lock_shared(pcbinfo->ipi_lock);
1726 LIST_FOREACH(inp, pcbinfo->ipi_listhead, inp_list) {
1727 if (inp->inp_state != INPCB_STATE_DEAD &&
1728 inp->inp_socket != NULL &&
1729 uuid_compare(inp->necp_client_uuid, necp_client_uuid) == 0) {
1730 *cfil_attached = (inp->inp_socket->so_cfil_db != NULL);
1731 if (cfil_socket_safe_lock(inp)) {
1732 so = inp->inp_socket;
1733 }
1734 break;
1735 }
1736 }
1737 lck_rw_done(pcbinfo->ipi_lock);
1738
1739 done:
1740 return so;
1741 }
1742
1743 static void
1744 cfil_info_stats_toggle(struct cfil_info *cfil_info, struct cfil_entry *entry, uint32_t report_frequency)
1745 {
1746 struct cfil_info *cfil = NULL;
1747 Boolean found = FALSE;
1748 int kcunit;
1749
1750 if (cfil_info == NULL) {
1751 return;
1752 }
1753
1754 if (report_frequency) {
1755 if (entry == NULL) {
1756 return;
1757 }
1758
1759 // Update stats reporting frequency.
1760 if (entry->cfe_stats_report_frequency != report_frequency) {
1761 entry->cfe_stats_report_frequency = report_frequency;
1762 if (entry->cfe_stats_report_frequency < CFIL_STATS_REPORT_INTERVAL_MIN_MSEC) {
1763 entry->cfe_stats_report_frequency = CFIL_STATS_REPORT_INTERVAL_MIN_MSEC;
1764 }
1765 microuptime(&entry->cfe_stats_report_ts);
1766
1767 			// Insert cfil_info into the list only if it is not already there.
1768 TAILQ_FOREACH(cfil, &cfil_sock_head_stats, cfi_link_stats) {
1769 if (cfil == cfil_info) {
1770 return;
1771 }
1772 }
1773
1774 TAILQ_INSERT_TAIL(&cfil_sock_head_stats, cfil_info, cfi_link_stats);
1775
1776 			// Wake up the stats thread if this is the first flow added
1777 if (cfil_sock_attached_stats_count == 0) {
1778 thread_wakeup((caddr_t)&cfil_sock_attached_stats_count);
1779 }
1780 cfil_sock_attached_stats_count++;
1781 #if STATS_DEBUG
1782 CFIL_LOG(LOG_ERR, "CFIL: VERDICT RECEIVED - STATS FLOW INSERTED: <so %llx sockID %llu> stats frequency %d msecs",
1783 cfil_info->cfi_so ? (uint64_t)VM_KERNEL_ADDRPERM(cfil_info->cfi_so) : 0,
1784 cfil_info->cfi_sock_id,
1785 entry->cfe_stats_report_frequency);
1786 #endif
1787 }
1788 } else {
1789 // Turn off stats reporting for this filter.
1790 if (entry != NULL) {
1791 // Already off, no change.
1792 if (entry->cfe_stats_report_frequency == 0) {
1793 return;
1794 }
1795
1796 entry->cfe_stats_report_frequency = 0;
1797 // If cfil_info still has filter(s) asking for stats, no need to remove from list.
1798 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
1799 if (cfil_info->cfi_entries[kcunit - 1].cfe_stats_report_frequency > 0) {
1800 return;
1801 }
1802 }
1803 }
1804
1805 		// No more filters asking for stats for this cfil_info; remove it from the list.
1806 if (!TAILQ_EMPTY(&cfil_sock_head_stats)) {
1807 found = FALSE;
1808 TAILQ_FOREACH(cfil, &cfil_sock_head_stats, cfi_link_stats) {
1809 if (cfil == cfil_info) {
1810 found = TRUE;
1811 break;
1812 }
1813 }
1814 if (found) {
1815 cfil_sock_attached_stats_count--;
1816 TAILQ_REMOVE(&cfil_sock_head_stats, cfil_info, cfi_link_stats);
1817 #if STATS_DEBUG
1818 CFIL_LOG(LOG_ERR, "CFIL: VERDICT RECEIVED - STATS FLOW DELETED: <so %llx sockID %llu> stats frequency reset",
1819 cfil_info->cfi_so ? (uint64_t)VM_KERNEL_ADDRPERM(cfil_info->cfi_so) : 0,
1820 cfil_info->cfi_sock_id);
1821 #endif
1822 }
1823 }
1824 }
1825 }
1826
1827 static errno_t
1828 cfil_ctl_send(kern_ctl_ref kctlref, u_int32_t kcunit, void *unitinfo, mbuf_t m,
1829 int flags)
1830 {
1831 #pragma unused(kctlref, flags)
1832 errno_t error = 0;
1833 struct cfil_msg_hdr *msghdr;
1834 struct content_filter *cfc = (struct content_filter *)unitinfo;
1835 struct socket *so;
1836 struct cfil_msg_action *action_msg;
1837 struct cfil_entry *entry;
1838 struct cfil_info *cfil_info = NULL;
1839 unsigned int data_len = 0;
1840
1841 CFIL_LOG(LOG_INFO, "");
1842
1843 if (content_filters == NULL) {
1844 CFIL_LOG(LOG_ERR, "no content filter");
1845 error = EINVAL;
1846 goto done;
1847 }
1848 if (kcunit > MAX_CONTENT_FILTER) {
1849 CFIL_LOG(LOG_ERR, "kcunit %u > MAX_CONTENT_FILTER (%d)",
1850 kcunit, MAX_CONTENT_FILTER);
1851 error = EINVAL;
1852 goto done;
1853 }
1854 if (m == NULL) {
1855 CFIL_LOG(LOG_ERR, "null mbuf");
1856 error = EINVAL;
1857 goto done;
1858 }
1859 data_len = m_length(m);
1860
1861 if (data_len < sizeof(struct cfil_msg_hdr)) {
1862 CFIL_LOG(LOG_ERR, "too short %u", data_len);
1863 error = EINVAL;
1864 goto done;
1865 }
1866 msghdr = (struct cfil_msg_hdr *)mbuf_data(m);
1867 if (msghdr->cfm_version != CFM_VERSION_CURRENT) {
1868 CFIL_LOG(LOG_ERR, "bad version %u", msghdr->cfm_version);
1869 error = EINVAL;
1870 goto done;
1871 }
1872 if (msghdr->cfm_type != CFM_TYPE_ACTION) {
1873 CFIL_LOG(LOG_ERR, "bad type %u", msghdr->cfm_type);
1874 error = EINVAL;
1875 goto done;
1876 }
1877 if (msghdr->cfm_len > data_len) {
1878 CFIL_LOG(LOG_ERR, "bad length %u", msghdr->cfm_len);
1879 error = EINVAL;
1880 goto done;
1881 }
1882
1883 /* Validate action operation */
1884 switch (msghdr->cfm_op) {
1885 case CFM_OP_DATA_UPDATE:
1886 OSIncrementAtomic(
1887 &cfil_stats.cfs_ctl_action_data_update);
1888 break;
1889 case CFM_OP_DROP:
1890 OSIncrementAtomic(&cfil_stats.cfs_ctl_action_drop);
1891 break;
1892 case CFM_OP_BLESS_CLIENT:
1893 if (msghdr->cfm_len != sizeof(struct cfil_msg_bless_client)) {
1894 OSIncrementAtomic(&cfil_stats.cfs_ctl_action_bad_len);
1895 error = EINVAL;
1896 CFIL_LOG(LOG_ERR, "bad len: %u for op %u",
1897 msghdr->cfm_len,
1898 msghdr->cfm_op);
1899 goto done;
1900 }
1901 error = cfil_action_bless_client(kcunit, msghdr);
1902 goto done;
1903 case CFM_OP_SET_CRYPTO_KEY:
1904 if (msghdr->cfm_len != sizeof(struct cfil_msg_set_crypto_key)) {
1905 OSIncrementAtomic(&cfil_stats.cfs_ctl_action_bad_len);
1906 error = EINVAL;
1907 CFIL_LOG(LOG_ERR, "bad len: %u for op %u",
1908 msghdr->cfm_len,
1909 msghdr->cfm_op);
1910 goto done;
1911 }
1912 error = cfil_action_set_crypto_key(kcunit, msghdr);
1913 goto done;
1914 default:
1915 OSIncrementAtomic(&cfil_stats.cfs_ctl_action_bad_op);
1916 CFIL_LOG(LOG_ERR, "bad op %u", msghdr->cfm_op);
1917 error = EINVAL;
1918 goto done;
1919 }
1920 if (msghdr->cfm_len != sizeof(struct cfil_msg_action)) {
1921 OSIncrementAtomic(&cfil_stats.cfs_ctl_action_bad_len);
1922 error = EINVAL;
1923 CFIL_LOG(LOG_ERR, "bad len: %u for op %u",
1924 msghdr->cfm_len,
1925 msghdr->cfm_op);
1926 goto done;
1927 }
1928 cfil_rw_lock_shared(&cfil_lck_rw);
1929 if (cfc != (void *)content_filters[kcunit - 1]) {
1930 CFIL_LOG(LOG_ERR, "unitinfo does not match for kcunit %u",
1931 kcunit);
1932 error = EINVAL;
1933 cfil_rw_unlock_shared(&cfil_lck_rw);
1934 goto done;
1935 }
1936 cfil_rw_unlock_shared(&cfil_lck_rw);
1937
1938 	// Search for the socket (TCP+UDP) and lock it
1939 so = cfil_socket_from_sock_id(msghdr->cfm_sock_id, false);
1940 if (so == NULL) {
1941 CFIL_LOG(LOG_NOTICE, "bad sock_id %llx",
1942 msghdr->cfm_sock_id);
1943 error = EINVAL;
1944 goto done;
1945 }
1946
1947 cfil_info = so->so_cfil_db != NULL ?
1948 cfil_db_get_cfil_info(so->so_cfil_db, msghdr->cfm_sock_id) : so->so_cfil;
1949
1950 	// Do not take the global lock here, to avoid a deadlock further down this path.
1951 	// Instead, attempt to retain the cfil_info to prevent its deallocation until
1952 	// we are done. Abort the retain if cfil_info has already entered the free code path.
1953 if (cfil_info && os_ref_retain_try(&cfil_info->cfi_ref_count) == false) {
1954 socket_unlock(so, 1);
1955 goto done;
1956 }
1957
1958 if (cfil_info == NULL) {
1959 CFIL_LOG(LOG_NOTICE, "so %llx <id %llu> not attached",
1960 (uint64_t)VM_KERNEL_ADDRPERM(so), msghdr->cfm_sock_id);
1961 error = EINVAL;
1962 goto unlock;
1963 } else if (cfil_info->cfi_flags & CFIF_DROP) {
1964 CFIL_LOG(LOG_NOTICE, "so %llx drop set",
1965 (uint64_t)VM_KERNEL_ADDRPERM(so));
1966 error = EINVAL;
1967 goto unlock;
1968 }
1969
1970 if (cfil_info->cfi_debug) {
1971 cfil_info_log(LOG_ERR, cfil_info, "CFIL: RECEIVED MSG FROM FILTER");
1972 }
1973
1974 entry = &cfil_info->cfi_entries[kcunit - 1];
1975 if (entry->cfe_filter == NULL) {
1976 CFIL_LOG(LOG_NOTICE, "so %llx no filter",
1977 (uint64_t)VM_KERNEL_ADDRPERM(so));
1978 error = EINVAL;
1979 goto unlock;
1980 }
1981
1982 if (entry->cfe_flags & CFEF_SENT_SOCK_ATTACHED) {
1983 entry->cfe_flags |= CFEF_DATA_START;
1984 } else {
1985 CFIL_LOG(LOG_ERR,
1986 "so %llx attached not sent for %u",
1987 (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit);
1988 error = EINVAL;
1989 goto unlock;
1990 }
1991
1992 microuptime(&entry->cfe_last_action);
1993 CFI_ADD_TIME_LOG(cfil_info, &entry->cfe_last_action, &cfil_info->cfi_first_event, msghdr->cfm_op);
1994
1995 action_msg = (struct cfil_msg_action *)msghdr;
1996
1997 switch (msghdr->cfm_op) {
1998 case CFM_OP_DATA_UPDATE:
1999
2000 if (cfil_info->cfi_debug) {
2001 cfil_info_log(LOG_ERR, cfil_info, "CFIL: RECEIVED CFM_OP_DATA_UPDATE");
2002 CFIL_LOG(LOG_ERR, "CFIL: VERDICT RECEIVED: <so %llx sockID %llu> <IN peek:%llu pass:%llu, OUT peek:%llu pass:%llu>",
2003 (uint64_t)VM_KERNEL_ADDRPERM(so),
2004 cfil_info->cfi_sock_id,
2005 action_msg->cfa_in_peek_offset, action_msg->cfa_in_pass_offset,
2006 action_msg->cfa_out_peek_offset, action_msg->cfa_out_pass_offset);
2007 }
2008
2009 #if VERDICT_DEBUG
2010 CFIL_LOG(LOG_ERR, "CFIL: VERDICT RECEIVED: <so %llx sockID %llu> <IN peek:%llu pass:%llu, OUT peek:%llu pass:%llu>",
2011 (uint64_t)VM_KERNEL_ADDRPERM(so),
2012 cfil_info->cfi_sock_id,
2013 action_msg->cfa_in_peek_offset, action_msg->cfa_in_pass_offset,
2014 action_msg->cfa_out_peek_offset, action_msg->cfa_out_pass_offset);
2015 #endif
2016 /*
2017 * Received verdict, at this point we know this
2018 * socket connection is allowed. Unblock thread
2019 * immediately before proceeding to process the verdict.
2020 */
2021 cfil_sock_received_verdict(so);
2022
2023 if (action_msg->cfa_out_peek_offset != 0 ||
2024 action_msg->cfa_out_pass_offset != 0) {
2025 error = cfil_action_data_pass(so, cfil_info, kcunit, 1,
2026 action_msg->cfa_out_pass_offset,
2027 action_msg->cfa_out_peek_offset);
2028 }
2029 if (error == EJUSTRETURN) {
2030 error = 0;
2031 }
2032 if (error != 0) {
2033 break;
2034 }
2035 if (action_msg->cfa_in_peek_offset != 0 ||
2036 action_msg->cfa_in_pass_offset != 0) {
2037 error = cfil_action_data_pass(so, cfil_info, kcunit, 0,
2038 action_msg->cfa_in_pass_offset,
2039 action_msg->cfa_in_peek_offset);
2040 }
2041 if (error == EJUSTRETURN) {
2042 error = 0;
2043 }
2044
2045 // Toggle stats reporting according to received verdict.
2046 cfil_rw_lock_exclusive(&cfil_lck_rw);
2047 cfil_info_stats_toggle(cfil_info, entry, action_msg->cfa_stats_frequency);
2048 cfil_rw_unlock_exclusive(&cfil_lck_rw);
2049
2050 break;
2051
2052 case CFM_OP_DROP:
2053 if (cfil_info->cfi_debug) {
2054 cfil_info_log(LOG_ERR, cfil_info, "CFIL: RECEIVED CFM_OP_DROP");
2055 CFIL_LOG(LOG_ERR, "CFIL: VERDICT DROP RECEIVED: <so %llx sockID %llu> <IN peek:%llu pass:%llu, OUT peek:%llu pass:%llu>",
2056 (uint64_t)VM_KERNEL_ADDRPERM(so),
2057 cfil_info->cfi_sock_id,
2058 action_msg->cfa_in_peek_offset, action_msg->cfa_in_pass_offset,
2059 action_msg->cfa_out_peek_offset, action_msg->cfa_out_pass_offset);
2060 }
2061
2062 #if VERDICT_DEBUG
2063 CFIL_LOG(LOG_ERR, "CFIL: VERDICT DROP RECEIVED: <so %llx sockID %llu> <IN peek:%llu pass:%llu, OUT peek:%llu pass:%llu>",
2064 (uint64_t)VM_KERNEL_ADDRPERM(so),
2065 cfil_info->cfi_sock_id,
2066 action_msg->cfa_in_peek_offset, action_msg->cfa_in_pass_offset,
2067 action_msg->cfa_out_peek_offset, action_msg->cfa_out_pass_offset);
2068 #endif
2069 error = cfil_action_drop(so, cfil_info, kcunit);
2070 cfil_sock_received_verdict(so);
2071 break;
2072
2073 default:
2074 error = EINVAL;
2075 break;
2076 }
2077 unlock:
2078 CFIL_INFO_FREE(cfil_info)
2079 socket_unlock(so, 1);
2080 done:
2081 mbuf_freem(m);
2082
2083 if (error == 0) {
2084 OSIncrementAtomic(&cfil_stats.cfs_ctl_send_ok);
2085 } else {
2086 OSIncrementAtomic(&cfil_stats.cfs_ctl_send_bad);
2087 }
2088
2089 return error;
2090 }
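
/*
 * Illustrative sketch (user space): the kind of CFM_OP_DATA_UPDATE action
 * message that cfil_ctl_send() above expects, here granting an
 * "allow everything" verdict. Assumes fd is a connected content filter
 * kernel control socket and sock_id came from a CFM_OP_SOCKET_ATTACHED
 * event. The offset fields are those read by cfil_ctl_send() above; the
 * header field name cfa_msghdr, the CFM_MAX_OFFSET sentinel and the helper
 * name are assumptions of this sketch.
 */
#if 0
static int
send_pass_verdict_sketch(int fd, cfil_sock_id_t sock_id)
{
	struct cfil_msg_action action = { 0 };

	action.cfa_msghdr.cfm_len = sizeof(action);
	action.cfa_msghdr.cfm_version = CFM_VERSION_CURRENT;
	action.cfa_msghdr.cfm_type = CFM_TYPE_ACTION;
	action.cfa_msghdr.cfm_op = CFM_OP_DATA_UPDATE;
	action.cfa_msghdr.cfm_sock_id = sock_id;

	/* Let all pending and future data pass in both directions */
	action.cfa_in_pass_offset = CFM_MAX_OFFSET;
	action.cfa_in_peek_offset = CFM_MAX_OFFSET;
	action.cfa_out_pass_offset = CFM_MAX_OFFSET;
	action.cfa_out_peek_offset = CFM_MAX_OFFSET;
	action.cfa_stats_frequency = 0; /* no periodic stats reports */

	return send(fd, &action, sizeof(action), 0) == sizeof(action) ? 0 : -1;
}
#endif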
2091
2092 static errno_t
2093 cfil_ctl_getopt(kern_ctl_ref kctlref, u_int32_t kcunit, void *unitinfo,
2094 int opt, void *data, size_t *len)
2095 {
2096 #pragma unused(kctlref, opt)
2097 struct cfil_info *cfil_info = NULL;
2098 errno_t error = 0;
2099 struct content_filter *cfc = (struct content_filter *)unitinfo;
2100
2101 CFIL_LOG(LOG_NOTICE, "");
2102
2103 cfil_rw_lock_shared(&cfil_lck_rw);
2104
2105 if (content_filters == NULL) {
2106 CFIL_LOG(LOG_ERR, "no content filter");
2107 error = EINVAL;
2108 goto done;
2109 }
2110 if (kcunit > MAX_CONTENT_FILTER) {
2111 CFIL_LOG(LOG_ERR, "kcunit %u > MAX_CONTENT_FILTER (%d)",
2112 kcunit, MAX_CONTENT_FILTER);
2113 error = EINVAL;
2114 goto done;
2115 }
2116 if (cfc != (void *)content_filters[kcunit - 1]) {
2117 CFIL_LOG(LOG_ERR, "unitinfo does not match for kcunit %u",
2118 kcunit);
2119 error = EINVAL;
2120 goto done;
2121 }
2122 switch (opt) {
2123 case CFIL_OPT_NECP_CONTROL_UNIT:
2124 if (*len < sizeof(uint32_t)) {
2125 CFIL_LOG(LOG_ERR, "len too small %lu", *len);
2126 error = EINVAL;
2127 goto done;
2128 }
2129 if (data != NULL) {
2130 *(uint32_t *)data = cfc->cf_necp_control_unit;
2131 }
2132 break;
2133 case CFIL_OPT_GET_SOCKET_INFO:
2134 if (*len != sizeof(struct cfil_opt_sock_info)) {
2135 CFIL_LOG(LOG_ERR, "len does not match %lu", *len);
2136 error = EINVAL;
2137 goto done;
2138 }
2139 if (data == NULL) {
2140 CFIL_LOG(LOG_ERR, "data not passed");
2141 error = EINVAL;
2142 goto done;
2143 }
2144
2145 struct cfil_opt_sock_info *sock_info =
2146 (struct cfil_opt_sock_info *) data;
2147
2148 // Unlock here so that we never hold both cfil_lck_rw and the
2149 // socket_lock at the same time. Otherwise, this can deadlock
2150 // because soclose() takes the socket_lock and then exclusive
2151 // cfil_lck_rw and we require the opposite order.
2152
2153 // WARNING: Be sure to never use anything protected
2154 // by cfil_lck_rw beyond this point.
2155 // WARNING: Be sure to avoid fallthrough and
2156 // goto return_already_unlocked from this branch.
2157 cfil_rw_unlock_shared(&cfil_lck_rw);
2158
2159 // Search (TCP+UDP) and lock socket
2160 struct socket *sock =
2161 cfil_socket_from_sock_id(sock_info->cfs_sock_id, false);
2162 if (sock == NULL) {
2163 #if LIFECYCLE_DEBUG
2164 CFIL_LOG(LOG_ERR, "CFIL: GET_SOCKET_INFO failed: bad sock_id %llu",
2165 sock_info->cfs_sock_id);
2166 #endif
2167 error = ENOENT;
2168 goto return_already_unlocked;
2169 }
2170
2171 cfil_info = (sock->so_cfil_db != NULL) ?
2172 cfil_db_get_cfil_info(sock->so_cfil_db, sock_info->cfs_sock_id) : sock->so_cfil;
2173
2174 if (cfil_info == NULL) {
2175 #if LIFECYCLE_DEBUG
2176 CFIL_LOG(LOG_ERR, "CFIL: GET_SOCKET_INFO failed: so %llx not attached, cannot fetch info",
2177 (uint64_t)VM_KERNEL_ADDRPERM(sock));
2178 #endif
2179 error = EINVAL;
2180 socket_unlock(sock, 1);
2181 goto return_already_unlocked;
2182 }
2183
2184 // Fill out family, type, and protocol
2185 sock_info->cfs_sock_family = sock->so_proto->pr_domain->dom_family;
2186 sock_info->cfs_sock_type = sock->so_proto->pr_type;
2187 sock_info->cfs_sock_protocol = sock->so_proto->pr_protocol;
2188
2189 // Source and destination addresses
2190 struct inpcb *inp = sotoinpcb(sock);
2191 if (inp->inp_vflag & INP_IPV6) {
2192 struct in6_addr *laddr = NULL, *faddr = NULL;
2193 u_int16_t lport = 0, fport = 0;
2194
2195 cfil_get_flow_address_v6(cfil_info->cfi_hash_entry, inp,
2196 &laddr, &faddr, &lport, &fport);
2197 fill_ip6_sockaddr_4_6(&sock_info->cfs_local, laddr, lport);
2198 fill_ip6_sockaddr_4_6(&sock_info->cfs_remote, faddr, fport);
2199 } else if (inp->inp_vflag & INP_IPV4) {
2200 struct in_addr laddr = {.s_addr = 0}, faddr = {.s_addr = 0};
2201 u_int16_t lport = 0, fport = 0;
2202
2203 cfil_get_flow_address(cfil_info->cfi_hash_entry, inp,
2204 &laddr, &faddr, &lport, &fport);
2205 fill_ip_sockaddr_4_6(&sock_info->cfs_local, laddr, lport);
2206 fill_ip_sockaddr_4_6(&sock_info->cfs_remote, faddr, fport);
2207 }
2208
2209 // Set the pid info
2210 sock_info->cfs_pid = sock->last_pid;
2211 memcpy(sock_info->cfs_uuid, sock->last_uuid, sizeof(uuid_t));
2212
2213 if (sock->so_flags & SOF_DELEGATED) {
2214 sock_info->cfs_e_pid = sock->e_pid;
2215 memcpy(sock_info->cfs_e_uuid, sock->e_uuid, sizeof(uuid_t));
2216 } else {
2217 sock_info->cfs_e_pid = sock->last_pid;
2218 memcpy(sock_info->cfs_e_uuid, sock->last_uuid, sizeof(uuid_t));
2219 }
2220
2221 socket_unlock(sock, 1);
2222
2223 goto return_already_unlocked;
2224 default:
2225 error = ENOPROTOOPT;
2226 break;
2227 }
2228 done:
2229 cfil_rw_unlock_shared(&cfil_lck_rw);
2230
2231 return error;
2232
2233 return_already_unlocked:
2234
2235 return error;
2236 }
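
/*
 * Illustrative sketch (user space): querying flow metadata through
 * CFIL_OPT_GET_SOCKET_INFO. Kernel control sockets take their socket
 * options at the SYSPROTO_CONTROL level, and the handler above requires
 * the option length to be exactly sizeof(struct cfil_opt_sock_info).
 * The helper name is hypothetical.
 */
#if 0
static int
get_socket_info_sketch(int fd, cfil_sock_id_t sock_id,
    struct cfil_opt_sock_info *info)
{
	socklen_t len = sizeof(*info);

	memset(info, 0, sizeof(*info));
	info->cfs_sock_id = sock_id;    /* input: which flow to look up */

	if (getsockopt(fd, SYSPROTO_CONTROL, CFIL_OPT_GET_SOCKET_INFO,
	    info, &len) != 0) {
		return -1;              /* e.g. ENOENT once the flow is gone */
	}
	/* info->cfs_local, cfs_remote, cfs_pid, ... are now filled in */
	return 0;
}
#endif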
2237
2238 static errno_t
2239 cfil_ctl_setopt(kern_ctl_ref kctlref, u_int32_t kcunit, void *unitinfo,
2240 int opt, void *data, size_t len)
2241 {
2242 #pragma unused(kctlref, opt)
2243 errno_t error = 0;
2244 struct content_filter *cfc = (struct content_filter *)unitinfo;
2245
2246 CFIL_LOG(LOG_NOTICE, "");
2247
2248 cfil_rw_lock_exclusive(&cfil_lck_rw);
2249
2250 if (content_filters == NULL) {
2251 CFIL_LOG(LOG_ERR, "no content filter");
2252 error = EINVAL;
2253 goto done;
2254 }
2255 if (kcunit > MAX_CONTENT_FILTER) {
2256 CFIL_LOG(LOG_ERR, "kcunit %u > MAX_CONTENT_FILTER (%d)",
2257 kcunit, MAX_CONTENT_FILTER);
2258 error = EINVAL;
2259 goto done;
2260 }
2261 if (cfc != (void *)content_filters[kcunit - 1]) {
2262 CFIL_LOG(LOG_ERR, "unitinfo does not match for kcunit %u",
2263 kcunit);
2264 error = EINVAL;
2265 goto done;
2266 }
2267 switch (opt) {
2268 case CFIL_OPT_NECP_CONTROL_UNIT:
2269 if (len < sizeof(uint32_t)) {
2270 CFIL_LOG(LOG_ERR, "CFIL_OPT_NECP_CONTROL_UNIT "
2271 "len too small %lu", len);
2272 error = EINVAL;
2273 goto done;
2274 }
2275 if (cfc->cf_necp_control_unit != 0) {
2276 CFIL_LOG(LOG_ERR, "CFIL_OPT_NECP_CONTROL_UNIT "
2277 "already set %u",
2278 cfc->cf_necp_control_unit);
2279 error = EINVAL;
2280 goto done;
2281 }
2282 cfc->cf_necp_control_unit = *(uint32_t *)data;
2283 break;
2284 default:
2285 error = ENOPROTOOPT;
2286 break;
2287 }
2288 done:
2289 cfil_rw_unlock_exclusive(&cfil_lck_rw);
2290
2291 return error;
2292 }
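
/*
 * Illustrative sketch (user space): binding the filter agent to its NECP
 * filter control unit right after connecting the kernel control socket.
 * As enforced above, this can only be done once per control socket.
 * The helper name is hypothetical.
 */
#if 0
static int
set_necp_control_unit_sketch(int fd, uint32_t control_unit)
{
	return setsockopt(fd, SYSPROTO_CONTROL, CFIL_OPT_NECP_CONTROL_UNIT,
	    &control_unit, sizeof(control_unit));
}
#endif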
2293
2294
2295 static void
2296 cfil_ctl_rcvd(kern_ctl_ref kctlref, u_int32_t kcunit, void *unitinfo, int flags)
2297 {
2298 #pragma unused(kctlref, flags)
2299 struct content_filter *cfc = (struct content_filter *)unitinfo;
2300 struct socket *so = NULL;
2301 int error;
2302 struct cfil_entry *entry;
2303 struct cfil_info *cfil_info = NULL;
2304
2305 CFIL_LOG(LOG_INFO, "");
2306
2307 if (content_filters == NULL) {
2308 CFIL_LOG(LOG_ERR, "no content filter");
2309 OSIncrementAtomic(&cfil_stats.cfs_ctl_rcvd_bad);
2310 return;
2311 }
2312 if (kcunit > MAX_CONTENT_FILTER) {
2313 CFIL_LOG(LOG_ERR, "kcunit %u > MAX_CONTENT_FILTER (%d)",
2314 kcunit, MAX_CONTENT_FILTER);
2315 OSIncrementAtomic(&cfil_stats.cfs_ctl_rcvd_bad);
2316 return;
2317 }
2318 cfil_rw_lock_shared(&cfil_lck_rw);
2319 if (cfc != (void *)content_filters[kcunit - 1]) {
2320 CFIL_LOG(LOG_ERR, "unitinfo does not match for kcunit %u",
2321 kcunit);
2322 OSIncrementAtomic(&cfil_stats.cfs_ctl_rcvd_bad);
2323 goto done;
2324 }
2325 /* Let's assume the flow control is lifted */
2326 if (cfc->cf_flags & CFF_FLOW_CONTROLLED) {
2327 if (!cfil_rw_lock_shared_to_exclusive(&cfil_lck_rw)) {
2328 cfil_rw_lock_exclusive(&cfil_lck_rw);
2329 }
2330
2331 cfc->cf_flags &= ~CFF_FLOW_CONTROLLED;
2332
2333 cfil_rw_lock_exclusive_to_shared(&cfil_lck_rw);
2334 LCK_RW_ASSERT(&cfil_lck_rw, LCK_RW_ASSERT_SHARED);
2335 }
2336 /*
2337 * Flow control will be raised again as soon as an entry cannot enqueue
2338 * to the kernel control socket
2339 */
2340 while ((cfc->cf_flags & CFF_FLOW_CONTROLLED) == 0) {
2341 verify_content_filter(cfc);
2342
2343 cfil_rw_lock_assert_held(&cfil_lck_rw, 0);
2344
2345 /* Find an entry that is flow controlled */
2346 TAILQ_FOREACH(entry, &cfc->cf_sock_entries, cfe_link) {
2347 if (entry->cfe_cfil_info == NULL ||
2348 entry->cfe_cfil_info->cfi_so == NULL) {
2349 continue;
2350 }
2351 if ((entry->cfe_flags & CFEF_FLOW_CONTROLLED) == 0) {
2352 continue;
2353 			}
2354 			break;
2355 		}
2355 if (entry == NULL) {
2356 break;
2357 }
2358
2359 OSIncrementAtomic(&cfil_stats.cfs_ctl_rcvd_flow_lift);
2360
2361 cfil_info = entry->cfe_cfil_info;
2362 so = cfil_info->cfi_so;
2363
2364 cfil_rw_unlock_shared(&cfil_lck_rw);
2365 socket_lock(so, 1);
2366
2367 do {
2368 error = cfil_acquire_sockbuf(so, cfil_info, 1);
2369 if (error == 0) {
2370 error = cfil_data_service_ctl_q(so, cfil_info, kcunit, 1);
2371 }
2372 cfil_release_sockbuf(so, 1);
2373 if (error != 0) {
2374 break;
2375 }
2376
2377 error = cfil_acquire_sockbuf(so, cfil_info, 0);
2378 if (error == 0) {
2379 error = cfil_data_service_ctl_q(so, cfil_info, kcunit, 0);
2380 }
2381 cfil_release_sockbuf(so, 0);
2382 } while (0);
2383
2384 socket_lock_assert_owned(so);
2385 socket_unlock(so, 1);
2386
2387 cfil_rw_lock_shared(&cfil_lck_rw);
2388 }
2389 done:
2390 cfil_rw_unlock_shared(&cfil_lck_rw);
2391 }
2392
2393 void
2394 cfil_init(void)
2395 {
2396 struct kern_ctl_reg kern_ctl;
2397 errno_t error = 0;
2398 unsigned int mbuf_limit = 0;
2399
2400 CFIL_LOG(LOG_NOTICE, "");
2401
2402 /*
2403 * Compile time verifications
2404 */
2405 _CASSERT(CFIL_MAX_FILTER_COUNT == MAX_CONTENT_FILTER);
2406 _CASSERT(sizeof(struct cfil_filter_stat) % sizeof(uint32_t) == 0);
2407 _CASSERT(sizeof(struct cfil_entry_stat) % sizeof(uint32_t) == 0);
2408 _CASSERT(sizeof(struct cfil_sock_stat) % sizeof(uint32_t) == 0);
2409
2410 /*
2411 	 * Run-time verifications
2412 */
2413 VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_ctl_q_in_enqueued,
2414 sizeof(uint32_t)));
2415 VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_ctl_q_out_enqueued,
2416 sizeof(uint32_t)));
2417 VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_ctl_q_in_peeked,
2418 sizeof(uint32_t)));
2419 VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_ctl_q_out_peeked,
2420 sizeof(uint32_t)));
2421
2422 VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_pending_q_in_enqueued,
2423 sizeof(uint32_t)));
2424 VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_pending_q_out_enqueued,
2425 sizeof(uint32_t)));
2426
2427 VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_inject_q_in_enqueued,
2428 sizeof(uint32_t)));
2429 VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_inject_q_out_enqueued,
2430 sizeof(uint32_t)));
2431 VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_inject_q_in_passed,
2432 sizeof(uint32_t)));
2433 VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_inject_q_out_passed,
2434 sizeof(uint32_t)));
2435
2436 /*
2437 * Allocate locks
2438 */
2439 cfil_lck_grp_attr = lck_grp_attr_alloc_init();
2440 if (cfil_lck_grp_attr == NULL) {
2441 panic("%s: lck_grp_attr_alloc_init failed", __func__);
2442 /* NOTREACHED */
2443 }
2444 cfil_lck_grp = lck_grp_alloc_init("content filter",
2445 cfil_lck_grp_attr);
2446 if (cfil_lck_grp == NULL) {
2447 panic("%s: lck_grp_alloc_init failed", __func__);
2448 /* NOTREACHED */
2449 }
2450 cfil_lck_attr = lck_attr_alloc_init();
2451 if (cfil_lck_attr == NULL) {
2452 panic("%s: lck_attr_alloc_init failed", __func__);
2453 /* NOTREACHED */
2454 }
2455 lck_rw_init(&cfil_lck_rw, cfil_lck_grp, cfil_lck_attr);
2456
2457 TAILQ_INIT(&cfil_sock_head);
2458 TAILQ_INIT(&cfil_sock_head_stats);
2459
2460 /*
2461 * Register kernel control
2462 */
2463 bzero(&kern_ctl, sizeof(kern_ctl));
2464 strlcpy(kern_ctl.ctl_name, CONTENT_FILTER_CONTROL_NAME,
2465 sizeof(kern_ctl.ctl_name));
2466 kern_ctl.ctl_flags = CTL_FLAG_PRIVILEGED | CTL_FLAG_REG_EXTENDED;
2467 kern_ctl.ctl_sendsize = 512 * 1024; /* enough? */
2468 kern_ctl.ctl_recvsize = 512 * 1024; /* enough? */
2469 kern_ctl.ctl_connect = cfil_ctl_connect;
2470 kern_ctl.ctl_disconnect = cfil_ctl_disconnect;
2471 kern_ctl.ctl_send = cfil_ctl_send;
2472 kern_ctl.ctl_getopt = cfil_ctl_getopt;
2473 kern_ctl.ctl_setopt = cfil_ctl_setopt;
2474 kern_ctl.ctl_rcvd = cfil_ctl_rcvd;
2475 error = ctl_register(&kern_ctl, &cfil_kctlref);
2476 if (error != 0) {
2477 CFIL_LOG(LOG_ERR, "ctl_register failed: %d", error);
2478 return;
2479 }
2480
2481 	// Spawn thread for garbage collection
2482 if (kernel_thread_start(cfil_udp_gc_thread_func, NULL,
2483 &cfil_udp_gc_thread) != KERN_SUCCESS) {
2484 panic_plain("%s: Can't create UDP GC thread", __func__);
2485 /* NOTREACHED */
2486 }
2487 /* this must not fail */
2488 VERIFY(cfil_udp_gc_thread != NULL);
2489
2490 // Spawn thread for statistics reporting
2491 if (kernel_thread_start(cfil_stats_report_thread_func, NULL,
2492 &cfil_stats_report_thread) != KERN_SUCCESS) {
2493 panic_plain("%s: Can't create statistics report thread", __func__);
2494 /* NOTREACHED */
2495 }
2496 /* this must not fail */
2497 VERIFY(cfil_stats_report_thread != NULL);
2498
2499 // Set UDP per-flow mbuf thresholds to 1/32 of platform max
2500 mbuf_limit = MAX(UDP_FLOW_GC_MBUF_CNT_MAX, (nmbclusters << MCLSHIFT) >> UDP_FLOW_GC_MBUF_SHIFT);
2501 cfil_udp_gc_mbuf_num_max = (mbuf_limit >> MCLSHIFT);
2502 cfil_udp_gc_mbuf_cnt_max = mbuf_limit;
2503
2504 memset(&global_cfil_stats_report_buffers, 0, sizeof(global_cfil_stats_report_buffers));
2505 }
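
/*
 * Illustrative sketch (user space): attaching a filter agent to the kernel
 * control registered by cfil_init() above, using the standard kernel
 * control lookup/connect sequence. Assumes CONTENT_FILTER_CONTROL_NAME is
 * available from <net/content_filter.h>; CTL_FLAG_PRIVILEGED means the
 * caller must be suitably privileged. Error handling is trimmed and the
 * helper name is hypothetical.
 */
#if 0
#include <sys/socket.h>
#include <sys/ioctl.h>
#include <sys/sys_domain.h>
#include <sys/kern_control.h>
#include <string.h>
#include <unistd.h>

static int
connect_content_filter_sketch(void)
{
	struct ctl_info info;
	struct sockaddr_ctl addr;
	int fd;

	fd = socket(PF_SYSTEM, SOCK_DGRAM, SYSPROTO_CONTROL);
	if (fd < 0) {
		return -1;
	}

	memset(&info, 0, sizeof(info));
	strlcpy(info.ctl_name, CONTENT_FILTER_CONTROL_NAME, sizeof(info.ctl_name));
	if (ioctl(fd, CTLIOCGINFO, &info) != 0) {   /* resolve name -> ctl_id */
		close(fd);
		return -1;
	}

	memset(&addr, 0, sizeof(addr));
	addr.sc_len = sizeof(addr);
	addr.sc_family = AF_SYSTEM;
	addr.ss_sysaddr = AF_SYS_CONTROL;
	addr.sc_id = info.ctl_id;
	addr.sc_unit = 0;   /* let the kernel pick the unit (kcunit) */

	if (connect(fd, (struct sockaddr *)&addr, sizeof(addr)) != 0) {
		close(fd);
		return -1;
	}
	return fd;          /* cfil_ctl_connect() has now run for this unit */
}
#endif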
2506
2507 struct cfil_info *
2508 cfil_info_alloc(struct socket *so, struct cfil_hash_entry *hash_entry)
2509 {
2510 int kcunit;
2511 struct cfil_info *cfil_info = NULL;
2512 struct inpcb *inp = sotoinpcb(so);
2513
2514 CFIL_LOG(LOG_INFO, "");
2515
2516 socket_lock_assert_owned(so);
2517
2518 cfil_info = zalloc(cfil_info_zone);
2519 if (cfil_info == NULL) {
2520 goto done;
2521 }
2522 bzero(cfil_info, sizeof(struct cfil_info));
2523 os_ref_init(&cfil_info->cfi_ref_count, &cfil_refgrp);
2524
2525 cfil_queue_init(&cfil_info->cfi_snd.cfi_inject_q);
2526 cfil_queue_init(&cfil_info->cfi_rcv.cfi_inject_q);
2527
2528 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
2529 struct cfil_entry *entry;
2530
2531 entry = &cfil_info->cfi_entries[kcunit - 1];
2532 entry->cfe_cfil_info = cfil_info;
2533
2534 /* Initialize the filter entry */
2535 entry->cfe_filter = NULL;
2536 entry->cfe_flags = 0;
2537 entry->cfe_necp_control_unit = 0;
2538 entry->cfe_snd.cfe_pass_offset = 0;
2539 entry->cfe_snd.cfe_peek_offset = 0;
2540 entry->cfe_snd.cfe_peeked = 0;
2541 entry->cfe_rcv.cfe_pass_offset = 0;
2542 entry->cfe_rcv.cfe_peek_offset = 0;
2543 entry->cfe_rcv.cfe_peeked = 0;
2544 /*
2545 		 * Timestamp the last action to avoid prematurely
2546 * triggering garbage collection
2547 */
2548 microuptime(&entry->cfe_last_action);
2549
2550 cfil_queue_init(&entry->cfe_snd.cfe_pending_q);
2551 cfil_queue_init(&entry->cfe_rcv.cfe_pending_q);
2552 cfil_queue_init(&entry->cfe_snd.cfe_ctl_q);
2553 cfil_queue_init(&entry->cfe_rcv.cfe_ctl_q);
2554 }
2555
2556 cfil_rw_lock_exclusive(&cfil_lck_rw);
2557
2558 /*
2559 * Create a cfi_sock_id that's not the socket pointer!
2560 */
2561
2562 if (hash_entry == NULL) {
2563 // This is the TCP case, cfil_info is tracked per socket
2564 if (inp->inp_flowhash == 0) {
2565 inp->inp_flowhash = inp_calc_flowhash(inp);
2566 }
2567
2568 so->so_cfil = cfil_info;
2569 cfil_info->cfi_so = so;
2570 cfil_info->cfi_sock_id =
2571 ((so->so_gencnt << 32) | inp->inp_flowhash);
2572 } else {
2573 // This is the UDP case, cfil_info is tracked in per-socket hash
2574 cfil_info->cfi_so = so;
2575 hash_entry->cfentry_cfil = cfil_info;
2576 cfil_info->cfi_hash_entry = hash_entry;
2577 cfil_info->cfi_sock_id = ((so->so_gencnt << 32) | (hash_entry->cfentry_flowhash & 0xffffffff));
2578 CFIL_LOG(LOG_DEBUG, "CFIL: UDP inp_flowhash %x so_gencnt %llx entry flowhash %x sockID %llx",
2579 inp->inp_flowhash, so->so_gencnt, hash_entry->cfentry_flowhash, cfil_info->cfi_sock_id);
2580
2581 		// Wake up the gc thread if this is the first flow added
2582 if (cfil_sock_udp_attached_count == 0) {
2583 thread_wakeup((caddr_t)&cfil_sock_udp_attached_count);
2584 }
2585
2586 cfil_sock_udp_attached_count++;
2587 }
2588
2589 TAILQ_INSERT_TAIL(&cfil_sock_head, cfil_info, cfi_link);
2590 SLIST_INIT(&cfil_info->cfi_ordered_entries);
2591
2592 cfil_sock_attached_count++;
2593
2594 cfil_rw_unlock_exclusive(&cfil_lck_rw);
2595
2596 done:
2597 if (cfil_info != NULL) {
2598 OSIncrementAtomic(&cfil_stats.cfs_cfi_alloc_ok);
2599 } else {
2600 OSIncrementAtomic(&cfil_stats.cfs_cfi_alloc_fail);
2601 }
2602
2603 return cfil_info;
2604 }
2605
2606 int
2607 cfil_info_attach_unit(struct socket *so, uint32_t filter_control_unit, struct cfil_info *cfil_info)
2608 {
2609 int kcunit;
2610 int attached = 0;
2611
2612 CFIL_LOG(LOG_INFO, "");
2613
2614 socket_lock_assert_owned(so);
2615
2616 cfil_rw_lock_exclusive(&cfil_lck_rw);
2617
2618 for (kcunit = 1;
2619 content_filters != NULL && kcunit <= MAX_CONTENT_FILTER;
2620 kcunit++) {
2621 struct content_filter *cfc = content_filters[kcunit - 1];
2622 struct cfil_entry *entry;
2623 struct cfil_entry *iter_entry;
2624 struct cfil_entry *iter_prev;
2625
2626 if (cfc == NULL) {
2627 continue;
2628 }
2629 if (!(cfc->cf_necp_control_unit & filter_control_unit)) {
2630 continue;
2631 }
2632
2633 entry = &cfil_info->cfi_entries[kcunit - 1];
2634
2635 entry->cfe_filter = cfc;
2636 entry->cfe_necp_control_unit = cfc->cf_necp_control_unit;
2637 TAILQ_INSERT_TAIL(&cfc->cf_sock_entries, entry, cfe_link);
2638 cfc->cf_sock_count++;
2639
2640 /* Insert the entry into the list ordered by control unit */
2641 iter_prev = NULL;
2642 SLIST_FOREACH(iter_entry, &cfil_info->cfi_ordered_entries, cfe_order_link) {
2643 if (entry->cfe_necp_control_unit < iter_entry->cfe_necp_control_unit) {
2644 break;
2645 }
2646 iter_prev = iter_entry;
2647 }
2648
2649 if (iter_prev == NULL) {
2650 SLIST_INSERT_HEAD(&cfil_info->cfi_ordered_entries, entry, cfe_order_link);
2651 } else {
2652 SLIST_INSERT_AFTER(iter_prev, entry, cfe_order_link);
2653 }
2654
2655 verify_content_filter(cfc);
2656 attached = 1;
2657 entry->cfe_flags |= CFEF_CFIL_ATTACHED;
2658 }
2659
2660 cfil_rw_unlock_exclusive(&cfil_lck_rw);
2661
2662 return attached;
2663 }
2664
2665 static void
2666 cfil_info_free(struct cfil_info *cfil_info)
2667 {
2668 int kcunit;
2669 uint64_t in_drain = 0;
2670 uint64_t out_drained = 0;
2671
2672 if (cfil_info == NULL) {
2673 return;
2674 }
2675
2676 CFIL_LOG(LOG_INFO, "");
2677
2678 cfil_rw_lock_exclusive(&cfil_lck_rw);
2679
2680 for (kcunit = 1;
2681 content_filters != NULL && kcunit <= MAX_CONTENT_FILTER;
2682 kcunit++) {
2683 struct cfil_entry *entry;
2684 struct content_filter *cfc;
2685
2686 entry = &cfil_info->cfi_entries[kcunit - 1];
2687
2688 /* Don't be silly and try to detach twice */
2689 if (entry->cfe_filter == NULL) {
2690 continue;
2691 }
2692
2693 cfc = content_filters[kcunit - 1];
2694
2695 VERIFY(cfc == entry->cfe_filter);
2696
2697 entry->cfe_filter = NULL;
2698 entry->cfe_necp_control_unit = 0;
2699 TAILQ_REMOVE(&cfc->cf_sock_entries, entry, cfe_link);
2700 cfc->cf_sock_count--;
2701
2702 verify_content_filter(cfc);
2703 }
2704 if (cfil_info->cfi_hash_entry != NULL) {
2705 cfil_sock_udp_attached_count--;
2706 }
2707 cfil_sock_attached_count--;
2708 TAILQ_REMOVE(&cfil_sock_head, cfil_info, cfi_link);
2709
2710 // Turn off stats reporting for cfil_info.
2711 cfil_info_stats_toggle(cfil_info, NULL, 0);
2712
2713 out_drained += cfil_queue_drain(&cfil_info->cfi_snd.cfi_inject_q);
2714 in_drain += cfil_queue_drain(&cfil_info->cfi_rcv.cfi_inject_q);
2715
2716 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
2717 struct cfil_entry *entry;
2718
2719 entry = &cfil_info->cfi_entries[kcunit - 1];
2720 out_drained += cfil_queue_drain(&entry->cfe_snd.cfe_pending_q);
2721 in_drain += cfil_queue_drain(&entry->cfe_rcv.cfe_pending_q);
2722 out_drained += cfil_queue_drain(&entry->cfe_snd.cfe_ctl_q);
2723 in_drain += cfil_queue_drain(&entry->cfe_rcv.cfe_ctl_q);
2724 }
2725 cfil_rw_unlock_exclusive(&cfil_lck_rw);
2726
2727 if (out_drained) {
2728 OSIncrementAtomic(&cfil_stats.cfs_flush_out_free);
2729 }
2730 if (in_drain) {
2731 OSIncrementAtomic(&cfil_stats.cfs_flush_in_free);
2732 }
2733
2734 zfree(cfil_info_zone, cfil_info);
2735 }
2736
2737 /*
2738 * Received a verdict from userspace for a socket.
2739 * Perform any delayed operation if needed.
2740 */
2741 static void
2742 cfil_sock_received_verdict(struct socket *so)
2743 {
2744 if (so == NULL || so->so_cfil == NULL) {
2745 return;
2746 }
2747
2748 so->so_cfil->cfi_flags |= CFIF_INITIAL_VERDICT;
2749
2750 /*
2751 * If socket has already been connected, trigger
2752 * soisconnected now.
2753 */
2754 if (so->so_cfil->cfi_flags & CFIF_SOCKET_CONNECTED) {
2755 so->so_cfil->cfi_flags &= ~CFIF_SOCKET_CONNECTED;
2756 soisconnected(so);
2757 return;
2758 }
2759 }
2760
2761 /*
2762 * Entry point from Sockets layer
2763 * The socket is locked.
2764 *
2765 	 * Checks whether a connected socket is subject to filtering and
2766 	 * still pending the initial verdict.
2767 */
2768 boolean_t
2769 cfil_sock_connected_pending_verdict(struct socket *so)
2770 {
2771 if (so == NULL || so->so_cfil == NULL) {
2772 return false;
2773 }
2774
2775 if (so->so_cfil->cfi_flags & CFIF_INITIAL_VERDICT) {
2776 return false;
2777 } else {
2778 /*
2779 * Remember that this protocol is already connected, so
2780 * we will trigger soisconnected() upon receipt of
2781 * initial verdict later.
2782 */
2783 so->so_cfil->cfi_flags |= CFIF_SOCKET_CONNECTED;
2784 return true;
2785 }
2786 }
2787
2788 boolean_t
2789 cfil_filter_present(void)
2790 {
2791 return cfil_active_count > 0;
2792 }
2793
2794 /*
2795 * Entry point from Sockets layer
2796 * The socket is locked.
2797 */
2798 errno_t
2799 cfil_sock_attach(struct socket *so, struct sockaddr *local, struct sockaddr *remote, int dir)
2800 {
2801 errno_t error = 0;
2802 uint32_t filter_control_unit;
2803
2804 socket_lock_assert_owned(so);
2805
2806 if (so->so_flags1 & SOF1_FLOW_DIVERT_SKIP) {
2807 /*
2808 * This socket has already been evaluated (and ultimately skipped) by
2809 * flow divert, so it has also already been through content filter if there
2810 * is one.
2811 */
2812 goto done;
2813 }
2814
2815 /* Limit ourselves to TCP that are not MPTCP subflows */
2816 if (SKIP_FILTER_FOR_TCP_SOCKET(so)) {
2817 goto done;
2818 }
2819
2820 filter_control_unit = necp_socket_get_content_filter_control_unit(so);
2821 if (filter_control_unit == 0) {
2822 goto done;
2823 }
2824
2825 if (filter_control_unit == NECP_FILTER_UNIT_NO_FILTER) {
2826 goto done;
2827 }
2828 if ((filter_control_unit & NECP_MASK_USERSPACE_ONLY) != 0) {
2829 OSIncrementAtomic(&cfil_stats.cfs_sock_userspace_only);
2830 goto done;
2831 }
2832 if (cfil_active_count == 0) {
2833 OSIncrementAtomic(&cfil_stats.cfs_sock_attach_in_vain);
2834 goto done;
2835 }
2836 if (so->so_cfil != NULL) {
2837 OSIncrementAtomic(&cfil_stats.cfs_sock_attach_already);
2838 CFIL_LOG(LOG_ERR, "already attached");
2839 } else {
2840 cfil_info_alloc(so, NULL);
2841 if (so->so_cfil == NULL) {
2842 error = ENOMEM;
2843 OSIncrementAtomic(&cfil_stats.cfs_sock_attach_no_mem);
2844 goto done;
2845 }
2846 so->so_cfil->cfi_dir = dir;
2847 so->so_cfil->cfi_filter_control_unit = filter_control_unit;
2848 }
2849 if (cfil_info_attach_unit(so, filter_control_unit, so->so_cfil) == 0) {
2850 CFIL_LOG(LOG_ERR, "cfil_info_attach_unit(%u) failed",
2851 filter_control_unit);
2852 OSIncrementAtomic(&cfil_stats.cfs_sock_attach_failed);
2853 goto done;
2854 }
2855 CFIL_LOG(LOG_INFO, "so %llx filter_control_unit %u sockID %llx",
2856 (uint64_t)VM_KERNEL_ADDRPERM(so),
2857 filter_control_unit, so->so_cfil->cfi_sock_id);
2858
2859 so->so_flags |= SOF_CONTENT_FILTER;
2860 OSIncrementAtomic(&cfil_stats.cfs_sock_attached);
2861
2862 /* Hold a reference on the socket */
2863 so->so_usecount++;
2864
2865 /*
2866 	 * Save passed addresses for attach event msg (in case resend
2867 	 * is needed).
2868 */
2869 if (remote != NULL && (remote->sa_len <= sizeof(union sockaddr_in_4_6))) {
2870 memcpy(&so->so_cfil->cfi_so_attach_faddr, remote, remote->sa_len);
2871 }
2872 if (local != NULL && (local->sa_len <= sizeof(union sockaddr_in_4_6))) {
2873 memcpy(&so->so_cfil->cfi_so_attach_laddr, local, local->sa_len);
2874 }
2875
2876 error = cfil_dispatch_attach_event(so, so->so_cfil, 0, dir);
2877 /* We can recover from flow control or out of memory errors */
2878 if (error == ENOBUFS || error == ENOMEM) {
2879 error = 0;
2880 } else if (error != 0) {
2881 goto done;
2882 }
2883
2884 CFIL_INFO_VERIFY(so->so_cfil);
2885 done:
2886 return error;
2887 }
2888
2889 /*
2890 * Entry point from Sockets layer
2891 * The socket is locked.
2892 */
2893 errno_t
2894 cfil_sock_detach(struct socket *so)
2895 {
2896 if (IS_IP_DGRAM(so)) {
2897 cfil_db_free(so);
2898 return 0;
2899 }
2900
2901 if (so->so_cfil) {
2902 if (so->so_flags & SOF_CONTENT_FILTER) {
2903 so->so_flags &= ~SOF_CONTENT_FILTER;
2904 VERIFY(so->so_usecount > 0);
2905 so->so_usecount--;
2906 }
2907 CFIL_INFO_FREE(so->so_cfil);
2908 so->so_cfil = NULL;
2909 OSIncrementAtomic(&cfil_stats.cfs_sock_detached);
2910 }
2911 return 0;
2912 }
2913
2914 /*
2915 * Fill in the address info of an event message from either
2916 * the socket or passed in address info.
2917 */
2918 static void
2919 cfil_fill_event_msg_addresses(struct cfil_hash_entry *entry, struct inpcb *inp,
2920 union sockaddr_in_4_6 *sin_src, union sockaddr_in_4_6 *sin_dst,
2921 boolean_t isIPv4, boolean_t outgoing)
2922 {
2923 if (isIPv4) {
2924 struct in_addr laddr = {0}, faddr = {0};
2925 u_int16_t lport = 0, fport = 0;
2926
2927 cfil_get_flow_address(entry, inp, &laddr, &faddr, &lport, &fport);
2928
2929 if (outgoing) {
2930 fill_ip_sockaddr_4_6(sin_src, laddr, lport);
2931 fill_ip_sockaddr_4_6(sin_dst, faddr, fport);
2932 } else {
2933 fill_ip_sockaddr_4_6(sin_src, faddr, fport);
2934 fill_ip_sockaddr_4_6(sin_dst, laddr, lport);
2935 }
2936 } else {
2937 struct in6_addr *laddr = NULL, *faddr = NULL;
2938 u_int16_t lport = 0, fport = 0;
2939
2940 cfil_get_flow_address_v6(entry, inp, &laddr, &faddr, &lport, &fport);
2941 if (outgoing) {
2942 fill_ip6_sockaddr_4_6(sin_src, laddr, lport);
2943 fill_ip6_sockaddr_4_6(sin_dst, faddr, fport);
2944 } else {
2945 fill_ip6_sockaddr_4_6(sin_src, faddr, fport);
2946 fill_ip6_sockaddr_4_6(sin_dst, laddr, lport);
2947 }
2948 }
2949 }
2950
2951 static boolean_t
2952 cfil_dispatch_attach_event_sign(cfil_crypto_state_t crypto_state,
2953 struct cfil_info *cfil_info,
2954 struct cfil_msg_sock_attached *msg)
2955 {
2956 struct cfil_crypto_data data = {};
2957
2958 if (crypto_state == NULL || msg == NULL || cfil_info == NULL) {
2959 return false;
2960 }
2961
2962 data.sock_id = msg->cfs_msghdr.cfm_sock_id;
2963 data.direction = msg->cfs_conn_dir;
2964
2965 data.pid = msg->cfs_pid;
2966 data.effective_pid = msg->cfs_e_pid;
2967 uuid_copy(data.uuid, msg->cfs_uuid);
2968 uuid_copy(data.effective_uuid, msg->cfs_e_uuid);
2969 data.socketProtocol = msg->cfs_sock_protocol;
2970 if (data.direction == CFS_CONNECTION_DIR_OUT) {
2971 data.remote.sin6 = msg->cfs_dst.sin6;
2972 data.local.sin6 = msg->cfs_src.sin6;
2973 } else {
2974 data.remote.sin6 = msg->cfs_src.sin6;
2975 data.local.sin6 = msg->cfs_dst.sin6;
2976 }
2977
2978 	// At attach, if the local address is already present, there is no need to re-sign subsequent data messages.
2979 if (!NULLADDRESS(data.local)) {
2980 cfil_info->cfi_isSignatureLatest = true;
2981 }
2982
2983 msg->cfs_signature_length = sizeof(cfil_crypto_signature);
2984 if (cfil_crypto_sign_data(crypto_state, &data, msg->cfs_signature, &msg->cfs_signature_length) != 0) {
2985 msg->cfs_signature_length = 0;
2986 CFIL_LOG(LOG_ERR, "CFIL: Failed to sign attached msg <sockID %llu>",
2987 msg->cfs_msghdr.cfm_sock_id);
2988 return false;
2989 }
2990
2991 return true;
2992 }
2993
2994 static boolean_t
2995 cfil_dispatch_data_event_sign(cfil_crypto_state_t crypto_state,
2996 struct socket *so, struct cfil_info *cfil_info,
2997 struct cfil_msg_data_event *msg)
2998 {
2999 struct cfil_crypto_data data = {};
3000
3001 if (crypto_state == NULL || msg == NULL ||
3002 so == NULL || cfil_info == NULL) {
3003 return false;
3004 }
3005
3006 data.sock_id = cfil_info->cfi_sock_id;
3007 data.direction = cfil_info->cfi_dir;
3008 data.pid = so->last_pid;
3009 memcpy(data.uuid, so->last_uuid, sizeof(uuid_t));
3010 if (so->so_flags & SOF_DELEGATED) {
3011 data.effective_pid = so->e_pid;
3012 memcpy(data.effective_uuid, so->e_uuid, sizeof(uuid_t));
3013 } else {
3014 data.effective_pid = so->last_pid;
3015 memcpy(data.effective_uuid, so->last_uuid, sizeof(uuid_t));
3016 }
3017 data.socketProtocol = so->so_proto->pr_protocol;
3018
3019 if (data.direction == CFS_CONNECTION_DIR_OUT) {
3020 data.remote.sin6 = msg->cfc_dst.sin6;
3021 data.local.sin6 = msg->cfc_src.sin6;
3022 } else {
3023 data.remote.sin6 = msg->cfc_src.sin6;
3024 data.local.sin6 = msg->cfc_dst.sin6;
3025 }
3026
3027 	// At the first data event, the local address may appear for the first time: update the
3028 	// address cache; subsequent data messages then no longer need to be re-signed.
3029 if (!NULLADDRESS(data.local)) {
3030 memcpy(&cfil_info->cfi_so_attach_laddr, &data.local, data.local.sa.sa_len);
3031 cfil_info->cfi_isSignatureLatest = true;
3032 }
3033
3034 msg->cfd_signature_length = sizeof(cfil_crypto_signature);
3035 if (cfil_crypto_sign_data(crypto_state, &data, msg->cfd_signature, &msg->cfd_signature_length) != 0) {
3036 msg->cfd_signature_length = 0;
3037 CFIL_LOG(LOG_ERR, "CFIL: Failed to sign data msg <sockID %llu>",
3038 msg->cfd_msghdr.cfm_sock_id);
3039 return false;
3040 }
3041
3042 return true;
3043 }
3044
3045 static boolean_t
3046 cfil_dispatch_closed_event_sign(cfil_crypto_state_t crypto_state,
3047 struct socket *so, struct cfil_info *cfil_info,
3048 struct cfil_msg_sock_closed *msg)
3049 {
3050 struct cfil_crypto_data data = {};
3051 struct cfil_hash_entry hash_entry = {};
3052 struct cfil_hash_entry *hash_entry_ptr = NULL;
3053 struct inpcb *inp = (struct inpcb *)so->so_pcb;
3054
3055 if (crypto_state == NULL || msg == NULL ||
3056 so == NULL || inp == NULL || cfil_info == NULL) {
3057 return false;
3058 }
3059
3060 data.sock_id = cfil_info->cfi_sock_id;
3061 data.direction = cfil_info->cfi_dir;
3062
3063 data.pid = so->last_pid;
3064 memcpy(data.uuid, so->last_uuid, sizeof(uuid_t));
3065 if (so->so_flags & SOF_DELEGATED) {
3066 data.effective_pid = so->e_pid;
3067 memcpy(data.effective_uuid, so->e_uuid, sizeof(uuid_t));
3068 } else {
3069 data.effective_pid = so->last_pid;
3070 memcpy(data.effective_uuid, so->last_uuid, sizeof(uuid_t));
3071 }
3072 data.socketProtocol = so->so_proto->pr_protocol;
3073
3074 /*
3075 * Fill in address info:
3076 * For UDP, use the cfil_info hash entry directly.
3077 	 * For TCP, compose a hash entry with the saved addresses.
3078 */
3079 if (cfil_info->cfi_hash_entry != NULL) {
3080 hash_entry_ptr = cfil_info->cfi_hash_entry;
3081 } else if (cfil_info->cfi_so_attach_faddr.sa.sa_len > 0 ||
3082 cfil_info->cfi_so_attach_laddr.sa.sa_len > 0) {
3083 fill_cfil_hash_entry_from_address(&hash_entry, TRUE, &cfil_info->cfi_so_attach_laddr.sa, FALSE);
3084 fill_cfil_hash_entry_from_address(&hash_entry, FALSE, &cfil_info->cfi_so_attach_faddr.sa, FALSE);
3085 hash_entry_ptr = &hash_entry;
3086 }
3087 if (hash_entry_ptr != NULL) {
3088 boolean_t outgoing = (cfil_info->cfi_dir == CFS_CONNECTION_DIR_OUT);
3089 union sockaddr_in_4_6 *src = outgoing ? &data.local : &data.remote;
3090 union sockaddr_in_4_6 *dst = outgoing ? &data.remote : &data.local;
3091 cfil_fill_event_msg_addresses(hash_entry_ptr, inp, src, dst, !IS_INP_V6(inp), outgoing);
3092 }
3093
3094 data.byte_count_in = cfil_info->cfi_byte_inbound_count;
3095 data.byte_count_out = cfil_info->cfi_byte_outbound_count;
3096
3097 msg->cfc_signature_length = sizeof(cfil_crypto_signature);
3098 if (cfil_crypto_sign_data(crypto_state, &data, msg->cfc_signature, &msg->cfc_signature_length) != 0) {
3099 msg->cfc_signature_length = 0;
3100 CFIL_LOG(LOG_ERR, "CFIL: Failed to sign closed msg <sockID %llu>",
3101 msg->cfc_msghdr.cfm_sock_id);
3102 return false;
3103 }
3104
3105 return true;
3106 }
3107
3108 static int
3109 cfil_dispatch_attach_event(struct socket *so, struct cfil_info *cfil_info,
3110 uint32_t kcunit, int conn_dir)
3111 {
3112 errno_t error = 0;
3113 struct cfil_entry *entry = NULL;
3114 struct cfil_msg_sock_attached msg_attached;
3115 struct content_filter *cfc = NULL;
3116 struct inpcb *inp = (struct inpcb *)so->so_pcb;
3117 struct cfil_hash_entry *hash_entry_ptr = NULL;
3118 struct cfil_hash_entry hash_entry;
3119
3120 memset(&hash_entry, 0, sizeof(struct cfil_hash_entry));
3121 proc_t p = PROC_NULL;
3122 task_t t = TASK_NULL;
3123
3124 socket_lock_assert_owned(so);
3125
3126 cfil_rw_lock_shared(&cfil_lck_rw);
3127
3128 if (so->so_proto == NULL || so->so_proto->pr_domain == NULL) {
3129 error = EINVAL;
3130 goto done;
3131 }
3132
3133 if (kcunit == 0) {
3134 entry = SLIST_FIRST(&cfil_info->cfi_ordered_entries);
3135 } else {
3136 entry = &cfil_info->cfi_entries[kcunit - 1];
3137 }
3138
3139 if (entry == NULL) {
3140 goto done;
3141 }
3142
3143 cfc = entry->cfe_filter;
3144 if (cfc == NULL) {
3145 goto done;
3146 }
3147
3148 if ((entry->cfe_flags & CFEF_SENT_SOCK_ATTACHED)) {
3149 goto done;
3150 }
3151
3152 if (kcunit == 0) {
3153 kcunit = CFI_ENTRY_KCUNIT(cfil_info, entry);
3154 }
3155
3156 CFIL_LOG(LOG_INFO, "so %llx filter_control_unit %u kcunit %u",
3157 (uint64_t)VM_KERNEL_ADDRPERM(so), entry->cfe_necp_control_unit, kcunit);
3158
3159 /* Would be wasteful to try when flow controlled */
3160 if (cfc->cf_flags & CFF_FLOW_CONTROLLED) {
3161 error = ENOBUFS;
3162 goto done;
3163 }
3164
3165 bzero(&msg_attached, sizeof(struct cfil_msg_sock_attached));
3166 msg_attached.cfs_msghdr.cfm_len = sizeof(struct cfil_msg_sock_attached);
3167 msg_attached.cfs_msghdr.cfm_version = CFM_VERSION_CURRENT;
3168 msg_attached.cfs_msghdr.cfm_type = CFM_TYPE_EVENT;
3169 msg_attached.cfs_msghdr.cfm_op = CFM_OP_SOCKET_ATTACHED;
3170 msg_attached.cfs_msghdr.cfm_sock_id = entry->cfe_cfil_info->cfi_sock_id;
3171
3172 msg_attached.cfs_sock_family = so->so_proto->pr_domain->dom_family;
3173 msg_attached.cfs_sock_type = so->so_proto->pr_type;
3174 msg_attached.cfs_sock_protocol = so->so_proto->pr_protocol;
3175 msg_attached.cfs_pid = so->last_pid;
3176 memcpy(msg_attached.cfs_uuid, so->last_uuid, sizeof(uuid_t));
3177 if (so->so_flags & SOF_DELEGATED) {
3178 msg_attached.cfs_e_pid = so->e_pid;
3179 memcpy(msg_attached.cfs_e_uuid, so->e_uuid, sizeof(uuid_t));
3180 } else {
3181 msg_attached.cfs_e_pid = so->last_pid;
3182 memcpy(msg_attached.cfs_e_uuid, so->last_uuid, sizeof(uuid_t));
3183 }
3184
3185 /*
3186 * Fill in address info:
3187 * For UDP, use the cfil_info hash entry directly.
3188 	 * For TCP, compose a hash entry with the saved addresses.
3189 */
3190 if (cfil_info->cfi_hash_entry != NULL) {
3191 hash_entry_ptr = cfil_info->cfi_hash_entry;
3192 } else if (cfil_info->cfi_so_attach_faddr.sa.sa_len > 0 ||
3193 cfil_info->cfi_so_attach_laddr.sa.sa_len > 0) {
3194 fill_cfil_hash_entry_from_address(&hash_entry, TRUE, &cfil_info->cfi_so_attach_laddr.sa, FALSE);
3195 fill_cfil_hash_entry_from_address(&hash_entry, FALSE, &cfil_info->cfi_so_attach_faddr.sa, FALSE);
3196 hash_entry_ptr = &hash_entry;
3197 }
3198 if (hash_entry_ptr != NULL) {
3199 cfil_fill_event_msg_addresses(hash_entry_ptr, inp,
3200 &msg_attached.cfs_src, &msg_attached.cfs_dst,
3201 !IS_INP_V6(inp), conn_dir == CFS_CONNECTION_DIR_OUT);
3202 }
3203 msg_attached.cfs_conn_dir = conn_dir;
3204
3205 if (msg_attached.cfs_e_pid != 0) {
3206 p = proc_find(msg_attached.cfs_e_pid);
3207 if (p != PROC_NULL) {
3208 t = proc_task(p);
3209 if (t != TASK_NULL) {
3210 audit_token_t audit_token;
3211 mach_msg_type_number_t count = TASK_AUDIT_TOKEN_COUNT;
3212 if (task_info(t, TASK_AUDIT_TOKEN, (task_info_t)&audit_token, &count) == KERN_SUCCESS) {
3213 memcpy(&msg_attached.cfs_audit_token, &audit_token, sizeof(msg_attached.cfs_audit_token));
3214 } else {
3215 CFIL_LOG(LOG_ERR, "CFIL: Failed to get process audit token <sockID %llu> ",
3216 entry->cfe_cfil_info->cfi_sock_id);
3217 }
3218 }
3219 proc_rele(p);
3220 }
3221 }
3222
3223 if (cfil_info->cfi_debug) {
3224 cfil_info_log(LOG_ERR, cfil_info, "CFIL: SENDING ATTACH UP");
3225 }
3226
3227 cfil_dispatch_attach_event_sign(entry->cfe_filter->cf_crypto_state, cfil_info, &msg_attached);
3228
3229 #if LIFECYCLE_DEBUG
3230 CFIL_LOG(LOG_DEBUG, "CFIL: LIFECYCLE: SENDING ATTACH UP <sockID %llu> ",
3231 entry->cfe_cfil_info->cfi_sock_id);
3232 #endif
3233
3234 error = ctl_enqueuedata(entry->cfe_filter->cf_kcref,
3235 entry->cfe_filter->cf_kcunit,
3236 &msg_attached,
3237 sizeof(struct cfil_msg_sock_attached),
3238 CTL_DATA_EOR);
3239 if (error != 0) {
3240 CFIL_LOG(LOG_ERR, "ctl_enqueuedata() failed: %d", error);
3241 goto done;
3242 }
3243 microuptime(&entry->cfe_last_event);
3244 cfil_info->cfi_first_event.tv_sec = entry->cfe_last_event.tv_sec;
3245 cfil_info->cfi_first_event.tv_usec = entry->cfe_last_event.tv_usec;
3246
3247 entry->cfe_flags |= CFEF_SENT_SOCK_ATTACHED;
3248 OSIncrementAtomic(&cfil_stats.cfs_attach_event_ok);
3249 done:
3250
3251 /* We can recover from flow control */
3252 if (error == ENOBUFS) {
3253 entry->cfe_flags |= CFEF_FLOW_CONTROLLED;
3254 OSIncrementAtomic(&cfil_stats.cfs_attach_event_flow_control);
3255
3256 if (!cfil_rw_lock_shared_to_exclusive(&cfil_lck_rw)) {
3257 cfil_rw_lock_exclusive(&cfil_lck_rw);
3258 }
3259
3260 cfc->cf_flags |= CFF_FLOW_CONTROLLED;
3261
3262 cfil_rw_unlock_exclusive(&cfil_lck_rw);
3263 } else {
3264 if (error != 0) {
3265 OSIncrementAtomic(&cfil_stats.cfs_attach_event_fail);
3266 }
3267
3268 cfil_rw_unlock_shared(&cfil_lck_rw);
3269 }
3270 return error;
3271 }
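
/*
 * Illustrative sketch (user space): receiving one event enqueued by the
 * dispatch routines in this file and switching on its opcode. Assumes each
 * message arrives as a single datagram (they are enqueued with
 * CTL_DATA_EOR) and that the CFM_OP_DATA_OUT / CFM_OP_DATA_IN opcodes from
 * <net/content_filter.h> are available; the buffer size and helper name
 * are arbitrary.
 */
#if 0
static void
read_one_event_sketch(int fd)
{
	uint8_t buf[8 * 1024];
	struct cfil_msg_hdr *hdr = (struct cfil_msg_hdr *)buf;
	ssize_t n = recv(fd, buf, sizeof(buf), 0);

	if (n < (ssize_t)sizeof(*hdr) || hdr->cfm_type != CFM_TYPE_EVENT) {
		return;
	}
	switch (hdr->cfm_op) {
	case CFM_OP_SOCKET_ATTACHED:
		/* struct cfil_msg_sock_attached: new flow, reply with a verdict */
		break;
	case CFM_OP_DATA_OUT:
	case CFM_OP_DATA_IN:
		/* struct cfil_msg_data_event: inspect payload, advance pass/peek offsets */
		break;
	case CFM_OP_DISCONNECT_OUT:
	case CFM_OP_DISCONNECT_IN:
	case CFM_OP_SOCKET_CLOSED:
		/* the flow is going away */
		break;
	default:
		break;
	}
}
#endif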
3272
3273 static int
3274 cfil_dispatch_disconnect_event(struct socket *so, struct cfil_info *cfil_info, uint32_t kcunit, int outgoing)
3275 {
3276 errno_t error = 0;
3277 struct mbuf *msg = NULL;
3278 struct cfil_entry *entry;
3279 struct cfe_buf *entrybuf;
3280 struct cfil_msg_hdr msg_disconnected;
3281 struct content_filter *cfc;
3282
3283 socket_lock_assert_owned(so);
3284
3285 cfil_rw_lock_shared(&cfil_lck_rw);
3286
3287 entry = &cfil_info->cfi_entries[kcunit - 1];
3288 if (outgoing) {
3289 entrybuf = &entry->cfe_snd;
3290 } else {
3291 entrybuf = &entry->cfe_rcv;
3292 }
3293
3294 cfc = entry->cfe_filter;
3295 if (cfc == NULL) {
3296 goto done;
3297 }
3298
3299 CFIL_LOG(LOG_INFO, "so %llx kcunit %u outgoing %d",
3300 (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit, outgoing);
3301
3302 /*
3303 * Send the disconnection event once
3304 */
3305 if ((outgoing && (entry->cfe_flags & CFEF_SENT_DISCONNECT_OUT)) ||
3306 (!outgoing && (entry->cfe_flags & CFEF_SENT_DISCONNECT_IN))) {
3307 CFIL_LOG(LOG_INFO, "so %llx disconnect already sent",
3308 (uint64_t)VM_KERNEL_ADDRPERM(so));
3309 goto done;
3310 }
3311
3312 /*
3313 * We're not disconnected as long as some data is waiting
3314 * to be delivered to the filter
3315 */
3316 if (outgoing && cfil_queue_empty(&entrybuf->cfe_ctl_q) == 0) {
3317 CFIL_LOG(LOG_INFO, "so %llx control queue not empty",
3318 (uint64_t)VM_KERNEL_ADDRPERM(so));
3319 error = EBUSY;
3320 goto done;
3321 }
3322 /* Would be wasteful to try when flow controlled */
3323 if (cfc->cf_flags & CFF_FLOW_CONTROLLED) {
3324 error = ENOBUFS;
3325 goto done;
3326 }
3327
3328 if (cfil_info->cfi_debug) {
3329 cfil_info_log(LOG_ERR, cfil_info, "CFIL: SENDING DISCONNECT UP");
3330 }
3331
3332 #if LIFECYCLE_DEBUG
3333 cfil_info_log(LOG_ERR, cfil_info, outgoing ?
3334 "CFIL: LIFECYCLE: OUT - SENDING DISCONNECT UP":
3335 "CFIL: LIFECYCLE: IN - SENDING DISCONNECT UP");
3336 #endif
3337
3338 bzero(&msg_disconnected, sizeof(struct cfil_msg_hdr));
3339 msg_disconnected.cfm_len = sizeof(struct cfil_msg_hdr);
3340 msg_disconnected.cfm_version = CFM_VERSION_CURRENT;
3341 msg_disconnected.cfm_type = CFM_TYPE_EVENT;
3342 msg_disconnected.cfm_op = outgoing ? CFM_OP_DISCONNECT_OUT :
3343 CFM_OP_DISCONNECT_IN;
3344 msg_disconnected.cfm_sock_id = entry->cfe_cfil_info->cfi_sock_id;
3345 error = ctl_enqueuedata(entry->cfe_filter->cf_kcref,
3346 entry->cfe_filter->cf_kcunit,
3347 &msg_disconnected,
3348 sizeof(struct cfil_msg_hdr),
3349 CTL_DATA_EOR);
3350 if (error != 0) {
3351 		CFIL_LOG(LOG_ERR, "ctl_enqueuedata() failed: %d", error);
3352 mbuf_freem(msg);
3353 goto done;
3354 }
3355 microuptime(&entry->cfe_last_event);
3356 CFI_ADD_TIME_LOG(cfil_info, &entry->cfe_last_event, &cfil_info->cfi_first_event, msg_disconnected.cfm_op);
3357
3358 /* Remember we have sent the disconnection message */
3359 if (outgoing) {
3360 entry->cfe_flags |= CFEF_SENT_DISCONNECT_OUT;
3361 OSIncrementAtomic(&cfil_stats.cfs_disconnect_out_event_ok);
3362 } else {
3363 entry->cfe_flags |= CFEF_SENT_DISCONNECT_IN;
3364 OSIncrementAtomic(&cfil_stats.cfs_disconnect_in_event_ok);
3365 }
3366 done:
3367 if (error == ENOBUFS) {
3368 entry->cfe_flags |= CFEF_FLOW_CONTROLLED;
3369 OSIncrementAtomic(
3370 &cfil_stats.cfs_disconnect_event_flow_control);
3371
3372 if (!cfil_rw_lock_shared_to_exclusive(&cfil_lck_rw)) {
3373 cfil_rw_lock_exclusive(&cfil_lck_rw);
3374 }
3375
3376 cfc->cf_flags |= CFF_FLOW_CONTROLLED;
3377
3378 cfil_rw_unlock_exclusive(&cfil_lck_rw);
3379 } else {
3380 if (error != 0) {
3381 OSIncrementAtomic(
3382 &cfil_stats.cfs_disconnect_event_fail);
3383 }
3384
3385 cfil_rw_unlock_shared(&cfil_lck_rw);
3386 }
3387 return error;
3388 }
3389
3390 int
3391 cfil_dispatch_closed_event(struct socket *so, struct cfil_info *cfil_info, int kcunit)
3392 {
3393 struct cfil_entry *entry;
3394 struct cfil_msg_sock_closed msg_closed;
3395 errno_t error = 0;
3396 struct content_filter *cfc;
3397
3398 socket_lock_assert_owned(so);
3399
3400 cfil_rw_lock_shared(&cfil_lck_rw);
3401
3402 entry = &cfil_info->cfi_entries[kcunit - 1];
3403 cfc = entry->cfe_filter;
3404 if (cfc == NULL) {
3405 goto done;
3406 }
3407
3408 CFIL_LOG(LOG_INFO, "so %llx kcunit %d",
3409 (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit);
3410
3411 /* Would be wasteful to try when flow controlled */
3412 if (cfc->cf_flags & CFF_FLOW_CONTROLLED) {
3413 error = ENOBUFS;
3414 goto done;
3415 }
3416 /*
3417 * Send a single closed message per filter
3418 */
3419 if ((entry->cfe_flags & CFEF_SENT_SOCK_CLOSED) != 0) {
3420 goto done;
3421 }
3422 if ((entry->cfe_flags & CFEF_SENT_SOCK_ATTACHED) == 0) {
3423 goto done;
3424 }
3425
3426 microuptime(&entry->cfe_last_event);
3427 CFI_ADD_TIME_LOG(cfil_info, &entry->cfe_last_event, &cfil_info->cfi_first_event, CFM_OP_SOCKET_CLOSED);
3428
3429 bzero(&msg_closed, sizeof(struct cfil_msg_sock_closed));
3430 msg_closed.cfc_msghdr.cfm_len = sizeof(struct cfil_msg_sock_closed);
3431 msg_closed.cfc_msghdr.cfm_version = CFM_VERSION_CURRENT;
3432 msg_closed.cfc_msghdr.cfm_type = CFM_TYPE_EVENT;
3433 msg_closed.cfc_msghdr.cfm_op = CFM_OP_SOCKET_CLOSED;
3434 msg_closed.cfc_msghdr.cfm_sock_id = entry->cfe_cfil_info->cfi_sock_id;
3435 msg_closed.cfc_first_event.tv_sec = cfil_info->cfi_first_event.tv_sec;
3436 msg_closed.cfc_first_event.tv_usec = cfil_info->cfi_first_event.tv_usec;
3437 memcpy(msg_closed.cfc_op_time, cfil_info->cfi_op_time, sizeof(uint32_t) * CFI_MAX_TIME_LOG_ENTRY);
3438 memcpy(msg_closed.cfc_op_list, cfil_info->cfi_op_list, sizeof(unsigned char) * CFI_MAX_TIME_LOG_ENTRY);
3439 msg_closed.cfc_op_list_ctr = cfil_info->cfi_op_list_ctr;
3440 msg_closed.cfc_byte_inbound_count = cfil_info->cfi_byte_inbound_count;
3441 msg_closed.cfc_byte_outbound_count = cfil_info->cfi_byte_outbound_count;
3442
3443 cfil_dispatch_closed_event_sign(entry->cfe_filter->cf_crypto_state, so, cfil_info, &msg_closed);
3444
3445 if (cfil_info->cfi_debug) {
3446 cfil_info_log(LOG_ERR, cfil_info, "CFIL: SENDING CLOSED UP");
3447 }
3448
3449 #if LIFECYCLE_DEBUG
3450 CFIL_LOG(LOG_ERR, "CFIL: LIFECYCLE: SENDING CLOSED UP: <sock id %llu> op ctr %d, start time %llu.%llu", msg_closed.cfc_msghdr.cfm_sock_id, cfil_info->cfi_op_list_ctr, cfil_info->cfi_first_event.tv_sec, cfil_info->cfi_first_event.tv_usec);
3451 #endif
3452 /* for debugging
3453 * if (msg_closed.cfc_op_list_ctr > CFI_MAX_TIME_LOG_ENTRY) {
3454 * msg_closed.cfc_op_list_ctr = CFI_MAX_TIME_LOG_ENTRY; // just in case
3455 * }
3456 * for (unsigned int i = 0; i < msg_closed.cfc_op_list_ctr ; i++) {
3457 * CFIL_LOG(LOG_ERR, "MD: socket %llu event %2u, time + %u msec", msg_closed.cfc_msghdr.cfm_sock_id, (unsigned short)msg_closed.cfc_op_list[i], msg_closed.cfc_op_time[i]);
3458 * }
3459 */
3460
3461 error = ctl_enqueuedata(entry->cfe_filter->cf_kcref,
3462 entry->cfe_filter->cf_kcunit,
3463 &msg_closed,
3464 sizeof(struct cfil_msg_sock_closed),
3465 CTL_DATA_EOR);
3466 if (error != 0) {
3467 CFIL_LOG(LOG_ERR, "ctl_enqueuedata() failed: %d",
3468 error);
3469 goto done;
3470 }
3471
3472 entry->cfe_flags |= CFEF_SENT_SOCK_CLOSED;
3473 OSIncrementAtomic(&cfil_stats.cfs_closed_event_ok);
3474 done:
3475 /* We can recover from flow control */
3476 if (error == ENOBUFS) {
3477 entry->cfe_flags |= CFEF_FLOW_CONTROLLED;
3478 OSIncrementAtomic(&cfil_stats.cfs_closed_event_flow_control);
3479
3480 if (!cfil_rw_lock_shared_to_exclusive(&cfil_lck_rw)) {
3481 cfil_rw_lock_exclusive(&cfil_lck_rw);
3482 }
3483
3484 cfc->cf_flags |= CFF_FLOW_CONTROLLED;
3485
3486 cfil_rw_unlock_exclusive(&cfil_lck_rw);
3487 } else {
3488 if (error != 0) {
3489 OSIncrementAtomic(&cfil_stats.cfs_closed_event_fail);
3490 }
3491
3492 cfil_rw_unlock_shared(&cfil_lck_rw);
3493 }
3494
3495 return error;
3496 }
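/*
 * Illustrative sketch (hypothetical, user-space side, not part of this
 * file): how a filter agent might drain the events enqueued above from its
 * kernel control socket.  The kctl_fd descriptor, buffer size and
 * handle_*() helpers are assumptions; only the message layout
 * (struct cfil_msg_hdr and the CFM_OP_* event codes) comes from this
 * subsystem.
 *
 *	uint8_t buf[65536];
 *	ssize_t n = recv(kctl_fd, buf, sizeof(buf), 0);
 *	if (n >= (ssize_t)sizeof(struct cfil_msg_hdr)) {
 *		struct cfil_msg_hdr *hdr = (struct cfil_msg_hdr *)(void *)buf;
 *		switch (hdr->cfm_op) {
 *		case CFM_OP_DISCONNECT_IN:
 *		case CFM_OP_DISCONNECT_OUT:
 *			// no more data will flow in that direction for this flow
 *			handle_disconnect(hdr->cfm_sock_id, hdr->cfm_op);
 *			break;
 *		case CFM_OP_SOCKET_CLOSED:
 *			// carries the op history and byte counts filled in above
 *			handle_closed((struct cfil_msg_sock_closed *)hdr);
 *			break;
 *		default:
 *			break;
 *		}
 *	}
 */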
3497
3498 static void
3499 fill_ip6_sockaddr_4_6(union sockaddr_in_4_6 *sin46,
3500 struct in6_addr *ip6, u_int16_t port)
3501 {
3502 if (sin46 == NULL) {
3503 return;
3504 }
3505
3506 struct sockaddr_in6 *sin6 = &sin46->sin6;
3507
3508 sin6->sin6_family = AF_INET6;
3509 sin6->sin6_len = sizeof(*sin6);
3510 sin6->sin6_port = port;
3511 sin6->sin6_addr = *ip6;
3512 if (IN6_IS_SCOPE_EMBED(&sin6->sin6_addr)) {
3513 sin6->sin6_scope_id = ntohs(sin6->sin6_addr.s6_addr16[1]);
3514 sin6->sin6_addr.s6_addr16[1] = 0;
3515 }
3516 }
3517
3518 static void
3519 fill_ip_sockaddr_4_6(union sockaddr_in_4_6 *sin46,
3520 struct in_addr ip, u_int16_t port)
3521 {
3522 if (sin46 == NULL) {
3523 return;
3524 }
3525
3526 struct sockaddr_in *sin = &sin46->sin;
3527
3528 sin->sin_family = AF_INET;
3529 sin->sin_len = sizeof(*sin);
3530 sin->sin_port = port;
3531 sin->sin_addr.s_addr = ip.s_addr;
3532 }
3533
3534 static void
3535 cfil_get_flow_address_v6(struct cfil_hash_entry *entry, struct inpcb *inp,
3536 struct in6_addr **laddr, struct in6_addr **faddr,
3537 u_int16_t *lport, u_int16_t *fport)
3538 {
3539 if (entry != NULL) {
3540 *laddr = &entry->cfentry_laddr.addr6;
3541 *faddr = &entry->cfentry_faddr.addr6;
3542 *lport = entry->cfentry_lport;
3543 *fport = entry->cfentry_fport;
3544 } else {
3545 *laddr = &inp->in6p_laddr;
3546 *faddr = &inp->in6p_faddr;
3547 *lport = inp->inp_lport;
3548 *fport = inp->inp_fport;
3549 }
3550 }
3551
3552 static void
3553 cfil_get_flow_address(struct cfil_hash_entry *entry, struct inpcb *inp,
3554 struct in_addr *laddr, struct in_addr *faddr,
3555 u_int16_t *lport, u_int16_t *fport)
3556 {
3557 if (entry != NULL) {
3558 *laddr = entry->cfentry_laddr.addr46.ia46_addr4;
3559 *faddr = entry->cfentry_faddr.addr46.ia46_addr4;
3560 *lport = entry->cfentry_lport;
3561 *fport = entry->cfentry_fport;
3562 } else {
3563 *laddr = inp->inp_laddr;
3564 *faddr = inp->inp_faddr;
3565 *lport = inp->inp_lport;
3566 *fport = inp->inp_fport;
3567 }
3568 }
3569
3570 static int
3571 cfil_dispatch_data_event(struct socket *so, struct cfil_info *cfil_info, uint32_t kcunit, int outgoing,
3572 struct mbuf *data, unsigned int copyoffset, unsigned int copylen)
3573 {
3574 errno_t error = 0;
3575 struct mbuf *copy = NULL;
3576 struct mbuf *msg = NULL;
3577 unsigned int one = 1;
3578 struct cfil_msg_data_event *data_req;
3579 size_t hdrsize;
3580 struct inpcb *inp = (struct inpcb *)so->so_pcb;
3581 struct cfil_entry *entry;
3582 struct cfe_buf *entrybuf;
3583 struct content_filter *cfc;
3584 struct timeval tv;
3585 int inp_flags = 0;
3586
3587 cfil_rw_lock_shared(&cfil_lck_rw);
3588
3589 entry = &cfil_info->cfi_entries[kcunit - 1];
3590 if (outgoing) {
3591 entrybuf = &entry->cfe_snd;
3592 } else {
3593 entrybuf = &entry->cfe_rcv;
3594 }
3595
3596 cfc = entry->cfe_filter;
3597 if (cfc == NULL) {
3598 goto done;
3599 }
3600
3601 data = cfil_data_start(data);
3602 if (data == NULL || (data->m_flags & M_PKTHDR) == 0) {
3603 CFIL_LOG(LOG_ERR, "NOT PKTHDR");
3604 goto done;
3605 }
3606
3607 CFIL_LOG(LOG_INFO, "so %llx kcunit %u outgoing %d",
3608 (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit, outgoing);
3609
3610 socket_lock_assert_owned(so);
3611
3612 /* Would be wasteful to try */
3613 if (cfc->cf_flags & CFF_FLOW_CONTROLLED) {
3614 error = ENOBUFS;
3615 goto done;
3616 }
3617
3618 /* Make a copy of the data to pass to kernel control socket */
3619 copy = m_copym_mode(data, copyoffset, copylen, M_DONTWAIT,
3620 M_COPYM_NOOP_HDR);
3621 if (copy == NULL) {
3622 CFIL_LOG(LOG_ERR, "m_copym_mode() failed");
3623 error = ENOMEM;
3624 goto done;
3625 }
3626
3627 /* We need an mbuf packet for the message header */
3628 hdrsize = sizeof(struct cfil_msg_data_event);
3629 error = mbuf_allocpacket(MBUF_DONTWAIT, hdrsize, &one, &msg);
3630 if (error != 0) {
3631 CFIL_LOG(LOG_ERR, "mbuf_allocpacket() failed");
3632 m_freem(copy);
3633 /*
3634 * ENOBUFS is reserved to indicate flow control, so report ENOMEM instead
3635 */
3636 error = ENOMEM;
3637 goto done;
3638 }
3639 mbuf_setlen(msg, hdrsize);
3640 mbuf_pkthdr_setlen(msg, hdrsize + copylen);
3641 msg->m_next = copy;
3642 data_req = (struct cfil_msg_data_event *)mbuf_data(msg);
3643 bzero(data_req, hdrsize);
3644 data_req->cfd_msghdr.cfm_len = (uint32_t)hdrsize + copylen;
3645 data_req->cfd_msghdr.cfm_version = 1;
3646 data_req->cfd_msghdr.cfm_type = CFM_TYPE_EVENT;
3647 data_req->cfd_msghdr.cfm_op =
3648 outgoing ? CFM_OP_DATA_OUT : CFM_OP_DATA_IN;
3649 data_req->cfd_msghdr.cfm_sock_id =
3650 entry->cfe_cfil_info->cfi_sock_id;
3651 data_req->cfd_start_offset = entrybuf->cfe_peeked;
3652 data_req->cfd_end_offset = entrybuf->cfe_peeked + copylen;
3653
3654 data_req->cfd_flags = 0;
3655 if (OPTIONAL_IP_HEADER(so)) {
3656 /*
3657 * For non-UDP/TCP traffic, indicate to filters if optional
3658 * IP header is present:
3659 * outgoing - indicate according to INP_HDRINCL flag
3660 * incoming - For IPv4 only, stripping of IP header is
3661 * optional. But for CFIL, we delay stripping
3662 * at rip_input. So CFIL always expects IP
3663 * frames. IP header will be stripped according
3664 * to INP_STRIPHDR flag later at reinjection.
3665 */
3666 if ((!outgoing && !IS_INP_V6(inp)) ||
3667 (outgoing && cfil_dgram_peek_socket_state(data, &inp_flags) && (inp_flags & INP_HDRINCL))) {
3668 data_req->cfd_flags |= CFD_DATA_FLAG_IP_HEADER;
3669 }
3670 }
3671
3672 /*
3673 * Copy address/port into event msg.
3674 * For non-connected sockets the addresses need to be copied
3675 * from the passed-in parameters
3676 */
3677 cfil_fill_event_msg_addresses(cfil_info->cfi_hash_entry, inp,
3678 &data_req->cfc_src, &data_req->cfc_dst,
3679 !IS_INP_V6(inp), outgoing);
3680
3681 if (cfil_info->cfi_debug) {
3682 cfil_info_log(LOG_ERR, cfil_info, "CFIL: SENDING DATA UP");
3683 }
3684
3685 if (cfil_info->cfi_isSignatureLatest == false) {
3686 cfil_dispatch_data_event_sign(entry->cfe_filter->cf_crypto_state, so, cfil_info, data_req);
3687 }
3688
3689 microuptime(&tv);
3690 CFI_ADD_TIME_LOG(cfil_info, &tv, &cfil_info->cfi_first_event, data_req->cfd_msghdr.cfm_op);
3691
3692 /* Pass the message to the content filter */
3693 error = ctl_enqueuembuf(entry->cfe_filter->cf_kcref,
3694 entry->cfe_filter->cf_kcunit,
3695 msg, CTL_DATA_EOR);
3696 if (error != 0) {
3697 CFIL_LOG(LOG_ERR, "ctl_enqueuembuf() failed: %d", error);
3698 mbuf_freem(msg);
3699 goto done;
3700 }
3701 entry->cfe_flags &= ~CFEF_FLOW_CONTROLLED;
3702 OSIncrementAtomic(&cfil_stats.cfs_data_event_ok);
3703
3704 #if VERDICT_DEBUG
3705 CFIL_LOG(LOG_ERR, "CFIL: VERDICT ACTION: so %llx sockID %llu outgoing %d: mbuf %llx copyoffset %u copylen %u",
3706 (uint64_t)VM_KERNEL_ADDRPERM(so), cfil_info->cfi_sock_id, outgoing, (uint64_t)VM_KERNEL_ADDRPERM(data), copyoffset, copylen);
3707 #endif
3708
3709 if (cfil_info->cfi_debug) {
3710 CFIL_LOG(LOG_ERR, "CFIL: VERDICT ACTION: so %llx sockID %llu outgoing %d: mbuf %llx copyoffset %u copylen %u (%s)",
3711 (uint64_t)VM_KERNEL_ADDRPERM(so), cfil_info->cfi_sock_id, outgoing, (uint64_t)VM_KERNEL_ADDRPERM(data), copyoffset, copylen,
3712 data_req->cfd_flags & CFD_DATA_FLAG_IP_HEADER ? "IP HDR" : "NO IP HDR");
3713 }
3714
3715 done:
3716 if (error == ENOBUFS) {
3717 entry->cfe_flags |= CFEF_FLOW_CONTROLLED;
3718 OSIncrementAtomic(
3719 &cfil_stats.cfs_data_event_flow_control);
3720
3721 if (!cfil_rw_lock_shared_to_exclusive(&cfil_lck_rw)) {
3722 cfil_rw_lock_exclusive(&cfil_lck_rw);
3723 }
3724
3725 cfc->cf_flags |= CFF_FLOW_CONTROLLED;
3726
3727 cfil_rw_unlock_exclusive(&cfil_lck_rw);
3728 } else {
3729 if (error != 0) {
3730 OSIncrementAtomic(&cfil_stats.cfs_data_event_fail);
3731 }
3732
3733 cfil_rw_unlock_shared(&cfil_lck_rw);
3734 }
3735 return error;
3736 }
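/*
 * Illustrative sketch (hypothetical, user-space side): how a filter agent
 * might interpret a CFM_OP_DATA_IN/CFM_OP_DATA_OUT event built above.  The
 * payload bytes immediately follow the fixed-size header (cfm_len is
 * hdrsize + copylen) and cfd_start_offset/cfd_end_offset locate them in
 * the flow.  The buf pointer and inspect() helper are assumptions; the
 * fields are the ones populated in cfil_dispatch_data_event().
 *
 *	struct cfil_msg_data_event *ev = (struct cfil_msg_data_event *)(void *)buf;
 *	uint8_t *payload = (uint8_t *)(ev + 1);
 *	size_t payload_len = (size_t)(ev->cfd_end_offset - ev->cfd_start_offset);
 *
 *	if (ev->cfd_flags & CFD_DATA_FLAG_IP_HEADER) {
 *		// Non-TCP/UDP flow: the payload starts with the IP header
 *		// (see the OPTIONAL_IP_HEADER handling above), so skip it
 *		// before inspecting application data.
 *	}
 *	inspect(ev->cfd_msghdr.cfm_sock_id, ev->cfd_msghdr.cfm_op,
 *	    payload, payload_len);
 */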
3737
3738 /*
3739 * Process the queue of data waiting to be delivered to content filter
3740 */
3741 static int
3742 cfil_data_service_ctl_q(struct socket *so, struct cfil_info *cfil_info, uint32_t kcunit, int outgoing)
3743 {
3744 errno_t error = 0;
3745 struct mbuf *data, *tmp = NULL;
3746 unsigned int datalen = 0, copylen = 0, copyoffset = 0;
3747 struct cfil_entry *entry;
3748 struct cfe_buf *entrybuf;
3749 uint64_t currentoffset = 0;
3750
3751 if (cfil_info == NULL) {
3752 return 0;
3753 }
3754
3755 CFIL_LOG(LOG_INFO, "so %llx kcunit %u outgoing %d",
3756 (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit, outgoing);
3757
3758 socket_lock_assert_owned(so);
3759
3760 entry = &cfil_info->cfi_entries[kcunit - 1];
3761 if (outgoing) {
3762 entrybuf = &entry->cfe_snd;
3763 } else {
3764 entrybuf = &entry->cfe_rcv;
3765 }
3766
3767 /* Send attached message if not yet done */
3768 if ((entry->cfe_flags & CFEF_SENT_SOCK_ATTACHED) == 0) {
3769 error = cfil_dispatch_attach_event(so, cfil_info, CFI_ENTRY_KCUNIT(cfil_info, entry),
3770 outgoing ? CFS_CONNECTION_DIR_OUT : CFS_CONNECTION_DIR_IN);
3771 if (error != 0) {
3772 /* We can recover from flow control */
3773 if (error == ENOBUFS || error == ENOMEM) {
3774 error = 0;
3775 }
3776 goto done;
3777 }
3778 } else if ((entry->cfe_flags & CFEF_DATA_START) == 0) {
3779 OSIncrementAtomic(&cfil_stats.cfs_ctl_q_not_started);
3780 goto done;
3781 }
3782
3783 #if DATA_DEBUG
3784 CFIL_LOG(LOG_DEBUG, "CFIL: SERVICE CTL-Q: pass_offset %llu peeked %llu peek_offset %llu",
3785 entrybuf->cfe_pass_offset,
3786 entrybuf->cfe_peeked,
3787 entrybuf->cfe_peek_offset);
3788 #endif
3789
3790 /* Move all data that can pass */
3791 while ((data = cfil_queue_first(&entrybuf->cfe_ctl_q)) != NULL &&
3792 entrybuf->cfe_ctl_q.q_start < entrybuf->cfe_pass_offset) {
3793 datalen = cfil_data_length(data, NULL, NULL);
3794 tmp = data;
3795
3796 if (entrybuf->cfe_ctl_q.q_start + datalen <=
3797 entrybuf->cfe_pass_offset) {
3798 /*
3799 * The first mbuf can fully pass
3800 */
3801 copylen = datalen;
3802 } else {
3803 /*
3804 * The first mbuf can partially pass
3805 */
3806 copylen = (unsigned int)(entrybuf->cfe_pass_offset - entrybuf->cfe_ctl_q.q_start);
3807 }
3808 VERIFY(copylen <= datalen);
3809
3810 #if DATA_DEBUG
3811 CFIL_LOG(LOG_DEBUG,
3812 "CFIL: SERVICE CTL-Q PASSING: %llx first %llu peeked %llu pass %llu peek %llu"
3813 " datalen %u copylen %u",
3814 (uint64_t)VM_KERNEL_ADDRPERM(tmp),
3815 entrybuf->cfe_ctl_q.q_start,
3816 entrybuf->cfe_peeked,
3817 entrybuf->cfe_pass_offset,
3818 entrybuf->cfe_peek_offset,
3819 datalen, copylen);
3820 #endif
3821
3822 /*
3823 * Data that passes has been peeked at explicitly or
3824 * implicitly
3825 */
3826 if (entrybuf->cfe_ctl_q.q_start + copylen >
3827 entrybuf->cfe_peeked) {
3828 entrybuf->cfe_peeked =
3829 entrybuf->cfe_ctl_q.q_start + copylen;
3830 }
3831 /*
3832 * Stop on partial pass
3833 */
3834 if (copylen < datalen) {
3835 break;
3836 }
3837
3838 /* All good, move full data from ctl queue to pending queue */
3839 cfil_queue_remove(&entrybuf->cfe_ctl_q, data, datalen);
3840
3841 cfil_queue_enqueue(&entrybuf->cfe_pending_q, data, datalen);
3842 if (outgoing) {
3843 OSAddAtomic64(datalen,
3844 &cfil_stats.cfs_pending_q_out_enqueued);
3845 } else {
3846 OSAddAtomic64(datalen,
3847 &cfil_stats.cfs_pending_q_in_enqueued);
3848 }
3849 }
3850 CFIL_INFO_VERIFY(cfil_info);
3851 if (tmp != NULL) {
3852 CFIL_LOG(LOG_DEBUG,
3853 "%llx first %llu peeked %llu pass %llu peek %llu"
3854 " datalen %u copylen %u",
3855 (uint64_t)VM_KERNEL_ADDRPERM(tmp),
3856 entrybuf->cfe_ctl_q.q_start,
3857 entrybuf->cfe_peeked,
3858 entrybuf->cfe_pass_offset,
3859 entrybuf->cfe_peek_offset,
3860 datalen, copylen);
3861 }
3862 tmp = NULL;
3863
3864 /* Now deal with remaining data the filter wants to peek at */
3865 for (data = cfil_queue_first(&entrybuf->cfe_ctl_q),
3866 currentoffset = entrybuf->cfe_ctl_q.q_start;
3867 data != NULL && currentoffset < entrybuf->cfe_peek_offset;
3868 data = cfil_queue_next(&entrybuf->cfe_ctl_q, data),
3869 currentoffset += datalen) {
3870 datalen = cfil_data_length(data, NULL, NULL);
3871 tmp = data;
3872
3873 /* We've already peeked at this mbuf */
3874 if (currentoffset + datalen <= entrybuf->cfe_peeked) {
3875 continue;
3876 }
3877 /*
3878 * The data in the first mbuf may have been
3879 * partially peeked at
3880 */
3881 copyoffset = (unsigned int)(entrybuf->cfe_peeked - currentoffset);
3882 VERIFY(copyoffset < datalen);
3883 copylen = datalen - copyoffset;
3884 VERIFY(copylen <= datalen);
3885 /*
3886 * Do not copy more than needed
3887 */
3888 if (currentoffset + copyoffset + copylen >
3889 entrybuf->cfe_peek_offset) {
3890 copylen = (unsigned int)(entrybuf->cfe_peek_offset -
3891 (currentoffset + copyoffset));
3892 }
3893
3894 #if DATA_DEBUG
3895 CFIL_LOG(LOG_DEBUG,
3896 "CFIL: SERVICE CTL-Q PEEKING: %llx current %llu peeked %llu pass %llu peek %llu "
3897 "datalen %u copylen %u copyoffset %u",
3898 (uint64_t)VM_KERNEL_ADDRPERM(tmp),
3899 currentoffset,
3900 entrybuf->cfe_peeked,
3901 entrybuf->cfe_pass_offset,
3902 entrybuf->cfe_peek_offset,
3903 datalen, copylen, copyoffset);
3904 #endif
3905
3906 /*
3907 * Stop if there is nothing more to peek at
3908 */
3909 if (copylen == 0) {
3910 break;
3911 }
3912 /*
3913 * Let the filter get a peek at this span of data
3914 */
3915 error = cfil_dispatch_data_event(so, cfil_info, kcunit,
3916 outgoing, data, copyoffset, copylen);
3917 if (error != 0) {
3918 /* On error, leave data in ctl_q */
3919 break;
3920 }
3921 entrybuf->cfe_peeked += copylen;
3922 if (outgoing) {
3923 OSAddAtomic64(copylen,
3924 &cfil_stats.cfs_ctl_q_out_peeked);
3925 } else {
3926 OSAddAtomic64(copylen,
3927 &cfil_stats.cfs_ctl_q_in_peeked);
3928 }
3929
3930 /* Stop when data could not be fully peeked at */
3931 if (copylen + copyoffset < datalen) {
3932 break;
3933 }
3934 }
3935 CFIL_INFO_VERIFY(cfil_info);
3936 if (tmp != NULL) {
3937 CFIL_LOG(LOG_DEBUG,
3938 "%llx first %llu peeked %llu pass %llu peek %llu"
3939 " datalen %u copylen %u copyoffset %u",
3940 (uint64_t)VM_KERNEL_ADDRPERM(tmp),
3941 currentoffset,
3942 entrybuf->cfe_peeked,
3943 entrybuf->cfe_pass_offset,
3944 entrybuf->cfe_peek_offset,
3945 datalen, copylen, copyoffset);
3946 }
3947
3948 /*
3949 * Process data that has passed the filter
3950 */
3951 error = cfil_service_pending_queue(so, cfil_info, kcunit, outgoing);
3952 if (error != 0) {
3953 CFIL_LOG(LOG_ERR, "cfil_service_pending_queue() error %d",
3954 error);
3955 goto done;
3956 }
3957
3958 /*
3959 * Dispatch disconnect events that could not be sent
3960 */
3961 if (cfil_info == NULL) {
3962 goto done;
3963 } else if (outgoing) {
3964 if ((cfil_info->cfi_flags & CFIF_SHUT_WR) &&
3965 !(entry->cfe_flags & CFEF_SENT_DISCONNECT_OUT)) {
3966 cfil_dispatch_disconnect_event(so, cfil_info, kcunit, 1);
3967 }
3968 } else {
3969 if ((cfil_info->cfi_flags & CFIF_SHUT_RD) &&
3970 !(entry->cfe_flags & CFEF_SENT_DISCONNECT_IN)) {
3971 cfil_dispatch_disconnect_event(so, cfil_info, kcunit, 0);
3972 }
3973 }
3974
3975 done:
3976 CFIL_LOG(LOG_DEBUG,
3977 "first %llu peeked %llu pass %llu peek %llu",
3978 entrybuf->cfe_ctl_q.q_start,
3979 entrybuf->cfe_peeked,
3980 entrybuf->cfe_pass_offset,
3981 entrybuf->cfe_peek_offset);
3982
3983 CFIL_INFO_VERIFY(cfil_info);
3984 return error;
3985 }
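/*
 * Worked example of the control-queue servicing above: assume the control
 * queue holds a single 1000-byte mbuf with q_start == 0 and the filter has
 * set cfe_pass_offset == 500 and cfe_peek_offset == 1500.  The pass loop
 * can only partially pass the mbuf (copylen 500 < datalen 1000), so it
 * advances cfe_peeked to 500 and leaves the mbuf on the control queue.
 * The peek loop then dispatches a data event for the remaining bytes
 * (copyoffset 500, copylen 500) and advances cfe_peeked to 1000.  Nothing
 * moves to the pending queue until the filter raises cfe_pass_offset past
 * the end of the mbuf.
 */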
3986
3987 /*
3988 * cfil_data_filter()
3989 *
3990 * Process data for a content filter installed on a socket
3991 */
3992 int
3993 cfil_data_filter(struct socket *so, struct cfil_info *cfil_info, uint32_t kcunit, int outgoing,
3994 struct mbuf *data, uint32_t datalen)
3995 {
3996 errno_t error = 0;
3997 struct cfil_entry *entry;
3998 struct cfe_buf *entrybuf;
3999
4000 CFIL_LOG(LOG_INFO, "so %llx kcunit %u outgoing %d",
4001 (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit, outgoing);
4002
4003 socket_lock_assert_owned(so);
4004
4005 entry = &cfil_info->cfi_entries[kcunit - 1];
4006 if (outgoing) {
4007 entrybuf = &entry->cfe_snd;
4008 } else {
4009 entrybuf = &entry->cfe_rcv;
4010 }
4011
4012 /* Are we attached to the filter? */
4013 if (entry->cfe_filter == NULL) {
4014 error = 0;
4015 goto done;
4016 }
4017
4018 /* Dispatch to filters */
4019 cfil_queue_enqueue(&entrybuf->cfe_ctl_q, data, datalen);
4020 if (outgoing) {
4021 OSAddAtomic64(datalen,
4022 &cfil_stats.cfs_ctl_q_out_enqueued);
4023 } else {
4024 OSAddAtomic64(datalen,
4025 &cfil_stats.cfs_ctl_q_in_enqueued);
4026 }
4027
4028 error = cfil_data_service_ctl_q(so, cfil_info, kcunit, outgoing);
4029 if (error != 0) {
4030 CFIL_LOG(LOG_ERR, "cfil_data_service_ctl_q() error %d",
4031 error);
4032 }
4033 /*
4034 * We have to return EJUSTRETURN in all cases to avoid double free
4035 * by socket layer
4036 */
4037 error = EJUSTRETURN;
4038 done:
4039 CFIL_INFO_VERIFY(cfil_info);
4040
4041 CFIL_LOG(LOG_INFO, "return %d", error);
4042 return error;
4043 }
4044
4045 /*
4046 * cfil_service_inject_queue() re-injects data that has passed the
4047 * content filters
4048 */
4049 static int
4050 cfil_service_inject_queue(struct socket *so, struct cfil_info *cfil_info, int outgoing)
4051 {
4052 mbuf_t data;
4053 unsigned int datalen;
4054 int mbcnt = 0;
4055 int mbnum = 0;
4056 errno_t error = 0;
4057 struct cfi_buf *cfi_buf;
4058 struct cfil_queue *inject_q;
4059 int need_rwakeup = 0;
4060 int count = 0;
4061 struct inpcb *inp = NULL;
4062 struct ip *ip = NULL;
4063 unsigned int hlen;
4064
4065 if (cfil_info == NULL) {
4066 return 0;
4067 }
4068
4069 socket_lock_assert_owned(so);
4070
4071 if (outgoing) {
4072 cfi_buf = &cfil_info->cfi_snd;
4073 cfil_info->cfi_flags &= ~CFIF_RETRY_INJECT_OUT;
4074 } else {
4075 cfi_buf = &cfil_info->cfi_rcv;
4076 cfil_info->cfi_flags &= ~CFIF_RETRY_INJECT_IN;
4077 }
4078 inject_q = &cfi_buf->cfi_inject_q;
4079
4080 if (cfil_queue_empty(inject_q)) {
4081 return 0;
4082 }
4083
4084 #if DATA_DEBUG | VERDICT_DEBUG
4085 CFIL_LOG(LOG_ERR, "CFIL: SERVICE INJECT-Q: <so %llx> outgoing %d queue len %llu",
4086 (uint64_t)VM_KERNEL_ADDRPERM(so), outgoing, cfil_queue_len(inject_q));
4087 #endif
4088
4089 while ((data = cfil_queue_first(inject_q)) != NULL) {
4090 datalen = cfil_data_length(data, &mbcnt, &mbnum);
4091
4092 #if DATA_DEBUG
4093 CFIL_LOG(LOG_ERR, "CFIL: SERVICE INJECT-Q: <so %llx> data %llx datalen %u (mbcnt %u)",
4094 (uint64_t)VM_KERNEL_ADDRPERM(so), (uint64_t)VM_KERNEL_ADDRPERM(data), datalen, mbcnt);
4095 #endif
4096 if (cfil_info->cfi_debug) {
4097 CFIL_LOG(LOG_ERR, "CFIL: SERVICE INJECT-Q: <so %llx> data %llx datalen %u (mbcnt %u)",
4098 (uint64_t)VM_KERNEL_ADDRPERM(so), (uint64_t)VM_KERNEL_ADDRPERM(data), datalen, mbcnt);
4099 }
4100
4101 /* Remove data from queue and adjust stats */
4102 cfil_queue_remove(inject_q, data, datalen);
4103 cfi_buf->cfi_pending_first += datalen;
4104 cfi_buf->cfi_pending_mbcnt -= mbcnt;
4105 cfi_buf->cfi_pending_mbnum -= mbnum;
4106 cfil_info_buf_verify(cfi_buf);
4107
4108 if (outgoing) {
4109 error = sosend_reinject(so, NULL, data, NULL, 0);
4110 if (error != 0) {
4111 #if DATA_DEBUG
4112 cfil_info_log(LOG_ERR, cfil_info, "CFIL: Error: sosend_reinject() failed");
4113 CFIL_LOG(LOG_ERR, "### sosend() failed %d", error);
4114 #endif
4115 break;
4116 }
4117 // At least one injection succeeded, need to wake up pending threads.
4118 need_rwakeup = 1;
4119 } else {
4120 data->m_flags |= M_SKIPCFIL;
4121
4122 /*
4123 * NOTE: We currently only support TCP, UDP, ICMP,
4124 * ICMPv6 and RAWIP. For MPTCP and message TCP we'll
4125 * need to call the appropriate sbappendxxx()
4126 * or fix sock_inject_data_in()
4127 */
4128 if (IS_IP_DGRAM(so)) {
4129 if (OPTIONAL_IP_HEADER(so)) {
4130 inp = sotoinpcb(so);
4131 if (inp && (inp->inp_flags & INP_STRIPHDR)) {
4132 mbuf_t data_start = cfil_data_start(data);
4133 if (data_start != NULL && (data_start->m_flags & M_PKTHDR)) {
4134 ip = mtod(data_start, struct ip *);
4135 hlen = IP_VHL_HL(ip->ip_vhl) << 2;
4136 data_start->m_len -= hlen;
4137 data_start->m_pkthdr.len -= hlen;
4138 data_start->m_data += hlen;
4139 }
4140 }
4141 }
4142
4143 if (sbappendchain(&so->so_rcv, data, 0)) {
4144 need_rwakeup = 1;
4145 }
4146 } else {
4147 if (sbappendstream(&so->so_rcv, data)) {
4148 need_rwakeup = 1;
4149 }
4150 }
4151 }
4152
4153 if (outgoing) {
4154 OSAddAtomic64(datalen,
4155 &cfil_stats.cfs_inject_q_out_passed);
4156 } else {
4157 OSAddAtomic64(datalen,
4158 &cfil_stats.cfs_inject_q_in_passed);
4159 }
4160
4161 count++;
4162 }
4163
4164 #if DATA_DEBUG | VERDICT_DEBUG
4165 CFIL_LOG(LOG_ERR, "CFIL: SERVICE INJECT-Q: <so %llx> injected %d",
4166 (uint64_t)VM_KERNEL_ADDRPERM(so), count);
4167 #endif
4168 if (cfil_info->cfi_debug) {
4169 CFIL_LOG(LOG_ERR, "CFIL: SERVICE INJECT-Q: <so %llx> injected %d",
4170 (uint64_t)VM_KERNEL_ADDRPERM(so), count);
4171 }
4172
4173 /* A single wakeup for several packets is more efficient */
4174 if (need_rwakeup) {
4175 if (outgoing == TRUE) {
4176 sowwakeup(so);
4177 } else {
4178 sorwakeup(so);
4179 }
4180 }
4181
4182 if (error != 0 && cfil_info) {
4183 if (error == ENOBUFS) {
4184 OSIncrementAtomic(&cfil_stats.cfs_inject_q_nobufs);
4185 }
4186 if (error == ENOMEM) {
4187 OSIncrementAtomic(&cfil_stats.cfs_inject_q_nomem);
4188 }
4189
4190 if (outgoing) {
4191 cfil_info->cfi_flags |= CFIF_RETRY_INJECT_OUT;
4192 OSIncrementAtomic(&cfil_stats.cfs_inject_q_out_fail);
4193 } else {
4194 cfil_info->cfi_flags |= CFIF_RETRY_INJECT_IN;
4195 OSIncrementAtomic(&cfil_stats.cfs_inject_q_in_fail);
4196 }
4197 }
4198
4199 /*
4200 * Notify
4201 */
4202 if (cfil_info && (cfil_info->cfi_flags & CFIF_SHUT_WR)) {
4203 cfil_sock_notify_shutdown(so, SHUT_WR);
4204 if (cfil_sock_data_pending(&so->so_snd) == 0) {
4205 soshutdownlock_final(so, SHUT_WR);
4206 }
4207 }
4208 if (cfil_info && (cfil_info->cfi_flags & CFIF_CLOSE_WAIT)) {
4209 if (cfil_filters_attached(so) == 0) {
4210 CFIL_LOG(LOG_INFO, "so %llx waking",
4211 (uint64_t)VM_KERNEL_ADDRPERM(so));
4212 wakeup((caddr_t)cfil_info);
4213 }
4214 }
4215
4216 CFIL_INFO_VERIFY(cfil_info);
4217
4218 return error;
4219 }
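/*
 * Note: the callers below (cfil_action_data_pass(), cfil_sock_is_closed()
 * and cfil_sock_buf_update()) bracket cfil_service_inject_queue() with
 * cfil_acquire_sockbuf()/cfil_release_sockbuf() so that re-injection does
 * not race with the socket layer.
 */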
4220
4221 static int
4222 cfil_service_pending_queue(struct socket *so, struct cfil_info *cfil_info, uint32_t kcunit, int outgoing)
4223 {
4224 uint64_t passlen, curlen;
4225 mbuf_t data;
4226 unsigned int datalen;
4227 errno_t error = 0;
4228 struct cfil_entry *entry;
4229 struct cfe_buf *entrybuf;
4230 struct cfil_queue *pending_q;
4231
4232 CFIL_LOG(LOG_INFO, "so %llx kcunit %u outgoing %d",
4233 (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit, outgoing);
4234
4235 socket_lock_assert_owned(so);
4236
4237 entry = &cfil_info->cfi_entries[kcunit - 1];
4238 if (outgoing) {
4239 entrybuf = &entry->cfe_snd;
4240 } else {
4241 entrybuf = &entry->cfe_rcv;
4242 }
4243
4244 pending_q = &entrybuf->cfe_pending_q;
4245
4246 passlen = entrybuf->cfe_pass_offset - pending_q->q_start;
4247
4248 /*
4249 * Locate the chunks of data that we can pass to the next filter
4250 * A data chunk must be on mbuf boundaries
4251 */
4252 curlen = 0;
4253 while ((data = cfil_queue_first(pending_q)) != NULL) {
4254 struct cfil_entry *iter_entry;
4255 datalen = cfil_data_length(data, NULL, NULL);
4256
4257 #if DATA_DEBUG
4258 CFIL_LOG(LOG_DEBUG,
4259 "CFIL: SERVICE PENDING-Q: data %llx datalen %u passlen %llu curlen %llu",
4260 (uint64_t)VM_KERNEL_ADDRPERM(data), datalen,
4261 passlen, curlen);
4262 #endif
4263
4264 if (curlen + datalen > passlen) {
4265 break;
4266 }
4267
4268 cfil_queue_remove(pending_q, data, datalen);
4269
4270 curlen += datalen;
4271
4272 for (iter_entry = SLIST_NEXT(entry, cfe_order_link);
4273 iter_entry != NULL;
4274 iter_entry = SLIST_NEXT(iter_entry, cfe_order_link)) {
4275 error = cfil_data_filter(so, cfil_info, CFI_ENTRY_KCUNIT(cfil_info, iter_entry), outgoing,
4276 data, datalen);
4277 /* 0 means passed so we can continue */
4278 if (error != 0) {
4279 break;
4280 }
4281 }
4282 /* When data has passed all filters, re-inject */
4283 if (error == 0) {
4284 if (outgoing) {
4285 cfil_queue_enqueue(
4286 &cfil_info->cfi_snd.cfi_inject_q,
4287 data, datalen);
4288 OSAddAtomic64(datalen,
4289 &cfil_stats.cfs_inject_q_out_enqueued);
4290 } else {
4291 cfil_queue_enqueue(
4292 &cfil_info->cfi_rcv.cfi_inject_q,
4293 data, datalen);
4294 OSAddAtomic64(datalen,
4295 &cfil_stats.cfs_inject_q_in_enqueued);
4296 }
4297 }
4298 }
4299
4300 CFIL_INFO_VERIFY(cfil_info);
4301
4302 return error;
4303 }
4304
4305 int
4306 cfil_update_data_offsets(struct socket *so, struct cfil_info *cfil_info, uint32_t kcunit, int outgoing,
4307 uint64_t pass_offset, uint64_t peek_offset)
4308 {
4309 errno_t error = 0;
4310 struct cfil_entry *entry = NULL;
4311 struct cfe_buf *entrybuf;
4312 int updated = 0;
4313
4314 CFIL_LOG(LOG_INFO, "pass %llu peek %llu", pass_offset, peek_offset);
4315
4316 socket_lock_assert_owned(so);
4317
4318 if (cfil_info == NULL) {
4319 CFIL_LOG(LOG_ERR, "so %llx cfil detached",
4320 (uint64_t)VM_KERNEL_ADDRPERM(so));
4321 error = 0;
4322 goto done;
4323 } else if (cfil_info->cfi_flags & CFIF_DROP) {
4324 CFIL_LOG(LOG_ERR, "so %llx drop set",
4325 (uint64_t)VM_KERNEL_ADDRPERM(so));
4326 error = EPIPE;
4327 goto done;
4328 }
4329
4330 entry = &cfil_info->cfi_entries[kcunit - 1];
4331 if (outgoing) {
4332 entrybuf = &entry->cfe_snd;
4333 } else {
4334 entrybuf = &entry->cfe_rcv;
4335 }
4336
4337 /* Record updated offsets for this content filter */
4338 if (pass_offset > entrybuf->cfe_pass_offset) {
4339 entrybuf->cfe_pass_offset = pass_offset;
4340
4341 if (entrybuf->cfe_peek_offset < entrybuf->cfe_pass_offset) {
4342 entrybuf->cfe_peek_offset = entrybuf->cfe_pass_offset;
4343 }
4344 updated = 1;
4345 } else {
4346 CFIL_LOG(LOG_INFO, "pass_offset %llu <= cfe_pass_offset %llu",
4347 pass_offset, entrybuf->cfe_pass_offset);
4348 }
4349 /* Filter does not want or need to see data that's allowed to pass */
4350 if (peek_offset > entrybuf->cfe_pass_offset &&
4351 peek_offset > entrybuf->cfe_peek_offset) {
4352 entrybuf->cfe_peek_offset = peek_offset;
4353 updated = 1;
4354 }
4355 /* Nothing to do */
4356 if (updated == 0) {
4357 goto done;
4358 }
4359
4360 /* Move data held in control queue to pending queue if needed */
4361 error = cfil_data_service_ctl_q(so, cfil_info, kcunit, outgoing);
4362 if (error != 0) {
4363 CFIL_LOG(LOG_ERR, "cfil_data_service_ctl_q() error %d",
4364 error);
4365 goto done;
4366 }
4367 error = EJUSTRETURN;
4368
4369 done:
4370 /*
4371 * The filter is effectively detached when it has passed all data from both sides
4372 * or when the socket is closed and no more data is waiting
4373 * to be delivered to the filter
4374 */
4375 if (entry != NULL &&
4376 ((entry->cfe_snd.cfe_pass_offset == CFM_MAX_OFFSET &&
4377 entry->cfe_rcv.cfe_pass_offset == CFM_MAX_OFFSET) ||
4378 ((cfil_info->cfi_flags & CFIF_CLOSE_WAIT) &&
4379 cfil_queue_empty(&entry->cfe_snd.cfe_ctl_q) &&
4380 cfil_queue_empty(&entry->cfe_rcv.cfe_ctl_q)))) {
4381 entry->cfe_flags |= CFEF_CFIL_DETACHED;
4382 #if LIFECYCLE_DEBUG
4383 cfil_info_log(LOG_ERR, cfil_info, outgoing ?
4384 "CFIL: LIFECYCLE: OUT - PASSED ALL - DETACH":
4385 "CFIL: LIFECYCLE: IN - PASSED ALL - DETACH");
4386 #endif
4387 CFIL_LOG(LOG_INFO, "so %llx detached %u",
4388 (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit);
4389 if ((cfil_info->cfi_flags & CFIF_CLOSE_WAIT) &&
4390 cfil_filters_attached(so) == 0) {
4391 #if LIFECYCLE_DEBUG
4392 cfil_info_log(LOG_ERR, cfil_info, "CFIL: LIFECYCLE: WAKING");
4393 #endif
4394 CFIL_LOG(LOG_INFO, "so %llx waking",
4395 (uint64_t)VM_KERNEL_ADDRPERM(so));
4396 wakeup((caddr_t)cfil_info);
4397 }
4398 }
4399 CFIL_INFO_VERIFY(cfil_info);
4400 CFIL_LOG(LOG_INFO, "return %d", error);
4401 return error;
4402 }
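/*
 * Note on the offset semantics enforced above: pass and peek offsets only
 * move forward, and the peek offset is kept at or beyond the pass offset
 * because data that is allowed to pass has implicitly been peeked at.  A
 * filter that raises both directions' pass offsets to CFM_MAX_OFFSET (as
 * cfil_action_bless_client() does below) has effectively passed everything
 * and gets marked CFEF_CFIL_DETACHED here.
 */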
4403
4404 /*
4405 * Update pass offset for socket when no data is pending
4406 */
4407 static int
4408 cfil_set_socket_pass_offset(struct socket *so, struct cfil_info *cfil_info, int outgoing)
4409 {
4410 struct cfi_buf *cfi_buf;
4411 struct cfil_entry *entry;
4412 struct cfe_buf *entrybuf;
4413 uint32_t kcunit;
4414 uint64_t pass_offset = 0;
4415 boolean_t first = true;
4416
4417 if (cfil_info == NULL) {
4418 return 0;
4419 }
4420
4421 CFIL_LOG(LOG_INFO, "so %llx outgoing %d",
4422 (uint64_t)VM_KERNEL_ADDRPERM(so), outgoing);
4423
4424 socket_lock_assert_owned(so);
4425
4426 if (outgoing) {
4427 cfi_buf = &cfil_info->cfi_snd;
4428 } else {
4429 cfi_buf = &cfil_info->cfi_rcv;
4430 }
4431
4432 CFIL_LOG(LOG_DEBUG, "CFIL: <so %llx, sockID %llu> outgoing %d cfi_pending_first %llu cfi_pending_last %llu",
4433 (uint64_t)VM_KERNEL_ADDRPERM(so), cfil_info->cfi_sock_id, outgoing,
4434 cfi_buf->cfi_pending_first, cfi_buf->cfi_pending_last);
4435
4436 if (cfi_buf->cfi_pending_last - cfi_buf->cfi_pending_first == 0) {
4437 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
4438 entry = &cfil_info->cfi_entries[kcunit - 1];
4439
4440 /* Are we attached to a filter? */
4441 if (entry->cfe_filter == NULL) {
4442 continue;
4443 }
4444
4445 if (outgoing) {
4446 entrybuf = &entry->cfe_snd;
4447 } else {
4448 entrybuf = &entry->cfe_rcv;
4449 }
4450
4451 // Keep track of the smallest pass_offset among filters.
4452 if (first == true ||
4453 entrybuf->cfe_pass_offset < pass_offset) {
4454 pass_offset = entrybuf->cfe_pass_offset;
4455 first = false;
4456 }
4457 }
4458 cfi_buf->cfi_pass_offset = pass_offset;
4459 }
4460
4461 CFIL_LOG(LOG_DEBUG, "CFIL: <so %llx, sockID %llu>, cfi_pass_offset %llu",
4462 (uint64_t)VM_KERNEL_ADDRPERM(so), cfil_info->cfi_sock_id, cfi_buf->cfi_pass_offset);
4463
4464 return 0;
4465 }
4466
4467 int
4468 cfil_action_data_pass(struct socket *so, struct cfil_info *cfil_info, uint32_t kcunit, int outgoing,
4469 uint64_t pass_offset, uint64_t peek_offset)
4470 {
4471 errno_t error = 0;
4472
4473 CFIL_LOG(LOG_INFO, "");
4474
4475 socket_lock_assert_owned(so);
4476
4477 error = cfil_acquire_sockbuf(so, cfil_info, outgoing);
4478 if (error != 0) {
4479 CFIL_LOG(LOG_INFO, "so %llx %s dropped",
4480 (uint64_t)VM_KERNEL_ADDRPERM(so),
4481 outgoing ? "out" : "in");
4482 goto release;
4483 }
4484
4485 error = cfil_update_data_offsets(so, cfil_info, kcunit, outgoing,
4486 pass_offset, peek_offset);
4487
4488 cfil_service_inject_queue(so, cfil_info, outgoing);
4489
4490 cfil_set_socket_pass_offset(so, cfil_info, outgoing);
4491 release:
4492 CFIL_INFO_VERIFY(cfil_info);
4493 cfil_release_sockbuf(so, outgoing);
4494
4495 return error;
4496 }
4497
4498
4499 static void
4500 cfil_flush_queues(struct socket *so, struct cfil_info *cfil_info)
4501 {
4502 struct cfil_entry *entry;
4503 int kcunit;
4504 uint64_t drained;
4505
4506 if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || cfil_info == NULL) {
4507 goto done;
4508 }
4509
4510 socket_lock_assert_owned(so);
4511
4512 /*
4513 * Flush the output queues and ignore errors as long as
4514 * we are attached
4515 */
4516 (void) cfil_acquire_sockbuf(so, cfil_info, 1);
4517 if (cfil_info != NULL) {
4518 drained = 0;
4519 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
4520 entry = &cfil_info->cfi_entries[kcunit - 1];
4521
4522 drained += cfil_queue_drain(&entry->cfe_snd.cfe_ctl_q);
4523 drained += cfil_queue_drain(&entry->cfe_snd.cfe_pending_q);
4524 }
4525 drained += cfil_queue_drain(&cfil_info->cfi_snd.cfi_inject_q);
4526
4527 if (drained) {
4528 if (cfil_info->cfi_flags & CFIF_DROP) {
4529 OSIncrementAtomic(
4530 &cfil_stats.cfs_flush_out_drop);
4531 } else {
4532 OSIncrementAtomic(
4533 &cfil_stats.cfs_flush_out_close);
4534 }
4535 }
4536 }
4537 cfil_release_sockbuf(so, 1);
4538
4539 /*
4540 * Flush the input queues
4541 */
4542 (void) cfil_acquire_sockbuf(so, cfil_info, 0);
4543 if (cfil_info != NULL) {
4544 drained = 0;
4545 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
4546 entry = &cfil_info->cfi_entries[kcunit - 1];
4547
4548 drained += cfil_queue_drain(
4549 &entry->cfe_rcv.cfe_ctl_q);
4550 drained += cfil_queue_drain(
4551 &entry->cfe_rcv.cfe_pending_q);
4552 }
4553 drained += cfil_queue_drain(&cfil_info->cfi_rcv.cfi_inject_q);
4554
4555 if (drained) {
4556 if (cfil_info->cfi_flags & CFIF_DROP) {
4557 OSIncrementAtomic(
4558 &cfil_stats.cfs_flush_in_drop);
4559 } else {
4560 OSIncrementAtomic(
4561 &cfil_stats.cfs_flush_in_close);
4562 }
4563 }
4564 }
4565 cfil_release_sockbuf(so, 0);
4566 done:
4567 CFIL_INFO_VERIFY(cfil_info);
4568 }
4569
4570 int
4571 cfil_action_drop(struct socket *so, struct cfil_info *cfil_info, uint32_t kcunit)
4572 {
4573 errno_t error = 0;
4574 struct cfil_entry *entry;
4575 struct proc *p;
4576
4577 if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || cfil_info == NULL) {
4578 goto done;
4579 }
4580
4581 socket_lock_assert_owned(so);
4582
4583 entry = &cfil_info->cfi_entries[kcunit - 1];
4584
4585 /* Are we attached to the filter? */
4586 if (entry->cfe_filter == NULL) {
4587 goto done;
4588 }
4589
4590 cfil_info->cfi_flags |= CFIF_DROP;
4591
4592 p = current_proc();
4593
4594 /*
4595 * Force the socket to be marked defunct
4596 * (forcing was fixed along with rdar://19391339)
4597 */
4598 if (so->so_cfil_db == NULL) {
4599 error = sosetdefunct(p, so,
4600 SHUTDOWN_SOCKET_LEVEL_CONTENT_FILTER | SHUTDOWN_SOCKET_LEVEL_DISCONNECT_ALL,
4601 FALSE);
4602
4603 /* Flush the socket buffer and disconnect */
4604 if (error == 0) {
4605 error = sodefunct(p, so,
4606 SHUTDOWN_SOCKET_LEVEL_CONTENT_FILTER | SHUTDOWN_SOCKET_LEVEL_DISCONNECT_ALL);
4607 }
4608 }
4609
4610 /* The filter is done, mark as detached */
4611 entry->cfe_flags |= CFEF_CFIL_DETACHED;
4612 #if LIFECYCLE_DEBUG
4613 cfil_info_log(LOG_ERR, cfil_info, "CFIL: LIFECYCLE: DROP - DETACH");
4614 #endif
4615 CFIL_LOG(LOG_INFO, "so %llx detached %u",
4616 (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit);
4617
4618 /* Pending data needs to go */
4619 cfil_flush_queues(so, cfil_info);
4620
4621 if (cfil_info && (cfil_info->cfi_flags & CFIF_CLOSE_WAIT)) {
4622 if (cfil_filters_attached(so) == 0) {
4623 CFIL_LOG(LOG_INFO, "so %llx waking",
4624 (uint64_t)VM_KERNEL_ADDRPERM(so));
4625 wakeup((caddr_t)cfil_info);
4626 }
4627 }
4628 done:
4629 return error;
4630 }
4631
4632 int
4633 cfil_action_bless_client(uint32_t kcunit, struct cfil_msg_hdr *msghdr)
4634 {
4635 errno_t error = 0;
4636 struct cfil_info *cfil_info = NULL;
4637
4638 bool cfil_attached = false;
4639 struct cfil_msg_bless_client *blessmsg = (struct cfil_msg_bless_client *)msghdr;
4640
4641 // Search and lock socket
4642 struct socket *so = cfil_socket_from_client_uuid(blessmsg->cfb_client_uuid, &cfil_attached);
4643 if (so == NULL) {
4644 error = ENOENT;
4645 } else {
4646 // The client gets a pass automatically
4647 cfil_info = (so->so_cfil_db != NULL) ?
4648 cfil_db_get_cfil_info(so->so_cfil_db, msghdr->cfm_sock_id) : so->so_cfil;
4649
4650 if (cfil_attached) {
4651 #if VERDICT_DEBUG
4652 if (cfil_info != NULL) {
4653 CFIL_LOG(LOG_ERR, "CFIL: VERDICT RECEIVED: BLESS %s <so %llx sockID %llu>",
4654 cfil_info->cfi_hash_entry ? "UDP" : "TCP",
4655 (uint64_t)VM_KERNEL_ADDRPERM(so),
4656 cfil_info->cfi_sock_id);
4657 }
4658 #endif
4659 cfil_sock_received_verdict(so);
4660 (void)cfil_action_data_pass(so, cfil_info, kcunit, 1, CFM_MAX_OFFSET, CFM_MAX_OFFSET);
4661 (void)cfil_action_data_pass(so, cfil_info, kcunit, 0, CFM_MAX_OFFSET, CFM_MAX_OFFSET);
4662 } else {
4663 so->so_flags1 |= SOF1_CONTENT_FILTER_SKIP;
4664 }
4665 socket_unlock(so, 1);
4666 }
4667
4668 return error;
4669 }
4670
4671 int
4672 cfil_action_set_crypto_key(uint32_t kcunit, struct cfil_msg_hdr *msghdr)
4673 {
4674 struct content_filter *cfc = NULL;
4675 cfil_crypto_state_t crypto_state = NULL;
4676 struct cfil_msg_set_crypto_key *keymsg = (struct cfil_msg_set_crypto_key *)msghdr;
4677
4678 CFIL_LOG(LOG_NOTICE, "");
4679
4680 if (content_filters == NULL) {
4681 CFIL_LOG(LOG_ERR, "no content filter");
4682 return EINVAL;
4683 }
4684 if (kcunit > MAX_CONTENT_FILTER) {
4685 CFIL_LOG(LOG_ERR, "kcunit %u > MAX_CONTENT_FILTER (%d)",
4686 kcunit, MAX_CONTENT_FILTER);
4687 return EINVAL;
4688 }
4689 crypto_state = cfil_crypto_init_client((uint8_t *)keymsg->crypto_key);
4690 if (crypto_state == NULL) {
4691 CFIL_LOG(LOG_ERR, "failed to initialize crypto state for unit %u)",
4692 kcunit);
4693 return EINVAL;
4694 }
4695
4696 cfil_rw_lock_exclusive(&cfil_lck_rw);
4697
4698 cfc = content_filters[kcunit - 1];
4699 if (cfc->cf_kcunit != kcunit) {
4700 CFIL_LOG(LOG_ERR, "bad unit info %u)",
4701 kcunit);
4702 cfil_rw_unlock_exclusive(&cfil_lck_rw);
4703 cfil_crypto_cleanup_state(crypto_state);
4704 return EINVAL;
4705 }
4706 if (cfc->cf_crypto_state != NULL) {
4707 cfil_crypto_cleanup_state(cfc->cf_crypto_state);
4708 cfc->cf_crypto_state = NULL;
4709 }
4710 cfc->cf_crypto_state = crypto_state;
4711
4712 cfil_rw_unlock_exclusive(&cfil_lck_rw);
4713 return 0;
4714 }
4715
4716 static int
4717 cfil_update_entry_offsets(struct socket *so, struct cfil_info *cfil_info, int outgoing, unsigned int datalen)
4718 {
4719 struct cfil_entry *entry;
4720 struct cfe_buf *entrybuf;
4721 uint32_t kcunit;
4722
4723 CFIL_LOG(LOG_INFO, "so %llx outgoing %d datalen %u",
4724 (uint64_t)VM_KERNEL_ADDRPERM(so), outgoing, datalen);
4725
4726 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
4727 entry = &cfil_info->cfi_entries[kcunit - 1];
4728
4729 /* Are we attached to the filter? */
4730 if (entry->cfe_filter == NULL) {
4731 continue;
4732 }
4733
4734 if (outgoing) {
4735 entrybuf = &entry->cfe_snd;
4736 } else {
4737 entrybuf = &entry->cfe_rcv;
4738 }
4739
4740 entrybuf->cfe_ctl_q.q_start += datalen;
4741 entrybuf->cfe_pass_offset = entrybuf->cfe_ctl_q.q_start;
4742 entrybuf->cfe_peeked = entrybuf->cfe_ctl_q.q_start;
4743 if (entrybuf->cfe_peek_offset < entrybuf->cfe_pass_offset) {
4744 entrybuf->cfe_peek_offset = entrybuf->cfe_pass_offset;
4745 }
4746
4747 entrybuf->cfe_ctl_q.q_end += datalen;
4748
4749 entrybuf->cfe_pending_q.q_start += datalen;
4750 entrybuf->cfe_pending_q.q_end += datalen;
4751 }
4752 CFIL_INFO_VERIFY(cfil_info);
4753 return 0;
4754 }
4755
4756 int
4757 cfil_data_common(struct socket *so, struct cfil_info *cfil_info, int outgoing, struct sockaddr *to,
4758 struct mbuf *data, struct mbuf *control, uint32_t flags)
4759 {
4760 #pragma unused(to, control, flags)
4761 errno_t error = 0;
4762 unsigned int datalen;
4763 int mbcnt = 0;
4764 int mbnum = 0;
4765 int kcunit;
4766 struct cfi_buf *cfi_buf;
4767 struct mbuf *chain = NULL;
4768
4769 if (cfil_info == NULL) {
4770 CFIL_LOG(LOG_ERR, "so %llx cfil detached",
4771 (uint64_t)VM_KERNEL_ADDRPERM(so));
4772 error = 0;
4773 goto done;
4774 } else if (cfil_info->cfi_flags & CFIF_DROP) {
4775 CFIL_LOG(LOG_ERR, "so %llx drop set",
4776 (uint64_t)VM_KERNEL_ADDRPERM(so));
4777 error = EPIPE;
4778 goto done;
4779 }
4780
4781 datalen = cfil_data_length(data, &mbcnt, &mbnum);
4782
4783 if (outgoing) {
4784 cfi_buf = &cfil_info->cfi_snd;
4785 cfil_info->cfi_byte_outbound_count += datalen;
4786 } else {
4787 cfi_buf = &cfil_info->cfi_rcv;
4788 cfil_info->cfi_byte_inbound_count += datalen;
4789 }
4790
4791 cfi_buf->cfi_pending_last += datalen;
4792 cfi_buf->cfi_pending_mbcnt += mbcnt;
4793 cfi_buf->cfi_pending_mbnum += mbnum;
4794
4795 if (IS_IP_DGRAM(so)) {
4796 if (cfi_buf->cfi_pending_mbnum > cfil_udp_gc_mbuf_num_max ||
4797 cfi_buf->cfi_pending_mbcnt > cfil_udp_gc_mbuf_cnt_max) {
4798 cfi_buf->cfi_tail_drop_cnt++;
4799 cfi_buf->cfi_pending_mbcnt -= mbcnt;
4800 cfi_buf->cfi_pending_mbnum -= mbnum;
4801 return EPIPE;
4802 }
4803 }
4804
4805 cfil_info_buf_verify(cfi_buf);
4806
4807 #if DATA_DEBUG
4808 CFIL_LOG(LOG_DEBUG, "CFIL: QUEUEING DATA: <so %llx> %s: data %llx len %u flags 0x%x nextpkt %llx - cfi_pending_last %llu cfi_pending_mbcnt %u cfi_pass_offset %llu",
4809 (uint64_t)VM_KERNEL_ADDRPERM(so),
4810 outgoing ? "OUT" : "IN",
4811 (uint64_t)VM_KERNEL_ADDRPERM(data), datalen, data->m_flags,
4812 (uint64_t)VM_KERNEL_ADDRPERM(data->m_nextpkt),
4813 cfi_buf->cfi_pending_last,
4814 cfi_buf->cfi_pending_mbcnt,
4815 cfi_buf->cfi_pass_offset);
4816 #endif
4817
4818 /* Fast path when below pass offset */
4819 if (cfi_buf->cfi_pending_last <= cfi_buf->cfi_pass_offset) {
4820 cfil_update_entry_offsets(so, cfil_info, outgoing, datalen);
4821 #if DATA_DEBUG
4822 CFIL_LOG(LOG_DEBUG, "CFIL: QUEUEING DATA: FAST PATH");
4823 #endif
4824 } else {
4825 struct cfil_entry *iter_entry;
4826 SLIST_FOREACH(iter_entry, &cfil_info->cfi_ordered_entries, cfe_order_link) {
4827 // Is cfil attached to this filter?
4828 kcunit = CFI_ENTRY_KCUNIT(cfil_info, iter_entry);
4829 if (IS_ENTRY_ATTACHED(cfil_info, kcunit)) {
4830 if (IS_IP_DGRAM(so) && chain == NULL) {
4831 /* Datagrams only:
4832 * Chain addr (incoming only TDB), control (optional) and data into one chain.
4833 * This full chain will be reinjected into the socket after receiving the verdict.
4834 */
4835 (void) cfil_dgram_save_socket_state(cfil_info, data);
4836 chain = sbconcat_mbufs(NULL, outgoing ? NULL : to, data, control);
4837 if (chain == NULL) {
4838 return ENOBUFS;
4839 }
4840 data = chain;
4841 }
4842 error = cfil_data_filter(so, cfil_info, kcunit, outgoing, data,
4843 datalen);
4844 }
4845 /* 0 means passed so continue with next filter */
4846 if (error != 0) {
4847 break;
4848 }
4849 }
4850 }
4851
4852 /* Move cursor if no filter claimed the data */
4853 if (error == 0) {
4854 cfi_buf->cfi_pending_first += datalen;
4855 cfi_buf->cfi_pending_mbcnt -= mbcnt;
4856 cfi_buf->cfi_pending_mbnum -= mbnum;
4857 cfil_info_buf_verify(cfi_buf);
4858 }
4859 done:
4860 CFIL_INFO_VERIFY(cfil_info);
4861
4862 return error;
4863 }
4864
4865 /*
4866 * Callback from socket layer sosendxxx()
4867 */
4868 int
4869 cfil_sock_data_out(struct socket *so, struct sockaddr *to,
4870 struct mbuf *data, struct mbuf *control, uint32_t flags)
4871 {
4872 int error = 0;
4873 int new_filter_control_unit = 0;
4874
4875 if (IS_IP_DGRAM(so)) {
4876 return cfil_sock_udp_handle_data(TRUE, so, NULL, to, data, control, flags);
4877 }
4878
4879 if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL) {
4880 /* Drop pre-existing TCP sockets if filter is enabled now */
4881 if (cfil_active_count > 0 && !SKIP_FILTER_FOR_TCP_SOCKET(so)) {
4882 new_filter_control_unit = necp_socket_get_content_filter_control_unit(so);
4883 if (new_filter_control_unit > 0) {
4884 return EPIPE;
4885 }
4886 }
4887 return 0;
4888 }
4889
4890 /* Drop pre-existing TCP sockets when filter state changed */
4891 new_filter_control_unit = necp_socket_get_content_filter_control_unit(so);
4892 if (new_filter_control_unit > 0 && new_filter_control_unit != so->so_cfil->cfi_filter_control_unit && !SKIP_FILTER_FOR_TCP_SOCKET(so)) {
4893 return EPIPE;
4894 }
4895
4896 /*
4897 * Pass initial data for TFO.
4898 */
4899 if (IS_INITIAL_TFO_DATA(so)) {
4900 return 0;
4901 }
4902
4903 socket_lock_assert_owned(so);
4904
4905 if (so->so_cfil->cfi_flags & CFIF_DROP) {
4906 CFIL_LOG(LOG_ERR, "so %llx drop set",
4907 (uint64_t)VM_KERNEL_ADDRPERM(so));
4908 return EPIPE;
4909 }
4910 if (control != NULL) {
4911 CFIL_LOG(LOG_ERR, "so %llx control",
4912 (uint64_t)VM_KERNEL_ADDRPERM(so));
4913 OSIncrementAtomic(&cfil_stats.cfs_data_out_control);
4914 }
4915 if ((flags & MSG_OOB)) {
4916 CFIL_LOG(LOG_ERR, "so %llx MSG_OOB",
4917 (uint64_t)VM_KERNEL_ADDRPERM(so));
4918 OSIncrementAtomic(&cfil_stats.cfs_data_out_oob);
4919 }
4920 if ((so->so_snd.sb_flags & SB_LOCK) == 0) {
4921 panic("so %p SB_LOCK not set", so);
4922 }
4923
4924 if (so->so_snd.sb_cfil_thread != NULL) {
4925 panic("%s sb_cfil_thread %p not NULL", __func__,
4926 so->so_snd.sb_cfil_thread);
4927 }
4928
4929 error = cfil_data_common(so, so->so_cfil, 1, to, data, control, flags);
4930
4931 return error;
4932 }
4933
4934 /*
4935 * Callback from socket layer sbappendxxx()
4936 */
4937 int
4938 cfil_sock_data_in(struct socket *so, struct sockaddr *from,
4939 struct mbuf *data, struct mbuf *control, uint32_t flags)
4940 {
4941 int error = 0;
4942 int new_filter_control_unit = 0;
4943
4944 if (IS_IP_DGRAM(so)) {
4945 return cfil_sock_udp_handle_data(FALSE, so, NULL, from, data, control, flags);
4946 }
4947
4948 if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL) {
4949 /* Drop pre-existing TCP sockets if filter is enabled now */
4950 if (cfil_active_count > 0 && !SKIP_FILTER_FOR_TCP_SOCKET(so)) {
4951 new_filter_control_unit = necp_socket_get_content_filter_control_unit(so);
4952 if (new_filter_control_unit > 0) {
4953 return EPIPE;
4954 }
4955 }
4956 return 0;
4957 }
4958
4959 /* Drop pre-existing TCP sockets when filter state changed */
4960 new_filter_control_unit = necp_socket_get_content_filter_control_unit(so);
4961 if (new_filter_control_unit > 0 && new_filter_control_unit != so->so_cfil->cfi_filter_control_unit && !SKIP_FILTER_FOR_TCP_SOCKET(so)) {
4962 return EPIPE;
4963 }
4964
4965 /*
4966 * Pass initial data for TFO.
4967 */
4968 if (IS_INITIAL_TFO_DATA(so)) {
4969 return 0;
4970 }
4971
4972 socket_lock_assert_owned(so);
4973
4974 if (so->so_cfil->cfi_flags & CFIF_DROP) {
4975 CFIL_LOG(LOG_ERR, "so %llx drop set",
4976 (uint64_t)VM_KERNEL_ADDRPERM(so));
4977 return EPIPE;
4978 }
4979 if (control != NULL) {
4980 CFIL_LOG(LOG_ERR, "so %llx control",
4981 (uint64_t)VM_KERNEL_ADDRPERM(so));
4982 OSIncrementAtomic(&cfil_stats.cfs_data_in_control);
4983 }
4984 if (data->m_type == MT_OOBDATA) {
4985 CFIL_LOG(LOG_ERR, "so %llx MSG_OOB",
4986 (uint64_t)VM_KERNEL_ADDRPERM(so));
4987 OSIncrementAtomic(&cfil_stats.cfs_data_in_oob);
4988 }
4989 error = cfil_data_common(so, so->so_cfil, 0, from, data, control, flags);
4990
4991 return error;
4992 }
4993
4994 /*
4995 * Callback from socket layer soshutdownxxx()
4996 *
4997 * We may delay the shutdown write if there's outgoing data in process.
4998 *
4999 * There is no point in delaying the shutdown read because the process
5000 * indicated that it does not want to read any more data.
5001 */
5002 int
5003 cfil_sock_shutdown(struct socket *so, int *how)
5004 {
5005 int error = 0;
5006
5007 if (IS_IP_DGRAM(so)) {
5008 return cfil_sock_udp_shutdown(so, how);
5009 }
5010
5011 if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL) {
5012 goto done;
5013 }
5014
5015 socket_lock_assert_owned(so);
5016
5017 CFIL_LOG(LOG_INFO, "so %llx how %d",
5018 (uint64_t)VM_KERNEL_ADDRPERM(so), *how);
5019
5020 /*
5021 * Check the state of the socket before the content filter
5022 */
5023 if (*how != SHUT_WR && (so->so_state & SS_CANTRCVMORE) != 0) {
5024 /* read already shut down */
5025 error = ENOTCONN;
5026 goto done;
5027 }
5028 if (*how != SHUT_RD && (so->so_state & SS_CANTSENDMORE) != 0) {
5029 /* write already shut down */
5030 error = ENOTCONN;
5031 goto done;
5032 }
5033
5034 if ((so->so_cfil->cfi_flags & CFIF_DROP) != 0) {
5035 CFIL_LOG(LOG_ERR, "so %llx drop set",
5036 (uint64_t)VM_KERNEL_ADDRPERM(so));
5037 goto done;
5038 }
5039
5040 /*
5041 * shutdown read: SHUT_RD or SHUT_RDWR
5042 */
5043 if (*how != SHUT_WR) {
5044 if (so->so_cfil->cfi_flags & CFIF_SHUT_RD) {
5045 error = ENOTCONN;
5046 goto done;
5047 }
5048 so->so_cfil->cfi_flags |= CFIF_SHUT_RD;
5049 cfil_sock_notify_shutdown(so, SHUT_RD);
5050 }
5051 /*
5052 * shutdown write: SHUT_WR or SHUT_RDWR
5053 */
5054 if (*how != SHUT_RD) {
5055 if (so->so_cfil->cfi_flags & CFIF_SHUT_WR) {
5056 error = ENOTCONN;
5057 goto done;
5058 }
5059 so->so_cfil->cfi_flags |= CFIF_SHUT_WR;
5060 cfil_sock_notify_shutdown(so, SHUT_WR);
5061 /*
5062 * When outgoing data is pending, we delay the shutdown at the
5063 * protocol level until the content filters give the final
5064 * verdict on the pending data.
5065 */
5066 if (cfil_sock_data_pending(&so->so_snd) != 0) {
5067 /*
5068 * When shutting down the read and write sides at once
5069 * we can proceed to the final shutdown of the read
5070 * side. Otherwise, we just return.
5071 */
5072 if (*how == SHUT_WR) {
5073 error = EJUSTRETURN;
5074 } else if (*how == SHUT_RDWR) {
5075 *how = SHUT_RD;
5076 }
5077 }
5078 }
5079 done:
5080 return error;
5081 }
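/*
 * Note: when the write-side shutdown is deferred above (EJUSTRETURN, or
 * SHUT_RDWR downgraded to SHUT_RD), the protocol-level shutdown completes
 * later in cfil_service_inject_queue(): once the passed data has been
 * re-injected and cfil_sock_data_pending() reports nothing outstanding,
 * soshutdownlock_final(so, SHUT_WR) is called there.
 */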
5082
5083 /*
5084 * This is called when the socket is closed and there is no more
5085 * opportunity for filtering
5086 */
5087 void
5088 cfil_sock_is_closed(struct socket *so)
5089 {
5090 errno_t error = 0;
5091 int kcunit;
5092
5093 if (IS_IP_DGRAM(so)) {
5094 cfil_sock_udp_is_closed(so);
5095 return;
5096 }
5097
5098 if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL) {
5099 return;
5100 }
5101
5102 CFIL_LOG(LOG_INFO, "so %llx", (uint64_t)VM_KERNEL_ADDRPERM(so));
5103
5104 socket_lock_assert_owned(so);
5105
5106 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
5107 /* Let the filters know of the closing */
5108 error = cfil_dispatch_closed_event(so, so->so_cfil, kcunit);
5109 }
5110
5111 /* Last chance to push passed data out */
5112 error = cfil_acquire_sockbuf(so, so->so_cfil, 1);
5113 if (error == 0) {
5114 cfil_service_inject_queue(so, so->so_cfil, 1);
5115 }
5116 cfil_release_sockbuf(so, 1);
5117
5118 so->so_cfil->cfi_flags |= CFIF_SOCK_CLOSED;
5119
5120 /* Pending data needs to go */
5121 cfil_flush_queues(so, so->so_cfil);
5122
5123 CFIL_INFO_VERIFY(so->so_cfil);
5124 }
5125
5126 /*
5127 * This is called when the socket is disconnected, so let the filters
5128 * know about the disconnection and that no more data will come
5129 *
5130 * The how parameter has the same values as soshutdown()
5131 */
5132 void
5133 cfil_sock_notify_shutdown(struct socket *so, int how)
5134 {
5135 errno_t error = 0;
5136 int kcunit;
5137
5138 if (IS_IP_DGRAM(so)) {
5139 cfil_sock_udp_notify_shutdown(so, how, 0, 0);
5140 return;
5141 }
5142
5143 if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL) {
5144 return;
5145 }
5146
5147 CFIL_LOG(LOG_INFO, "so %llx how %d",
5148 (uint64_t)VM_KERNEL_ADDRPERM(so), how);
5149
5150 socket_lock_assert_owned(so);
5151
5152 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
5153 /* Disconnect incoming side */
5154 if (how != SHUT_WR) {
5155 error = cfil_dispatch_disconnect_event(so, so->so_cfil, kcunit, 0);
5156 }
5157 /* Disconnect outgoing side */
5158 if (how != SHUT_RD) {
5159 error = cfil_dispatch_disconnect_event(so, so->so_cfil, kcunit, 1);
5160 }
5161 }
5162 }
5163
5164 static int
5165 cfil_filters_attached(struct socket *so)
5166 {
5167 struct cfil_entry *entry;
5168 uint32_t kcunit;
5169 int attached = 0;
5170
5171 if (IS_IP_DGRAM(so)) {
5172 return cfil_filters_udp_attached(so, FALSE);
5173 }
5174
5175 if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL) {
5176 return 0;
5177 }
5178
5179 socket_lock_assert_owned(so);
5180
5181 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
5182 entry = &so->so_cfil->cfi_entries[kcunit - 1];
5183
5184 /* Are we attached to the filter? */
5185 if (entry->cfe_filter == NULL) {
5186 continue;
5187 }
5188 if ((entry->cfe_flags & CFEF_SENT_SOCK_ATTACHED) == 0) {
5189 continue;
5190 }
5191 if ((entry->cfe_flags & CFEF_CFIL_DETACHED) != 0) {
5192 continue;
5193 }
5194 attached = 1;
5195 break;
5196 }
5197
5198 return attached;
5199 }
5200
5201 /*
5202 * This is called when the socket is closed and we are waiting for
5203 * the filters to give the final pass or drop
5204 */
5205 void
5206 cfil_sock_close_wait(struct socket *so)
5207 {
5208 lck_mtx_t *mutex_held;
5209 struct timespec ts;
5210 int error;
5211
5212 if (IS_IP_DGRAM(so)) {
5213 cfil_sock_udp_close_wait(so);
5214 return;
5215 }
5216
5217 if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL) {
5218 return;
5219 }
5220
5221 CFIL_LOG(LOG_INFO, "so %llx", (uint64_t)VM_KERNEL_ADDRPERM(so));
5222
5223 if (so->so_proto->pr_getlock != NULL) {
5224 mutex_held = (*so->so_proto->pr_getlock)(so, PR_F_WILLUNLOCK);
5225 } else {
5226 mutex_held = so->so_proto->pr_domain->dom_mtx;
5227 }
5228 LCK_MTX_ASSERT(mutex_held, LCK_MTX_ASSERT_OWNED);
5229
5230 while (cfil_filters_attached(so)) {
5231 /*
5232 * Notify the filters we are going away so they can detach
5233 */
5234 cfil_sock_notify_shutdown(so, SHUT_RDWR);
5235
5236 /*
5237 * Make sure we still need to wait after the filters are notified
5238 * of the disconnection
5239 */
5240 if (cfil_filters_attached(so) == 0) {
5241 break;
5242 }
5243
5244 CFIL_LOG(LOG_INFO, "so %llx waiting",
5245 (uint64_t)VM_KERNEL_ADDRPERM(so));
5246
5247 ts.tv_sec = cfil_close_wait_timeout / 1000;
5248 ts.tv_nsec = (cfil_close_wait_timeout % 1000) *
5249 NSEC_PER_USEC * 1000;
5250
5251 OSIncrementAtomic(&cfil_stats.cfs_close_wait);
5252 so->so_cfil->cfi_flags |= CFIF_CLOSE_WAIT;
5253 error = msleep((caddr_t)so->so_cfil, mutex_held,
5254 PSOCK | PCATCH, "cfil_sock_close_wait", &ts);
5255 so->so_cfil->cfi_flags &= ~CFIF_CLOSE_WAIT;
5256
5257 CFIL_LOG(LOG_NOTICE, "so %llx timed out %d",
5258 (uint64_t)VM_KERNEL_ADDRPERM(so), (error != 0));
5259
5260 /*
5261 * Force close in case of timeout
5262 */
5263 if (error != 0) {
5264 OSIncrementAtomic(&cfil_stats.cfs_close_wait_timeout);
5265 break;
5266 }
5267 }
5268 }
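/*
 * Note: the msleep() above is paired with the wakeup((caddr_t)cfil_info)
 * calls in cfil_update_data_offsets(), cfil_action_drop() and
 * cfil_service_inject_queue(); when the last attached filter detaches
 * while CFIF_CLOSE_WAIT is set, the closing thread is woken instead of
 * waiting out the full cfil_close_wait_timeout.
 */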
5269
5270 /*
5271 * Returns the size of the data held by the content filter for this socket buffer
5272 */
5273 int32_t
5274 cfil_sock_data_pending(struct sockbuf *sb)
5275 {
5276 struct socket *so = sb->sb_so;
5277 uint64_t pending = 0;
5278
5279 if (IS_IP_DGRAM(so)) {
5280 return cfil_sock_udp_data_pending(sb, FALSE);
5281 }
5282
5283 if ((so->so_flags & SOF_CONTENT_FILTER) != 0 && so->so_cfil != NULL) {
5284 struct cfi_buf *cfi_buf;
5285
5286 socket_lock_assert_owned(so);
5287
5288 if ((sb->sb_flags & SB_RECV) == 0) {
5289 cfi_buf = &so->so_cfil->cfi_snd;
5290 } else {
5291 cfi_buf = &so->so_cfil->cfi_rcv;
5292 }
5293
5294 pending = cfi_buf->cfi_pending_last -
5295 cfi_buf->cfi_pending_first;
5296
5297 /*
5298 * If we are limited by the "chars of mbufs used" roughly
5299 * adjust so we won't overcommit
5300 */
5301 if (pending > (uint64_t)cfi_buf->cfi_pending_mbcnt) {
5302 pending = cfi_buf->cfi_pending_mbcnt;
5303 }
5304 }
5305
5306 VERIFY(pending < INT32_MAX);
5307
5308 return (int32_t)(pending);
5309 }
5310
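/*
 * Illustrative example for the function above: the pending byte count is the
 * distance between the absolute stream offsets tracked for that direction,
 * clamped by the "chars of mbufs used" counter. For instance, with
 * cfi_pending_first == 4096, cfi_pending_last == 6144 and
 * cfi_pending_mbcnt == 1024:
 *
 *     pending = 6144 - 4096 = 2048 bytes held by the filter,
 *     but since 2048 > mbcnt (1024), 1024 is reported instead.
 */
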
5311 /*
5312 * Return the socket buffer space used by data being held by content filters
5313 * so processes won't clog the socket buffer
5314 */
5315 int32_t
5316 cfil_sock_data_space(struct sockbuf *sb)
5317 {
5318 struct socket *so = sb->sb_so;
5319 uint64_t pending = 0;
5320
5321 if (IS_IP_DGRAM(so)) {
5322 return cfil_sock_udp_data_pending(sb, TRUE);
5323 }
5324
5325 if ((so->so_flags & SOF_CONTENT_FILTER) != 0 && so->so_cfil != NULL &&
5326 so->so_snd.sb_cfil_thread != current_thread()) {
5327 struct cfi_buf *cfi_buf;
5328
5329 socket_lock_assert_owned(so);
5330
5331 if ((sb->sb_flags & SB_RECV) == 0) {
5332 cfi_buf = &so->so_cfil->cfi_snd;
5333 } else {
5334 cfi_buf = &so->so_cfil->cfi_rcv;
5335 }
5336
5337 pending = cfi_buf->cfi_pending_last -
5338 cfi_buf->cfi_pending_first;
5339
5340 /*
5341 * If we are limited by the "chars of mbufs used" roughly
5342 * adjust so we won't overcommit
5343 */
5344 if ((uint64_t)cfi_buf->cfi_pending_mbcnt > pending) {
5345 pending = cfi_buf->cfi_pending_mbcnt;
5346 }
5347 }
5348
5349 VERIFY(pending < INT32_MAX);
5350
5351 return (int32_t)(pending);
5352 }
5353
5354 /*
5355 * A callback from the socket and protocol layer when data becomes
5356 * available in the socket buffer to give a chance for the content filter
5357 * to re-inject data that was held back
5358 */
5359 void
5360 cfil_sock_buf_update(struct sockbuf *sb)
5361 {
5362 int outgoing;
5363 int error;
5364 struct socket *so = sb->sb_so;
5365
5366 if (IS_IP_DGRAM(so)) {
5367 cfil_sock_udp_buf_update(sb);
5368 return;
5369 }
5370
5371 if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL) {
5372 return;
5373 }
5374
5375 if (!cfil_sbtrim) {
5376 return;
5377 }
5378
5379 socket_lock_assert_owned(so);
5380
5381 if ((sb->sb_flags & SB_RECV) == 0) {
5382 if ((so->so_cfil->cfi_flags & CFIF_RETRY_INJECT_OUT) == 0) {
5383 return;
5384 }
5385 outgoing = 1;
5386 OSIncrementAtomic(&cfil_stats.cfs_inject_q_out_retry);
5387 } else {
5388 if ((so->so_cfil->cfi_flags & CFIF_RETRY_INJECT_IN) == 0) {
5389 return;
5390 }
5391 outgoing = 0;
5392 OSIncrementAtomic(&cfil_stats.cfs_inject_q_in_retry);
5393 }
5394
5395 CFIL_LOG(LOG_NOTICE, "so %llx outgoing %d",
5396 (uint64_t)VM_KERNEL_ADDRPERM(so), outgoing);
5397
5398 error = cfil_acquire_sockbuf(so, so->so_cfil, outgoing);
5399 if (error == 0) {
5400 cfil_service_inject_queue(so, so->so_cfil, outgoing);
5401 }
5402 cfil_release_sockbuf(so, outgoing);
5403 }
5404
5405 int
5406 sysctl_cfil_filter_list(struct sysctl_oid *oidp, void *arg1, int arg2,
5407 struct sysctl_req *req)
5408 {
5409 #pragma unused(oidp, arg1, arg2)
5410 int error = 0;
5411 size_t len = 0;
5412 u_int32_t i;
5413
5414 /* Read only */
5415 if (req->newptr != USER_ADDR_NULL) {
5416 return EPERM;
5417 }
5418
5419 cfil_rw_lock_shared(&cfil_lck_rw);
5420
5421 for (i = 0; content_filters != NULL && i < MAX_CONTENT_FILTER; i++) {
5422 struct cfil_filter_stat filter_stat;
5423 struct content_filter *cfc = content_filters[i];
5424
5425 if (cfc == NULL) {
5426 continue;
5427 }
5428
5429 /* If just asking for the size */
5430 if (req->oldptr == USER_ADDR_NULL) {
5431 len += sizeof(struct cfil_filter_stat);
5432 continue;
5433 }
5434
5435 bzero(&filter_stat, sizeof(struct cfil_filter_stat));
5436 filter_stat.cfs_len = sizeof(struct cfil_filter_stat);
5437 filter_stat.cfs_filter_id = cfc->cf_kcunit;
5438 filter_stat.cfs_flags = cfc->cf_flags;
5439 filter_stat.cfs_sock_count = cfc->cf_sock_count;
5440 filter_stat.cfs_necp_control_unit = cfc->cf_necp_control_unit;
5441
5442 error = SYSCTL_OUT(req, &filter_stat,
5443 sizeof(struct cfil_filter_stat));
5444 if (error != 0) {
5445 break;
5446 }
5447 }
5448 /* If just asking for the size */
5449 if (req->oldptr == USER_ADDR_NULL) {
5450 req->oldidx = len;
5451 }
5452
5453 cfil_rw_unlock_shared(&cfil_lck_rw);
5454
5455 #if SHOW_DEBUG
5456 if (req->oldptr != USER_ADDR_NULL) {
5457 for (i = 1; content_filters != NULL && i <= MAX_CONTENT_FILTER; i++) {
5458 cfil_filter_show(i);
5459 }
5460 }
5461 #endif
5462
5463 return error;
5464 }
5465
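/*
 * Illustrative sketch: the handler above follows the usual two-pass sysctl
 * pattern, where a request with oldptr == NULL only reports the required
 * length and a second request copies out one cfil_filter_stat per active
 * filter. A user space consumer could drive it roughly as follows; the exact
 * MIB name is not defined in this file, so "net.cfil.filter_list" below is
 * an assumption:
 *
 *     #include <sys/sysctl.h>
 *     #include <stdlib.h>
 *
 *     size_t len = 0;
 *     // Pass 1: NULL buffer, the kernel fills in the needed size.
 *     if (sysctlbyname("net.cfil.filter_list", NULL, &len, NULL, 0) == 0 &&   // MIB name assumed
 *         len > 0) {
 *             struct cfil_filter_stat *stats = malloc(len);
 *             // Pass 2: copy out the records; len is updated to the bytes written.
 *             if (stats != NULL &&
 *                 sysctlbyname("net.cfil.filter_list", stats, &len, NULL, 0) == 0) {
 *                     size_t count = len / sizeof(struct cfil_filter_stat);
 *                     // ... inspect stats[0..count-1] ...
 *             }
 *             free(stats);
 *     }
 */
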
5466 static int
5467 sysctl_cfil_sock_list(struct sysctl_oid *oidp, void *arg1, int arg2,
5468 struct sysctl_req *req)
5469 {
5470 #pragma unused(oidp, arg1, arg2)
5471 int error = 0;
5472 u_int32_t i;
5473 struct cfil_info *cfi;
5474
5475 /* Read only */
5476 if (req->newptr != USER_ADDR_NULL) {
5477 return EPERM;
5478 }
5479
5480 cfil_rw_lock_shared(&cfil_lck_rw);
5481
5482 /*
5483 * If just asking for the size, return an estimate of the required length
5484 */
5485 if (req->oldptr == USER_ADDR_NULL) {
5486 req->oldidx = cfil_sock_attached_count *
5487 sizeof(struct cfil_sock_stat);
5488 /* Bump the length in case new sockets get attached */
5489 req->oldidx += req->oldidx >> 3;
5490 goto done;
5491 }
5492
5493 TAILQ_FOREACH(cfi, &cfil_sock_head, cfi_link) {
5494 struct cfil_entry *entry;
5495 struct cfil_sock_stat stat;
5496 struct socket *so = cfi->cfi_so;
5497
5498 bzero(&stat, sizeof(struct cfil_sock_stat));
5499 stat.cfs_len = sizeof(struct cfil_sock_stat);
5500 stat.cfs_sock_id = cfi->cfi_sock_id;
5501 stat.cfs_flags = cfi->cfi_flags;
5502
5503 if (so != NULL) {
5504 stat.cfs_pid = so->last_pid;
5505 memcpy(stat.cfs_uuid, so->last_uuid,
5506 sizeof(uuid_t));
5507 if (so->so_flags & SOF_DELEGATED) {
5508 stat.cfs_e_pid = so->e_pid;
5509 memcpy(stat.cfs_e_uuid, so->e_uuid,
5510 sizeof(uuid_t));
5511 } else {
5512 stat.cfs_e_pid = so->last_pid;
5513 memcpy(stat.cfs_e_uuid, so->last_uuid,
5514 sizeof(uuid_t));
5515 }
5516
5517 stat.cfs_sock_family = so->so_proto->pr_domain->dom_family;
5518 stat.cfs_sock_type = so->so_proto->pr_type;
5519 stat.cfs_sock_protocol = so->so_proto->pr_protocol;
5520 }
5521
5522 stat.cfs_snd.cbs_pending_first =
5523 cfi->cfi_snd.cfi_pending_first;
5524 stat.cfs_snd.cbs_pending_last =
5525 cfi->cfi_snd.cfi_pending_last;
5526 stat.cfs_snd.cbs_inject_q_len =
5527 cfil_queue_len(&cfi->cfi_snd.cfi_inject_q);
5528 stat.cfs_snd.cbs_pass_offset =
5529 cfi->cfi_snd.cfi_pass_offset;
5530
5531 stat.cfs_rcv.cbs_pending_first =
5532 cfi->cfi_rcv.cfi_pending_first;
5533 stat.cfs_rcv.cbs_pending_last =
5534 cfi->cfi_rcv.cfi_pending_last;
5535 stat.cfs_rcv.cbs_inject_q_len =
5536 cfil_queue_len(&cfi->cfi_rcv.cfi_inject_q);
5537 stat.cfs_rcv.cbs_pass_offset =
5538 cfi->cfi_rcv.cfi_pass_offset;
5539
5540 for (i = 0; i < MAX_CONTENT_FILTER; i++) {
5541 struct cfil_entry_stat *estat;
5542 struct cfe_buf *ebuf;
5543 struct cfe_buf_stat *sbuf;
5544
5545 entry = &cfi->cfi_entries[i];
5546
5547 estat = &stat.ces_entries[i];
5548
5549 estat->ces_len = sizeof(struct cfil_entry_stat);
5550 estat->ces_filter_id = entry->cfe_filter ?
5551 entry->cfe_filter->cf_kcunit : 0;
5552 estat->ces_flags = entry->cfe_flags;
5553 estat->ces_necp_control_unit =
5554 entry->cfe_necp_control_unit;
5555
5556 estat->ces_last_event.tv_sec =
5557 (int64_t)entry->cfe_last_event.tv_sec;
5558 estat->ces_last_event.tv_usec =
5559 (int64_t)entry->cfe_last_event.tv_usec;
5560
5561 estat->ces_last_action.tv_sec =
5562 (int64_t)entry->cfe_last_action.tv_sec;
5563 estat->ces_last_action.tv_usec =
5564 (int64_t)entry->cfe_last_action.tv_usec;
5565
5566 ebuf = &entry->cfe_snd;
5567 sbuf = &estat->ces_snd;
5568 sbuf->cbs_pending_first =
5569 cfil_queue_offset_first(&ebuf->cfe_pending_q);
5570 sbuf->cbs_pending_last =
5571 cfil_queue_offset_last(&ebuf->cfe_pending_q);
5572 sbuf->cbs_ctl_first =
5573 cfil_queue_offset_first(&ebuf->cfe_ctl_q);
5574 sbuf->cbs_ctl_last =
5575 cfil_queue_offset_last(&ebuf->cfe_ctl_q);
5576 sbuf->cbs_pass_offset = ebuf->cfe_pass_offset;
5577 sbuf->cbs_peek_offset = ebuf->cfe_peek_offset;
5578 sbuf->cbs_peeked = ebuf->cfe_peeked;
5579
5580 ebuf = &entry->cfe_rcv;
5581 sbuf = &estat->ces_rcv;
5582 sbuf->cbs_pending_first =
5583 cfil_queue_offset_first(&ebuf->cfe_pending_q);
5584 sbuf->cbs_pending_last =
5585 cfil_queue_offset_last(&ebuf->cfe_pending_q);
5586 sbuf->cbs_ctl_first =
5587 cfil_queue_offset_first(&ebuf->cfe_ctl_q);
5588 sbuf->cbs_ctl_last =
5589 cfil_queue_offset_last(&ebuf->cfe_ctl_q);
5590 sbuf->cbs_pass_offset = ebuf->cfe_pass_offset;
5591 sbuf->cbs_peek_offset = ebuf->cfe_peek_offset;
5592 sbuf->cbs_peeked = ebuf->cfe_peeked;
5593 }
5594 error = SYSCTL_OUT(req, &stat,
5595 sizeof(struct cfil_sock_stat));
5596 if (error != 0) {
5597 break;
5598 }
5599 }
5600 done:
5601 cfil_rw_unlock_shared(&cfil_lck_rw);
5602
5603 #if SHOW_DEBUG
5604 if (req->oldptr != USER_ADDR_NULL) {
5605 cfil_info_show();
5606 }
5607 #endif
5608
5609 return error;
5610 }
5611
5612 /*
5613 * UDP Socket Support
5614 */
5615 static void
5616 cfil_hash_entry_log(int level, struct socket *so, struct cfil_hash_entry *entry, uint64_t sockId, const char* msg)
5617 {
5618 char local[MAX_IPv6_STR_LEN + 6];
5619 char remote[MAX_IPv6_STR_LEN + 6];
5620 const void *addr;
5621
5622 // No socket or no hash entry, no-op
5623 if (so == NULL || entry == NULL) {
5624 return;
5625 }
5626
5627 local[0] = remote[0] = 0x0;
5628
5629 switch (entry->cfentry_family) {
5630 case AF_INET6:
5631 addr = &entry->cfentry_laddr.addr6;
5632 inet_ntop(AF_INET6, addr, local, sizeof(local));
5633 addr = &entry->cfentry_faddr.addr6;
5634 inet_ntop(AF_INET6, addr, remote, sizeof(remote));
5635 break;
5636 case AF_INET:
5637 addr = &entry->cfentry_laddr.addr46.ia46_addr4.s_addr;
5638 inet_ntop(AF_INET, addr, local, sizeof(local));
5639 addr = &entry->cfentry_faddr.addr46.ia46_addr4.s_addr;
5640 inet_ntop(AF_INET, addr, remote, sizeof(remote));
5641 break;
5642 default:
5643 return;
5644 }
5645
5646 CFIL_LOG(level, "<%s>: <%s(%d) so %llx, entry %p, sockID %llu> lport %d fport %d laddr %s faddr %s hash %X",
5647 msg,
5648 IS_UDP(so) ? "UDP" : "proto", GET_SO_PROTO(so),
5649 (uint64_t)VM_KERNEL_ADDRPERM(so), entry, sockId,
5650 ntohs(entry->cfentry_lport), ntohs(entry->cfentry_fport), local, remote,
5651 entry->cfentry_flowhash);
5652 }
5653
5654 static void
5655 cfil_inp_log(int level, struct socket *so, const char* msg)
5656 {
5657 struct inpcb *inp = NULL;
5658 char local[MAX_IPv6_STR_LEN + 6];
5659 char remote[MAX_IPv6_STR_LEN + 6];
5660 const void *addr;
5661
5662 if (so == NULL) {
5663 return;
5664 }
5665
5666 inp = sotoinpcb(so);
5667 if (inp == NULL) {
5668 return;
5669 }
5670
5671 local[0] = remote[0] = 0x0;
5672
5673 if (inp->inp_vflag & INP_IPV6) {
5674 addr = &inp->in6p_laddr.s6_addr32;
5675 inet_ntop(AF_INET6, addr, local, sizeof(local));
5676 addr = &inp->in6p_faddr.s6_addr32;
5677 inet_ntop(AF_INET6, addr, remote, sizeof(remote));
5678 } else {
5679 addr = &inp->inp_laddr.s_addr;
5680 inet_ntop(AF_INET, addr, local, sizeof(local));
5681 addr = &inp->inp_faddr.s_addr;
5682 inet_ntop(AF_INET, addr, remote, sizeof(remote));
5683 }
5684
5685 if (so->so_cfil != NULL) {
5686 CFIL_LOG(level, "<%s>: <%s so %llx - flags 0x%x 0x%x, sockID %llu> lport %d fport %d laddr %s faddr %s",
5687 msg, IS_UDP(so) ? "UDP" : "TCP",
5688 (uint64_t)VM_KERNEL_ADDRPERM(so), inp->inp_flags, inp->inp_socket->so_flags, so->so_cfil->cfi_sock_id,
5689 ntohs(inp->inp_lport), ntohs(inp->inp_fport), local, remote);
5690 } else {
5691 CFIL_LOG(level, "<%s>: <%s so %llx - flags 0x%x 0x%x> lport %d fport %d laddr %s faddr %s",
5692 msg, IS_UDP(so) ? "UDP" : "TCP",
5693 (uint64_t)VM_KERNEL_ADDRPERM(so), inp->inp_flags, inp->inp_socket->so_flags,
5694 ntohs(inp->inp_lport), ntohs(inp->inp_fport), local, remote);
5695 }
5696 }
5697
5698 static void
5699 cfil_info_log(int level, struct cfil_info *cfil_info, const char* msg)
5700 {
5701 if (cfil_info == NULL) {
5702 return;
5703 }
5704
5705 if (cfil_info->cfi_hash_entry != NULL) {
5706 cfil_hash_entry_log(level, cfil_info->cfi_so, cfil_info->cfi_hash_entry, cfil_info->cfi_sock_id, msg);
5707 } else {
5708 cfil_inp_log(level, cfil_info->cfi_so, msg);
5709 }
5710 }
5711
5712 errno_t
5713 cfil_db_init(struct socket *so)
5714 {
5715 errno_t error = 0;
5716 struct cfil_db *db = NULL;
5717
5718 CFIL_LOG(LOG_INFO, "");
5719
5720 db = zalloc(cfil_db_zone);
5721 if (db == NULL) {
5722 error = ENOMEM;
5723 goto done;
5724 }
5725 bzero(db, sizeof(struct cfil_db));
5726 db->cfdb_so = so;
5727 db->cfdb_hashbase = hashinit(CFILHASHSIZE, M_CFIL, &db->cfdb_hashmask);
5728 if (db->cfdb_hashbase == NULL) {
5729 zfree(cfil_db_zone, db);
5730 db = NULL;
5731 error = ENOMEM;
5732 goto done;
5733 }
5734
5735 so->so_cfil_db = db;
5736
5737 done:
5738 return error;
5739 }
5740
5741 void
5742 cfil_db_free(struct socket *so)
5743 {
5744 struct cfil_hash_entry *entry = NULL;
5745 struct cfil_hash_entry *temp_entry = NULL;
5746 struct cfilhashhead *cfilhash = NULL;
5747 struct cfil_db *db = NULL;
5748
5749 CFIL_LOG(LOG_INFO, "");
5750
5751 if (so == NULL || so->so_cfil_db == NULL) {
5752 return;
5753 }
5754 db = so->so_cfil_db;
5755
5756 #if LIFECYCLE_DEBUG
5757 CFIL_LOG(LOG_ERR, "CFIL: LIFECYCLE: <so %llx, db %p> freeing db (count == %d)",
5758 (uint64_t)VM_KERNEL_ADDRPERM(so), db, db->cfdb_count);
5759 #endif
5760
5761 for (int i = 0; i < CFILHASHSIZE; i++) {
5762 cfilhash = &db->cfdb_hashbase[i];
5763 LIST_FOREACH_SAFE(entry, cfilhash, cfentry_link, temp_entry) {
5764 if (entry->cfentry_cfil != NULL) {
5765 #if LIFECYCLE_DEBUG
5766 cfil_info_log(LOG_ERR, entry->cfentry_cfil, "CFIL: LIFECYCLE: DB FREE CLEAN UP");
5767 #endif
5768 CFIL_INFO_FREE(entry->cfentry_cfil);
5769 OSIncrementAtomic(&cfil_stats.cfs_sock_detached);
5770 entry->cfentry_cfil = NULL;
5771 }
5772
5773 cfil_db_delete_entry(db, entry);
5774 if (so->so_flags & SOF_CONTENT_FILTER) {
5775 if (db->cfdb_count == 0) {
5776 so->so_flags &= ~SOF_CONTENT_FILTER;
5777 }
5778 VERIFY(so->so_usecount > 0);
5779 so->so_usecount--;
5780 }
5781 }
5782 }
5783
5784 // Make sure all entries are cleaned up!
5785 VERIFY(db->cfdb_count == 0);
5786 #if LIFECYCLE_DEBUG
5787 CFIL_LOG(LOG_ERR, "CFIL: LIFECYCLE: so usecount %d", so->so_usecount);
5788 #endif
5789
5790 hashdestroy(db->cfdb_hashbase, M_CFIL, db->cfdb_hashmask);
5791 zfree(cfil_db_zone, db);
5792 so->so_cfil_db = NULL;
5793 }
5794
5795 static bool
5796 fill_cfil_hash_entry_from_address(struct cfil_hash_entry *entry, bool isLocal, struct sockaddr *addr, bool islocalUpdate)
5797 {
5798 struct sockaddr_in *sin = NULL;
5799 struct sockaddr_in6 *sin6 = NULL;
5800
5801 if (entry == NULL || addr == NULL) {
5802 return FALSE;
5803 }
5804
5805 switch (addr->sa_family) {
5806 case AF_INET:
5807 sin = satosin(addr);
5808 if (sin->sin_len != sizeof(*sin)) {
5809 return FALSE;
5810 }
5811 if (isLocal == TRUE) {
5812 if (sin->sin_port) {
5813 entry->cfentry_lport = sin->sin_port;
5814 if (islocalUpdate) {
5815 entry->cfentry_lport_updated = TRUE;
5816 }
5817 }
5818 if (sin->sin_addr.s_addr) {
5819 entry->cfentry_laddr.addr46.ia46_addr4.s_addr = sin->sin_addr.s_addr;
5820 if (islocalUpdate) {
5821 entry->cfentry_laddr_updated = TRUE;
5822 }
5823 }
5824 } else {
5825 if (sin->sin_port) {
5826 entry->cfentry_fport = sin->sin_port;
5827 }
5828 if (sin->sin_addr.s_addr) {
5829 entry->cfentry_faddr.addr46.ia46_addr4.s_addr = sin->sin_addr.s_addr;
5830 }
5831 }
5832 entry->cfentry_family = AF_INET;
5833 return TRUE;
5834 case AF_INET6:
5835 sin6 = satosin6(addr);
5836 if (sin6->sin6_len != sizeof(*sin6)) {
5837 return FALSE;
5838 }
5839 if (isLocal == TRUE) {
5840 if (sin6->sin6_port) {
5841 entry->cfentry_lport = sin6->sin6_port;
5842 if (islocalUpdate) {
5843 entry->cfentry_lport_updated = TRUE;
5844 }
5845 }
5846 if (!IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
5847 entry->cfentry_laddr.addr6 = sin6->sin6_addr;
5848 if (islocalUpdate) {
5849 entry->cfentry_laddr_updated = TRUE;
5850 }
5851 }
5852 } else {
5853 if (sin6->sin6_port) {
5854 entry->cfentry_fport = sin6->sin6_port;
5855 }
5856 if (!IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
5857 entry->cfentry_faddr.addr6 = sin6->sin6_addr;
5858 }
5859 }
5860 entry->cfentry_family = AF_INET6;
5861 return TRUE;
5862 default:
5863 return FALSE;
5864 }
5865 }
5866
5867 static bool
5868 fill_cfil_hash_entry_from_inp(struct cfil_hash_entry *entry, bool isLocal, struct inpcb *inp, bool islocalUpdate)
5869 {
5870 if (entry == NULL || inp == NULL) {
5871 return FALSE;
5872 }
5873
5874 if (inp->inp_vflag & INP_IPV6) {
5875 if (isLocal == TRUE) {
5876 if (inp->inp_lport) {
5877 entry->cfentry_lport = inp->inp_lport;
5878 if (islocalUpdate) {
5879 entry->cfentry_lport_updated = TRUE;
5880 }
5881 }
5882 if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) {
5883 entry->cfentry_laddr.addr6 = inp->in6p_laddr;
5884 if (islocalUpdate) {
5885 entry->cfentry_laddr_updated = TRUE;
5886 }
5887 }
5888 } else {
5889 if (inp->inp_fport) {
5890 entry->cfentry_fport = inp->inp_fport;
5891 }
5892 if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) {
5893 entry->cfentry_faddr.addr6 = inp->in6p_faddr;
5894 }
5895 }
5896 entry->cfentry_family = AF_INET6;
5897 return TRUE;
5898 } else if (inp->inp_vflag & INP_IPV4) {
5899 if (isLocal == TRUE) {
5900 if (inp->inp_lport) {
5901 entry->cfentry_lport = inp->inp_lport;
5902 if (islocalUpdate) {
5903 entry->cfentry_lport_updated = TRUE;
5904 }
5905 }
5906 if (inp->inp_laddr.s_addr) {
5907 entry->cfentry_laddr.addr46.ia46_addr4.s_addr = inp->inp_laddr.s_addr;
5908 if (islocalUpdate) {
5909 entry->cfentry_laddr_updated = TRUE;
5910 }
5911 }
5912 } else {
5913 if (inp->inp_fport) {
5914 entry->cfentry_fport = inp->inp_fport;
5915 }
5916 if (inp->inp_faddr.s_addr) {
5917 entry->cfentry_faddr.addr46.ia46_addr4.s_addr = inp->inp_faddr.s_addr;
5918 }
5919 }
5920 entry->cfentry_family = AF_INET;
5921 return TRUE;
5922 }
5923 return FALSE;
5924 }
5925
5926 bool
5927 check_port(struct sockaddr *addr, u_short port)
5928 {
5929 struct sockaddr_in *sin = NULL;
5930 struct sockaddr_in6 *sin6 = NULL;
5931
5932 if (addr == NULL || port == 0) {
5933 return FALSE;
5934 }
5935
5936 switch (addr->sa_family) {
5937 case AF_INET:
5938 sin = satosin(addr);
5939 if (sin->sin_len != sizeof(*sin)) {
5940 return FALSE;
5941 }
5942 if (port == ntohs(sin->sin_port)) {
5943 return TRUE;
5944 }
5945 break;
5946 case AF_INET6:
5947 sin6 = satosin6(addr);
5948 if (sin6->sin6_len != sizeof(*sin6)) {
5949 return FALSE;
5950 }
5951 if (port == ntohs(sin6->sin6_port)) {
5952 return TRUE;
5953 }
5954 break;
5955 default:
5956 break;
5957 }
5958 return FALSE;
5959 }
5960
5961 struct cfil_hash_entry *
5962 cfil_db_lookup_entry_with_sockid(struct cfil_db *db, u_int64_t sock_id)
5963 {
5964 struct cfilhashhead *cfilhash = NULL;
5965 u_int32_t flowhash = (u_int32_t)(sock_id & 0x0ffffffff);
5966 struct cfil_hash_entry *nextentry;
5967
5968 if (db == NULL || db->cfdb_hashbase == NULL || sock_id == 0) {
5969 return NULL;
5970 }
5971
5972 flowhash &= db->cfdb_hashmask;
5973 cfilhash = &db->cfdb_hashbase[flowhash];
5974
5975 LIST_FOREACH(nextentry, cfilhash, cfentry_link) {
5976 if (nextentry->cfentry_cfil != NULL &&
5977 nextentry->cfentry_cfil->cfi_sock_id == sock_id) {
5978 CFIL_LOG(LOG_DEBUG, "CFIL: UDP <so %llx> matched <id %llu, hash %u>",
5979 (uint64_t)VM_KERNEL_ADDRPERM(db->cfdb_so), nextentry->cfentry_cfil->cfi_sock_id, flowhash);
5980 cfil_hash_entry_log(LOG_DEBUG, db->cfdb_so, nextentry, 0, "CFIL: UDP found entry");
5981 return nextentry;
5982 }
5983 }
5984
5985 CFIL_LOG(LOG_DEBUG, "CFIL: UDP <so %llx> NOT matched <id %llu, hash %u>",
5986 (uint64_t)VM_KERNEL_ADDRPERM(db->cfdb_so), sock_id, flowhash);
5987 return NULL;
5988 }
5989
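/*
 * Illustrative example: the lookup above relies on the low 32 bits of the
 * datagram sock_id carrying the flow hash, so the owning bucket can be
 * recomputed without scanning the whole table. Assuming CFILHASHSIZE is a
 * power of two (so cfdb_hashmask == CFILHASHSIZE - 1), the bucket is derived
 * exactly as in the code above:
 *
 *     u_int32_t flowhash = (u_int32_t)(sock_id & 0x0ffffffff);
 *     u_int32_t bucket   = flowhash & db->cfdb_hashmask;
 *     // e.g. sock_id 0x0000002a1234abcd with hashmask 0xf -> bucket 0xd
 */
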
5990 struct cfil_hash_entry *
5991 cfil_db_lookup_entry_internal(struct cfil_db *db, struct sockaddr *local, struct sockaddr *remote, boolean_t remoteOnly, boolean_t withLocalPort)
5992 {
5993 struct cfil_hash_entry matchentry = { };
5994 struct cfil_hash_entry *nextentry = NULL;
5995 struct inpcb *inp = sotoinpcb(db->cfdb_so);
5996 u_int32_t hashkey_faddr = 0, hashkey_laddr = 0;
5997 u_int16_t hashkey_fport = 0, hashkey_lport = 0;
5998 int inp_hash_element = 0;
5999 struct cfilhashhead *cfilhash = NULL;
6000
6001 CFIL_LOG(LOG_INFO, "");
6002
6003 if (inp == NULL) {
6004 goto done;
6005 }
6006
6007 if (local != NULL) {
6008 fill_cfil_hash_entry_from_address(&matchentry, TRUE, local, FALSE);
6009 } else {
6010 fill_cfil_hash_entry_from_inp(&matchentry, TRUE, inp, FALSE);
6011 }
6012 if (remote != NULL) {
6013 fill_cfil_hash_entry_from_address(&matchentry, FALSE, remote, FALSE);
6014 } else {
6015 fill_cfil_hash_entry_from_inp(&matchentry, FALSE, inp, FALSE);
6016 }
6017
6018 if (inp->inp_vflag & INP_IPV6) {
6019 hashkey_faddr = matchentry.cfentry_faddr.addr6.s6_addr32[3];
6020 hashkey_laddr = (remoteOnly == false) ? matchentry.cfentry_laddr.addr6.s6_addr32[3] : 0;
6021 } else {
6022 hashkey_faddr = matchentry.cfentry_faddr.addr46.ia46_addr4.s_addr;
6023 hashkey_laddr = (remoteOnly == false) ? matchentry.cfentry_laddr.addr46.ia46_addr4.s_addr : 0;
6024 }
6025
6026 hashkey_fport = matchentry.cfentry_fport;
6027 hashkey_lport = (remoteOnly == false || withLocalPort == true) ? matchentry.cfentry_lport : 0;
6028
6029 inp_hash_element = CFIL_HASH(hashkey_laddr, hashkey_faddr, hashkey_lport, hashkey_fport);
6030 inp_hash_element &= db->cfdb_hashmask;
6031 cfilhash = &db->cfdb_hashbase[inp_hash_element];
6032
6033 LIST_FOREACH(nextentry, cfilhash, cfentry_link) {
6034 if ((inp->inp_vflag & INP_IPV6) &&
6035 (remoteOnly || nextentry->cfentry_lport_updated || nextentry->cfentry_lport == matchentry.cfentry_lport) &&
6036 nextentry->cfentry_fport == matchentry.cfentry_fport &&
6037 (remoteOnly || nextentry->cfentry_laddr_updated || IN6_ARE_ADDR_EQUAL(&nextentry->cfentry_laddr.addr6, &matchentry.cfentry_laddr.addr6)) &&
6038 IN6_ARE_ADDR_EQUAL(&nextentry->cfentry_faddr.addr6, &matchentry.cfentry_faddr.addr6)) {
6039 #if DATA_DEBUG
6040 cfil_hash_entry_log(LOG_DEBUG, db->cfdb_so, &matchentry, 0, "CFIL LOOKUP ENTRY: UDP V6 found entry");
6041 #endif
6042 return nextentry;
6043 } else if ((remoteOnly || nextentry->cfentry_lport_updated || nextentry->cfentry_lport == matchentry.cfentry_lport) &&
6044 nextentry->cfentry_fport == matchentry.cfentry_fport &&
6045 (remoteOnly || nextentry->cfentry_laddr_updated || nextentry->cfentry_laddr.addr46.ia46_addr4.s_addr == matchentry.cfentry_laddr.addr46.ia46_addr4.s_addr) &&
6046 nextentry->cfentry_faddr.addr46.ia46_addr4.s_addr == matchentry.cfentry_faddr.addr46.ia46_addr4.s_addr) {
6047 #if DATA_DEBUG
6048 cfil_hash_entry_log(LOG_DEBUG, db->cfdb_so, &matchentry, 0, "CFIL LOOKUP ENTRY: UDP V4 found entry");
6049 #endif
6050 return nextentry;
6051 }
6052 }
6053
6054 done:
6055 #if DATA_DEBUG
6056 cfil_hash_entry_log(LOG_DEBUG, db->cfdb_so, &matchentry, 0, "CFIL LOOKUP ENTRY: UDP no entry found");
6057 #endif
6058 return NULL;
6059 }
6060
6061 struct cfil_hash_entry *
6062 cfil_db_lookup_entry(struct cfil_db *db, struct sockaddr *local, struct sockaddr *remote, boolean_t remoteOnly)
6063 {
6064 struct cfil_hash_entry *entry = cfil_db_lookup_entry_internal(db, local, remote, remoteOnly, false);
6065 if (entry == NULL && remoteOnly == true) {
6066 entry = cfil_db_lookup_entry_internal(db, local, remote, remoteOnly, true);
6067 }
6068 return entry;
6069 }
6070
6071 cfil_sock_id_t
6072 cfil_sock_id_from_datagram_socket(struct socket *so, struct sockaddr *local, struct sockaddr *remote)
6073 {
6074 struct cfil_hash_entry *hash_entry = NULL;
6075
6076 socket_lock_assert_owned(so);
6077
6078 if (so->so_cfil_db == NULL) {
6079 return CFIL_SOCK_ID_NONE;
6080 }
6081
6082 hash_entry = cfil_db_lookup_entry(so->so_cfil_db, local, remote, false);
6083 if (hash_entry == NULL) {
6084 // No match with both local and remote; try matching with remote only
6085 hash_entry = cfil_db_lookup_entry(so->so_cfil_db, local, remote, true);
6086 }
6087 if (hash_entry == NULL || hash_entry->cfentry_cfil == NULL) {
6088 return CFIL_SOCK_ID_NONE;
6089 }
6090
6091 return hash_entry->cfentry_cfil->cfi_sock_id;
6092 }
6093
6094 void
6095 cfil_db_delete_entry(struct cfil_db *db, struct cfil_hash_entry *hash_entry)
6096 {
6097 if (hash_entry == NULL) {
6098 return;
6099 }
6100 if (db == NULL || db->cfdb_count == 0) {
6101 return;
6102 }
6103 db->cfdb_count--;
6104 if (db->cfdb_only_entry == hash_entry) {
6105 db->cfdb_only_entry = NULL;
6106 }
6107 LIST_REMOVE(hash_entry, cfentry_link);
6108 zfree(cfil_hash_entry_zone, hash_entry);
6109 }
6110
6111 struct cfil_hash_entry *
6112 cfil_db_add_entry(struct cfil_db *db, struct sockaddr *local, struct sockaddr *remote)
6113 {
6114 struct cfil_hash_entry *entry = NULL;
6115 struct inpcb *inp = sotoinpcb(db->cfdb_so);
6116 u_int32_t hashkey_faddr = 0, hashkey_laddr = 0;
6117 int inp_hash_element = 0;
6118 struct cfilhashhead *cfilhash = NULL;
6119
6120 CFIL_LOG(LOG_INFO, "");
6121
6122 if (inp == NULL) {
6123 goto done;
6124 }
6125
6126 entry = zalloc(cfil_hash_entry_zone);
6127 if (entry == NULL) {
6128 goto done;
6129 }
6130 bzero(entry, sizeof(struct cfil_hash_entry));
6131
6132 if (local != NULL) {
6133 fill_cfil_hash_entry_from_address(entry, TRUE, local, FALSE);
6134 } else {
6135 fill_cfil_hash_entry_from_inp(entry, TRUE, inp, FALSE);
6136 }
6137 if (remote != NULL) {
6138 fill_cfil_hash_entry_from_address(entry, FALSE, remote, FALSE);
6139 } else {
6140 fill_cfil_hash_entry_from_inp(entry, FALSE, inp, FALSE);
6141 }
6142 entry->cfentry_lastused = net_uptime();
6143
6144 if (inp->inp_vflag & INP_IPV6) {
6145 hashkey_faddr = entry->cfentry_faddr.addr6.s6_addr32[3];
6146 hashkey_laddr = entry->cfentry_laddr.addr6.s6_addr32[3];
6147 } else {
6148 hashkey_faddr = entry->cfentry_faddr.addr46.ia46_addr4.s_addr;
6149 hashkey_laddr = entry->cfentry_laddr.addr46.ia46_addr4.s_addr;
6150 }
6151 entry->cfentry_flowhash = CFIL_HASH(hashkey_laddr, hashkey_faddr,
6152 entry->cfentry_lport, entry->cfentry_fport);
6153 inp_hash_element = entry->cfentry_flowhash & db->cfdb_hashmask;
6154
6155 cfilhash = &db->cfdb_hashbase[inp_hash_element];
6156
6157 LIST_INSERT_HEAD(cfilhash, entry, cfentry_link);
6158 db->cfdb_count++;
6159 db->cfdb_only_entry = entry;
6160 cfil_hash_entry_log(LOG_DEBUG, db->cfdb_so, entry, 0, "CFIL: cfil_db_add_entry: ADDED");
6161
6162 done:
6163 CFIL_LOG(LOG_DEBUG, "CFIL: UDP <so %llx> total count %d", (uint64_t)VM_KERNEL_ADDRPERM(db->cfdb_so), db->cfdb_count);
6164 return entry;
6165 }
6166
6167 void
6168 cfil_db_update_entry_local(struct cfil_db *db, struct cfil_hash_entry *entry, struct sockaddr *local, struct mbuf *control)
6169 {
6170 struct inpcb *inp = sotoinpcb(db->cfdb_so);
6171 union sockaddr_in_4_6 address_buf = { };
6172
6173 CFIL_LOG(LOG_INFO, "");
6174
6175 if (inp == NULL || entry == NULL) {
6176 return;
6177 }
6178
6179 if (LOCAL_ADDRESS_NEEDS_UPDATE(entry)) {
6180 // Flow does not have a local address yet. Retrieve local address
6181 // from control mbufs if present.
6182 if (local == NULL && control != NULL) {
6183 uint8_t *addr_ptr = NULL;
6184 int size = cfil_sock_udp_get_address_from_control(entry->cfentry_family, control, &addr_ptr);
6185
6186 if (size && addr_ptr) {
6187 switch (entry->cfentry_family) {
6188 case AF_INET:
6189 if (size == sizeof(struct in_addr)) {
6190 address_buf.sin.sin_port = 0;
6191 address_buf.sin.sin_family = AF_INET;
6192 address_buf.sin.sin_len = sizeof(struct sockaddr_in);
6193 (void) memcpy(&address_buf.sin.sin_addr, addr_ptr, sizeof(struct in_addr));
6194 local = sintosa(&address_buf.sin);
6195 }
6196 break;
6197 case AF_INET6:
6198 if (size == sizeof(struct in6_addr)) {
6199 address_buf.sin6.sin6_port = 0;
6200 address_buf.sin6.sin6_family = AF_INET6;
6201 address_buf.sin6.sin6_len = sizeof(struct sockaddr_in6);
6202 (void) memcpy(&address_buf.sin6.sin6_addr, addr_ptr, sizeof(struct in6_addr));
6203 local = sin6tosa(&address_buf.sin6);
6204 }
6205 break;
6206 default:
6207 break;
6208 }
6209 }
6210 }
6211 if (local != NULL) {
6212 fill_cfil_hash_entry_from_address(entry, TRUE, local, TRUE);
6213 } else {
6214 fill_cfil_hash_entry_from_inp(entry, TRUE, inp, TRUE);
6215 }
6216 }
6217
6218 if (LOCAL_PORT_NEEDS_UPDATE(entry, db->cfdb_so)) {
6219 fill_cfil_hash_entry_from_inp(entry, TRUE, inp, TRUE);
6220 }
6221
6222 return;
6223 }
6224
6225 struct cfil_info *
6226 cfil_db_get_cfil_info(struct cfil_db *db, cfil_sock_id_t id)
6227 {
6228 struct cfil_hash_entry *hash_entry = NULL;
6229
6230 CFIL_LOG(LOG_INFO, "");
6231
6232 if (db == NULL || id == 0) {
6233 CFIL_LOG(LOG_DEBUG, "CFIL: UDP <so %llx> NULL DB <id %llu>",
6234 db ? (uint64_t)VM_KERNEL_ADDRPERM(db->cfdb_so) : 0, id);
6235 return NULL;
6236 }
6237
6238 // This is an optimization for a connected UDP socket, which has only one flow.
6239 // No need to do the hash lookup.
6240 if (db->cfdb_count == 1) {
6241 if (db->cfdb_only_entry && db->cfdb_only_entry->cfentry_cfil &&
6242 db->cfdb_only_entry->cfentry_cfil->cfi_sock_id == id) {
6243 return db->cfdb_only_entry->cfentry_cfil;
6244 }
6245 }
6246
6247 hash_entry = cfil_db_lookup_entry_with_sockid(db, id);
6248 return hash_entry != NULL ? hash_entry->cfentry_cfil : NULL;
6249 }
6250
6251 struct cfil_hash_entry *
6252 cfil_sock_udp_get_flow(struct socket *so, uint32_t filter_control_unit, bool outgoing, struct sockaddr *local, struct sockaddr *remote, struct mbuf *control, int debug)
6253 {
6254 struct cfil_hash_entry *hash_entry = NULL;
6255 int new_filter_control_unit = 0;
6256
6257 errno_t error = 0;
6258 socket_lock_assert_owned(so);
6259
6260 // If new socket, allocate cfil db
6261 if (so->so_cfil_db == NULL) {
6262 if (cfil_db_init(so) != 0) {
6263 return NULL;
6264 }
6265 }
6266
6267 // See if flow already exists.
6268 hash_entry = cfil_db_lookup_entry(so->so_cfil_db, local, remote, false);
6269 if (hash_entry == NULL) {
6270 // No match with both local and remote; try matching with remote only
6271 hash_entry = cfil_db_lookup_entry(so->so_cfil_db, local, remote, true);
6272 }
6273 if (hash_entry != NULL) {
6274 /* Drop pre-existing UDP flow if filter state changed */
6275 new_filter_control_unit = necp_socket_get_content_filter_control_unit(so);
6276 if (new_filter_control_unit > 0 &&
6277 new_filter_control_unit != hash_entry->cfentry_cfil->cfi_filter_control_unit) {
6278 return NULL;
6279 }
6280
6281 // Try to update flow info from socket and/or control mbufs if necessary
6282 if (LOCAL_ADDRESS_NEEDS_UPDATE(hash_entry) || LOCAL_PORT_NEEDS_UPDATE(hash_entry, so)) {
6283 cfil_db_update_entry_local(so->so_cfil_db, hash_entry, local, control);
6284 }
6285 return hash_entry;
6286 }
6287
6288 hash_entry = cfil_db_add_entry(so->so_cfil_db, local, remote);
6289 if (hash_entry == NULL) {
6290 OSIncrementAtomic(&cfil_stats.cfs_sock_attach_no_mem);
6291 CFIL_LOG(LOG_ERR, "CFIL: UDP failed to add entry");
6292 return NULL;
6293 }
6294
6295 if (cfil_info_alloc(so, hash_entry) == NULL ||
6296 hash_entry->cfentry_cfil == NULL) {
6297 cfil_db_delete_entry(so->so_cfil_db, hash_entry);
6298 CFIL_LOG(LOG_ERR, "CFIL: UDP failed to alloc cfil_info");
6299 OSIncrementAtomic(&cfil_stats.cfs_sock_attach_no_mem);
6300 return NULL;
6301 }
6302 hash_entry->cfentry_cfil->cfi_filter_control_unit = filter_control_unit;
6303 hash_entry->cfentry_cfil->cfi_dir = outgoing ? CFS_CONNECTION_DIR_OUT : CFS_CONNECTION_DIR_IN;
6304 hash_entry->cfentry_cfil->cfi_debug = debug;
6305
6306 #if LIFECYCLE_DEBUG
6307 cfil_info_log(LOG_ERR, hash_entry->cfentry_cfil, "CFIL: LIFECYCLE: ADDED");
6308 #endif
6309
6310 // Check if we can update the new flow's local address from control mbufs
6311 if (control != NULL) {
6312 cfil_db_update_entry_local(so->so_cfil_db, hash_entry, local, control);
6313 }
6314
6315 if (cfil_info_attach_unit(so, filter_control_unit, hash_entry->cfentry_cfil) == 0) {
6316 CFIL_INFO_FREE(hash_entry->cfentry_cfil);
6317 cfil_db_delete_entry(so->so_cfil_db, hash_entry);
6318 CFIL_LOG(LOG_ERR, "CFIL: UDP cfil_info_attach_unit(%u) failed",
6319 filter_control_unit);
6320 OSIncrementAtomic(&cfil_stats.cfs_sock_attach_failed);
6321 return NULL;
6322 }
6323 CFIL_LOG(LOG_DEBUG, "CFIL: UDP <so %llx> filter_control_unit %u sockID %llu attached",
6324 (uint64_t)VM_KERNEL_ADDRPERM(so),
6325 filter_control_unit, hash_entry->cfentry_cfil->cfi_sock_id);
6326
6327 so->so_flags |= SOF_CONTENT_FILTER;
6328 OSIncrementAtomic(&cfil_stats.cfs_sock_attached);
6329
6330 /* Hold a reference on the socket for each flow */
6331 so->so_usecount++;
6332
6333 if (debug) {
6334 cfil_info_log(LOG_ERR, hash_entry->cfentry_cfil, "CFIL: LIFECYCLE: ADDED");
6335 }
6336
6337 error = cfil_dispatch_attach_event(so, hash_entry->cfentry_cfil, 0,
6338 outgoing ? CFS_CONNECTION_DIR_OUT : CFS_CONNECTION_DIR_IN);
6339 /* We can recover from flow control or out of memory errors */
6340 if (error != 0 && error != ENOBUFS && error != ENOMEM) {
6341 return NULL;
6342 }
6343
6344 CFIL_INFO_VERIFY(hash_entry->cfentry_cfil);
6345 return hash_entry;
6346 }
6347
6348 int
6349 cfil_sock_udp_get_address_from_control(sa_family_t family, struct mbuf *control, uint8_t **address_ptr)
6350 {
6351 struct cmsghdr *cm;
6352 struct in6_pktinfo *pi6;
6353
6354 if (control == NULL || address_ptr == NULL) {
6355 return 0;
6356 }
6357
6358 while (control) {
6359 if (control->m_type != MT_CONTROL) {
6360 control = control->m_next;
6361 continue;
6362 }
6363
6364 for (cm = M_FIRST_CMSGHDR(control);
6365 is_cmsg_valid(control, cm);
6366 cm = M_NXT_CMSGHDR(control, cm)) {
6367 switch (cm->cmsg_type) {
6368 case IP_RECVDSTADDR:
6369 if (family == AF_INET &&
6370 cm->cmsg_level == IPPROTO_IP &&
6371 cm->cmsg_len == CMSG_LEN(sizeof(struct in_addr))) {
6372 *address_ptr = CMSG_DATA(cm);
6373 return sizeof(struct in_addr);
6374 }
6375 break;
6376 case IPV6_PKTINFO:
6377 case IPV6_2292PKTINFO:
6378 if (family == AF_INET6 &&
6379 cm->cmsg_level == IPPROTO_IPV6 &&
6380 cm->cmsg_len == CMSG_LEN(sizeof(struct in6_pktinfo))) {
6381 pi6 = (struct in6_pktinfo *)(void *)CMSG_DATA(cm);
6382 *address_ptr = (uint8_t *)&pi6->ipi6_addr;
6383 return sizeof(struct in6_addr);
6384 }
6385 break;
6386 default:
6387 break;
6388 }
6389 }
6390
6391 control = control->m_next;
6392 }
6393 return 0;
6394 }
6395
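/*
 * Illustrative sketch: the control mbufs parsed above typically originate
 * from sendmsg(2) ancillary data in which an application pins the source
 * address of an outgoing datagram. A standard RFC 3542 style IPv6 example
 * follows (the IPv4 IP_RECVDSTADDR variant is analogous); desired_source_addr,
 * dst, iov and fd are assumed to be set up by the caller, and user space on
 * Darwin needs __APPLE_USE_RFC_3542 defined before <netinet/in.h> for
 * IPV6_PKTINFO:
 *
 *     #include <sys/socket.h>
 *     #include <netinet/in.h>
 *     #include <string.h>
 *
 *     struct in6_pktinfo pi = { 0 };
 *     pi.ipi6_addr = desired_source_addr;          // source address to use (assumed)
 *
 *     char cbuf[CMSG_SPACE(sizeof(pi))];
 *     struct msghdr msg = { 0 };
 *     msg.msg_name = (void *)&dst;                 // destination sockaddr_in6 (assumed)
 *     msg.msg_namelen = sizeof(dst);
 *     msg.msg_iov = &iov;                          // payload iovec (assumed)
 *     msg.msg_iovlen = 1;
 *     msg.msg_control = cbuf;
 *     msg.msg_controllen = sizeof(cbuf);
 *
 *     struct cmsghdr *cm = CMSG_FIRSTHDR(&msg);
 *     cm->cmsg_level = IPPROTO_IPV6;
 *     cm->cmsg_type = IPV6_PKTINFO;
 *     cm->cmsg_len = CMSG_LEN(sizeof(pi));
 *     memcpy(CMSG_DATA(cm), &pi, sizeof(pi));
 *
 *     sendmsg(fd, &msg, 0);                        // fd is the UDP socket (assumed)
 */
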
6396 errno_t
6397 cfil_sock_udp_handle_data(bool outgoing, struct socket *so,
6398 struct sockaddr *local, struct sockaddr *remote,
6399 struct mbuf *data, struct mbuf *control, uint32_t flags)
6400 {
6401 #pragma unused(outgoing, so, local, remote, data, control, flags)
6402 errno_t error = 0;
6403 uint32_t filter_control_unit;
6404 struct cfil_hash_entry *hash_entry = NULL;
6405 struct cfil_info *cfil_info = NULL;
6406 int debug = 0;
6407
6408 socket_lock_assert_owned(so);
6409
6410 if (cfil_active_count == 0) {
6411 CFIL_LOG(LOG_DEBUG, "CFIL: UDP no active filter");
6412 OSIncrementAtomic(&cfil_stats.cfs_sock_attach_in_vain);
6413 return error;
6414 }
6415
6416 // Socket has been blessed to skip content filtering (SOF1_CONTENT_FILTER_SKIP)
6417 if ((so->so_flags1 & SOF1_CONTENT_FILTER_SKIP) != 0) {
6418 return error;
6419 }
6420
6421 filter_control_unit = necp_socket_get_content_filter_control_unit(so);
6422 if (filter_control_unit == 0) {
6423 CFIL_LOG(LOG_DEBUG, "CFIL: UDP failed to get control unit");
6424 return error;
6425 }
6426
6427 if (filter_control_unit == NECP_FILTER_UNIT_NO_FILTER) {
6428 return error;
6429 }
6430
6431 if ((filter_control_unit & NECP_MASK_USERSPACE_ONLY) != 0) {
6432 CFIL_LOG(LOG_DEBUG, "CFIL: UDP user space only");
6433 OSIncrementAtomic(&cfil_stats.cfs_sock_userspace_only);
6434 return error;
6435 }
6436
6437 hash_entry = cfil_sock_udp_get_flow(so, filter_control_unit, outgoing, local, remote, control, debug);
6438 if (hash_entry == NULL || hash_entry->cfentry_cfil == NULL) {
6439 CFIL_LOG(LOG_ERR, "CFIL: Falied to create UDP flow");
6440 return EPIPE;
6441 }
6442 // Update last-used timestamp; this drives the flow idle timeout
6443 hash_entry->cfentry_lastused = net_uptime();
6444 cfil_info = hash_entry->cfentry_cfil;
6445
6446 if (cfil_info->cfi_flags & CFIF_DROP) {
6447 #if DATA_DEBUG
6448 cfil_hash_entry_log(LOG_DEBUG, so, hash_entry, 0, "CFIL: UDP DROP");
6449 #endif
6450 return EPIPE;
6451 }
6452 if (control != NULL) {
6453 OSIncrementAtomic(&cfil_stats.cfs_data_in_control);
6454 }
6455 if (data->m_type == MT_OOBDATA) {
6456 CFIL_LOG(LOG_ERR, "so %llx MSG_OOB",
6457 (uint64_t)VM_KERNEL_ADDRPERM(so));
6458 OSIncrementAtomic(&cfil_stats.cfs_data_in_oob);
6459 }
6460
6461 error = cfil_data_common(so, cfil_info, outgoing, remote, data, control, flags);
6462
6463 return error;
6464 }
6465
6466 /*
6467 * Go through all UDP flows for the specified socket and return TRUE if
6468 * any flow is still attached. If need_wait is TRUE, wait on first
6469 * attached flow.
6470 */
6471 static int
6472 cfil_filters_udp_attached(struct socket *so, bool need_wait)
6473 {
6474 struct timespec ts;
6475 lck_mtx_t *mutex_held;
6476 struct cfilhashhead *cfilhash = NULL;
6477 struct cfil_db *db = NULL;
6478 struct cfil_hash_entry *hash_entry = NULL;
6479 struct cfil_hash_entry *temp_hash_entry = NULL;
6480 struct cfil_info *cfil_info = NULL;
6481 struct cfil_entry *entry = NULL;
6482 errno_t error = 0;
6483 int kcunit;
6484 int attached = 0;
6485 uint64_t sock_flow_id = 0;
6486
6487 socket_lock_assert_owned(so);
6488
6489 if ((so->so_flags & SOF_CONTENT_FILTER) != 0 && so->so_cfil_db != NULL) {
6490 if (so->so_proto->pr_getlock != NULL) {
6491 mutex_held = (*so->so_proto->pr_getlock)(so, PR_F_WILLUNLOCK);
6492 } else {
6493 mutex_held = so->so_proto->pr_domain->dom_mtx;
6494 }
6495 LCK_MTX_ASSERT(mutex_held, LCK_MTX_ASSERT_OWNED);
6496
6497 db = so->so_cfil_db;
6498
6499 for (int i = 0; i < CFILHASHSIZE; i++) {
6500 cfilhash = &db->cfdb_hashbase[i];
6501
6502 LIST_FOREACH_SAFE(hash_entry, cfilhash, cfentry_link, temp_hash_entry) {
6503 if (hash_entry->cfentry_cfil != NULL) {
6504 cfil_info = hash_entry->cfentry_cfil;
6505 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
6506 entry = &cfil_info->cfi_entries[kcunit - 1];
6507
6508 /* Are we attached to the filter? */
6509 if (entry->cfe_filter == NULL) {
6510 continue;
6511 }
6512
6513 if ((entry->cfe_flags & CFEF_SENT_SOCK_ATTACHED) == 0) {
6514 continue;
6515 }
6516 if ((entry->cfe_flags & CFEF_CFIL_DETACHED) != 0) {
6517 continue;
6518 }
6519
6520 attached = 1;
6521
6522 if (need_wait == TRUE) {
6523 #if LIFECYCLE_DEBUG
6524 cfil_info_log(LOG_ERR, cfil_info, "CFIL: LIFECYCLE: WAIT FOR FLOW TO FINISH");
6525 #endif
6526
6527 ts.tv_sec = cfil_close_wait_timeout / 1000;
6528 ts.tv_nsec = (cfil_close_wait_timeout % 1000) *
6529 NSEC_PER_USEC * 1000;
6530
6531 OSIncrementAtomic(&cfil_stats.cfs_close_wait);
6532 cfil_info->cfi_flags |= CFIF_CLOSE_WAIT;
6533 sock_flow_id = cfil_info->cfi_sock_id;
6534
6535 error = msleep((caddr_t)cfil_info, mutex_held,
6536 PSOCK | PCATCH, "cfil_filters_udp_attached", &ts);
6537
6538 // Woke up from sleep; check that cfil_info is still valid
6539 if (so->so_cfil_db == NULL ||
6540 (cfil_info != cfil_db_get_cfil_info(so->so_cfil_db, sock_flow_id))) {
6541 // cfil_info is not valid, do not continue
6542 goto done;
6543 }
6544
6545 cfil_info->cfi_flags &= ~CFIF_CLOSE_WAIT;
6546
6547 #if LIFECYCLE_DEBUG
6548 cfil_info_log(LOG_ERR, cfil_info, "CFIL: LIFECYCLE: WAIT FOR FLOW DONE");
6549 #endif
6550
6551 /*
6552 * Force close in case of timeout
6553 */
6554 if (error != 0) {
6555 OSIncrementAtomic(&cfil_stats.cfs_close_wait_timeout);
6556 #if LIFECYCLE_DEBUG
6557 cfil_info_log(LOG_ERR, cfil_info, "CFIL: LIFECYCLE: WAIT FOR FLOW TIMED OUT, FORCE DETACH");
6558 #endif
6559 entry->cfe_flags |= CFEF_CFIL_DETACHED;
6560 }
6561 }
6562 goto done;
6563 }
6564 }
6565 }
6566 }
6567 }
6568
6569 done:
6570 return attached;
6571 }
6572
6573 int32_t
6574 cfil_sock_udp_data_pending(struct sockbuf *sb, bool check_thread)
6575 {
6576 struct socket *so = sb->sb_so;
6577 struct cfi_buf *cfi_buf;
6578 uint64_t pending = 0;
6579 uint64_t total_pending = 0;
6580 struct cfilhashhead *cfilhash = NULL;
6581 struct cfil_db *db = NULL;
6582 struct cfil_hash_entry *hash_entry = NULL;
6583 struct cfil_hash_entry *temp_hash_entry = NULL;
6584
6585 socket_lock_assert_owned(so);
6586
6587 if ((so->so_flags & SOF_CONTENT_FILTER) != 0 && so->so_cfil_db != NULL &&
6588 (check_thread == FALSE || so->so_snd.sb_cfil_thread != current_thread())) {
6589 db = so->so_cfil_db;
6590
6591 for (int i = 0; i < CFILHASHSIZE; i++) {
6592 cfilhash = &db->cfdb_hashbase[i];
6593
6594 LIST_FOREACH_SAFE(hash_entry, cfilhash, cfentry_link, temp_hash_entry) {
6595 if (hash_entry->cfentry_cfil != NULL) {
6596 if ((sb->sb_flags & SB_RECV) == 0) {
6597 cfi_buf = &hash_entry->cfentry_cfil->cfi_snd;
6598 } else {
6599 cfi_buf = &hash_entry->cfentry_cfil->cfi_rcv;
6600 }
6601
6602 pending = cfi_buf->cfi_pending_last - cfi_buf->cfi_pending_first;
6603 /*
6604 * If we are limited by the "chars of mbufs used" roughly
6605 * adjust so we won't overcommit
6606 */
6607 if ((uint64_t)cfi_buf->cfi_pending_mbcnt > pending) {
6608 pending = cfi_buf->cfi_pending_mbcnt;
6609 }
6610
6611 total_pending += pending;
6612 }
6613 }
6614 }
6615
6616 VERIFY(total_pending < INT32_MAX);
6617 #if DATA_DEBUG
6618 CFIL_LOG(LOG_DEBUG, "CFIL: <so %llx> total pending %llu <check_thread %d>",
6619 (uint64_t)VM_KERNEL_ADDRPERM(so),
6620 total_pending, check_thread);
6621 #endif
6622 }
6623
6624 return (int32_t)(total_pending);
6625 }
6626
6627 int
6628 cfil_sock_udp_notify_shutdown(struct socket *so, int how, int drop_flag, int shut_flag)
6629 {
6630 struct cfil_info *cfil_info = NULL;
6631 struct cfilhashhead *cfilhash = NULL;
6632 struct cfil_db *db = NULL;
6633 struct cfil_hash_entry *hash_entry = NULL;
6634 struct cfil_hash_entry *temp_hash_entry = NULL;
6635 errno_t error = 0;
6636 int done_count = 0;
6637 int kcunit;
6638
6639 socket_lock_assert_owned(so);
6640
6641 if ((so->so_flags & SOF_CONTENT_FILTER) != 0 && so->so_cfil_db != NULL) {
6642 db = so->so_cfil_db;
6643
6644 for (int i = 0; i < CFILHASHSIZE; i++) {
6645 cfilhash = &db->cfdb_hashbase[i];
6646
6647 LIST_FOREACH_SAFE(hash_entry, cfilhash, cfentry_link, temp_hash_entry) {
6648 if (hash_entry->cfentry_cfil != NULL) {
6649 cfil_info = hash_entry->cfentry_cfil;
6650
6651 // This flow is marked as DROP
6652 if (cfil_info->cfi_flags & drop_flag) {
6653 done_count++;
6654 continue;
6655 }
6656
6657 // This flow has been shut already, skip
6658 if (cfil_info->cfi_flags & shut_flag) {
6659 continue;
6660 }
6661 // Mark flow as shut
6662 cfil_info->cfi_flags |= shut_flag;
6663 done_count++;
6664
6665 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
6666 /* Disconnect incoming side */
6667 if (how != SHUT_WR) {
6668 error = cfil_dispatch_disconnect_event(so, cfil_info, kcunit, 0);
6669 }
6670 /* Disconnect outgoing side */
6671 if (how != SHUT_RD) {
6672 error = cfil_dispatch_disconnect_event(so, cfil_info, kcunit, 1);
6673 }
6674 }
6675 }
6676 }
6677 }
6678 }
6679
6680 if (done_count == 0) {
6681 error = ENOTCONN;
6682 }
6683 return error;
6684 }
6685
6686 int
6687 cfil_sock_udp_shutdown(struct socket *so, int *how)
6688 {
6689 int error = 0;
6690
6691 if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || (so->so_cfil_db == NULL)) {
6692 goto done;
6693 }
6694
6695 socket_lock_assert_owned(so);
6696
6697 CFIL_LOG(LOG_INFO, "so %llx how %d",
6698 (uint64_t)VM_KERNEL_ADDRPERM(so), *how);
6699
6700 /*
6701 * Check the state of the socket before the content filter
6702 */
6703 if (*how != SHUT_WR && (so->so_state & SS_CANTRCVMORE) != 0) {
6704 /* read already shut down */
6705 error = ENOTCONN;
6706 goto done;
6707 }
6708 if (*how != SHUT_RD && (so->so_state & SS_CANTSENDMORE) != 0) {
6709 /* write already shut down */
6710 error = ENOTCONN;
6711 goto done;
6712 }
6713
6714 /*
6715 * shutdown read: SHUT_RD or SHUT_RDWR
6716 */
6717 if (*how != SHUT_WR) {
6718 error = cfil_sock_udp_notify_shutdown(so, SHUT_RD, CFIF_DROP, CFIF_SHUT_RD);
6719 if (error != 0) {
6720 goto done;
6721 }
6722 }
6723 /*
6724 * shutdown write: SHUT_WR or SHUT_RDWR
6725 */
6726 if (*how != SHUT_RD) {
6727 error = cfil_sock_udp_notify_shutdown(so, SHUT_WR, CFIF_DROP, CFIF_SHUT_WR);
6728 if (error != 0) {
6729 goto done;
6730 }
6731
6732 /*
6733 * When outgoing data is pending, we delay the shutdown at the
6734 * protocol level until the content filters give the final
6735 * verdict on the pending data.
6736 */
6737 if (cfil_sock_data_pending(&so->so_snd) != 0) {
6738 /*
6739 * When shutting down the read and write sides at once
6740 * we can proceed to the final shutdown of the read
6741 * side. Otherwise, we just return.
6742 */
6743 if (*how == SHUT_WR) {
6744 error = EJUSTRETURN;
6745 } else if (*how == SHUT_RDWR) {
6746 *how = SHUT_RD;
6747 }
6748 }
6749 }
6750 done:
6751 return error;
6752 }
6753
6754 void
6755 cfil_sock_udp_close_wait(struct socket *so)
6756 {
6757 socket_lock_assert_owned(so);
6758
6759 while (cfil_filters_udp_attached(so, FALSE)) {
6760 /*
6761 * Notify the filters we are going away so they can detach
6762 */
6763 cfil_sock_udp_notify_shutdown(so, SHUT_RDWR, 0, 0);
6764
6765 /*
6766 * Make sure we still need to wait after the filters are notified
6767 * of the disconnection
6768 */
6769 if (cfil_filters_udp_attached(so, TRUE) == 0) {
6770 break;
6771 }
6772 }
6773 }
6774
6775 void
6776 cfil_sock_udp_is_closed(struct socket *so)
6777 {
6778 struct cfil_info *cfil_info = NULL;
6779 struct cfilhashhead *cfilhash = NULL;
6780 struct cfil_db *db = NULL;
6781 struct cfil_hash_entry *hash_entry = NULL;
6782 struct cfil_hash_entry *temp_hash_entry = NULL;
6783 errno_t error = 0;
6784 int kcunit;
6785
6786 socket_lock_assert_owned(so);
6787
6788 if ((so->so_flags & SOF_CONTENT_FILTER) != 0 && so->so_cfil_db != NULL) {
6789 db = so->so_cfil_db;
6790
6791 for (int i = 0; i < CFILHASHSIZE; i++) {
6792 cfilhash = &db->cfdb_hashbase[i];
6793
6794 LIST_FOREACH_SAFE(hash_entry, cfilhash, cfentry_link, temp_hash_entry) {
6795 if (hash_entry->cfentry_cfil != NULL) {
6796 cfil_info = hash_entry->cfentry_cfil;
6797
6798 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
6799 /* Let the filters know of the closing */
6800 error = cfil_dispatch_closed_event(so, cfil_info, kcunit);
6801 }
6802
6803 /* Last chance to push passed data out */
6804 error = cfil_acquire_sockbuf(so, cfil_info, 1);
6805 if (error == 0) {
6806 cfil_service_inject_queue(so, cfil_info, 1);
6807 }
6808 cfil_release_sockbuf(so, 1);
6809
6810 cfil_info->cfi_flags |= CFIF_SOCK_CLOSED;
6811
6812 /* Pending data needs to go */
6813 cfil_flush_queues(so, cfil_info);
6814
6815 CFIL_INFO_VERIFY(cfil_info);
6816 }
6817 }
6818 }
6819 }
6820 }
6821
6822 void
6823 cfil_sock_udp_buf_update(struct sockbuf *sb)
6824 {
6825 struct cfil_info *cfil_info = NULL;
6826 struct cfilhashhead *cfilhash = NULL;
6827 struct cfil_db *db = NULL;
6828 struct cfil_hash_entry *hash_entry = NULL;
6829 struct cfil_hash_entry *temp_hash_entry = NULL;
6830 errno_t error = 0;
6831 int outgoing;
6832 struct socket *so = sb->sb_so;
6833
6834 socket_lock_assert_owned(so);
6835
6836 if ((so->so_flags & SOF_CONTENT_FILTER) != 0 && so->so_cfil_db != NULL) {
6837 if (!cfil_sbtrim) {
6838 return;
6839 }
6840
6841 db = so->so_cfil_db;
6842
6843 for (int i = 0; i < CFILHASHSIZE; i++) {
6844 cfilhash = &db->cfdb_hashbase[i];
6845
6846 LIST_FOREACH_SAFE(hash_entry, cfilhash, cfentry_link, temp_hash_entry) {
6847 if (hash_entry->cfentry_cfil != NULL) {
6848 cfil_info = hash_entry->cfentry_cfil;
6849
6850 if ((sb->sb_flags & SB_RECV) == 0) {
6851 if ((cfil_info->cfi_flags & CFIF_RETRY_INJECT_OUT) == 0) {
6852 return;
6853 }
6854 outgoing = 1;
6855 OSIncrementAtomic(&cfil_stats.cfs_inject_q_out_retry);
6856 } else {
6857 if ((cfil_info->cfi_flags & CFIF_RETRY_INJECT_IN) == 0) {
6858 return;
6859 }
6860 outgoing = 0;
6861 OSIncrementAtomic(&cfil_stats.cfs_inject_q_in_retry);
6862 }
6863
6864 CFIL_LOG(LOG_NOTICE, "so %llx outgoing %d",
6865 (uint64_t)VM_KERNEL_ADDRPERM(so), outgoing);
6866
6867 error = cfil_acquire_sockbuf(so, cfil_info, outgoing);
6868 if (error == 0) {
6869 cfil_service_inject_queue(so, cfil_info, outgoing);
6870 }
6871 cfil_release_sockbuf(so, outgoing);
6872 }
6873 }
6874 }
6875 }
6876 }
6877
6878 void
6879 cfil_filter_show(u_int32_t kcunit)
6880 {
6881 struct content_filter *cfc = NULL;
6882 struct cfil_entry *entry;
6883 int count = 0;
6884
6885 if (content_filters == NULL) {
6886 return;
6887 }
6888 if (kcunit > MAX_CONTENT_FILTER) {
6889 return;
6890 }
6891
6892 cfil_rw_lock_shared(&cfil_lck_rw);
6893
6894 if (content_filters[kcunit - 1] == NULL) {
6895 cfil_rw_unlock_shared(&cfil_lck_rw);
6896 return;
6897 }
6898 cfc = content_filters[kcunit - 1];
6899
6900 CFIL_LOG(LOG_ERR, "CFIL: FILTER SHOW: Filter <unit %d, entry count %d> flags <%lx>:",
6901 kcunit, cfc->cf_sock_count, (unsigned long)cfc->cf_flags);
6902 if (cfc->cf_flags & CFF_DETACHING) {
6903 CFIL_LOG(LOG_ERR, "CFIL: FILTER SHOW: - DETACHING");
6904 }
6905 if (cfc->cf_flags & CFF_ACTIVE) {
6906 CFIL_LOG(LOG_ERR, "CFIL: FILTER SHOW: - ACTIVE");
6907 }
6908 if (cfc->cf_flags & CFF_FLOW_CONTROLLED) {
6909 CFIL_LOG(LOG_ERR, "CFIL: FILTER SHOW: - FLOW CONTROLLED");
6910 }
6911
6912 TAILQ_FOREACH(entry, &cfc->cf_sock_entries, cfe_link) {
6913 if (entry->cfe_cfil_info && entry->cfe_cfil_info->cfi_so) {
6914 struct cfil_info *cfil_info = entry->cfe_cfil_info;
6915
6916 count++;
6917
6918 if (entry->cfe_flags & CFEF_CFIL_DETACHED) {
6919 cfil_info_log(LOG_ERR, cfil_info, "CFIL: FILTER SHOW: - DETACHED");
6920 } else {
6921 cfil_info_log(LOG_ERR, cfil_info, "CFIL: FILTER SHOW: - ATTACHED");
6922 }
6923 }
6924 }
6925
6926 CFIL_LOG(LOG_ERR, "CFIL: FILTER SHOW: Filter - total entries shown: %d", count);
6927
6928 cfil_rw_unlock_shared(&cfil_lck_rw);
6929 }
6930
6931 void
6932 cfil_info_show(void)
6933 {
6934 struct cfil_info *cfil_info;
6935 int count = 0;
6936
6937 cfil_rw_lock_shared(&cfil_lck_rw);
6938
6939 CFIL_LOG(LOG_ERR, "CFIL: INFO SHOW: count %d", cfil_sock_attached_count);
6940
6941 TAILQ_FOREACH(cfil_info, &cfil_sock_head, cfi_link) {
6942 count++;
6943
6944 cfil_info_log(LOG_ERR, cfil_info, "CFIL: INFO SHOW");
6945
6946 if (cfil_info->cfi_flags & CFIF_DROP) {
6947 CFIL_LOG(LOG_ERR, "CFIL: INFO FLAG - DROP");
6948 }
6949 if (cfil_info->cfi_flags & CFIF_CLOSE_WAIT) {
6950 CFIL_LOG(LOG_ERR, "CFIL: INFO FLAG - CLOSE_WAIT");
6951 }
6952 if (cfil_info->cfi_flags & CFIF_SOCK_CLOSED) {
6953 CFIL_LOG(LOG_ERR, "CFIL: INFO FLAG - SOCK_CLOSED");
6954 }
6955 if (cfil_info->cfi_flags & CFIF_RETRY_INJECT_IN) {
6956 CFIL_LOG(LOG_ERR, "CFIL: INFO FLAG - RETRY_INJECT_IN");
6957 }
6958 if (cfil_info->cfi_flags & CFIF_RETRY_INJECT_OUT) {
6959 CFIL_LOG(LOG_ERR, "CFIL: INFO FLAG - RETRY_INJECT_OUT");
6960 }
6961 if (cfil_info->cfi_flags & CFIF_SHUT_WR) {
6962 CFIL_LOG(LOG_ERR, "CFIL: INFO FLAG - SHUT_WR");
6963 }
6964 if (cfil_info->cfi_flags & CFIF_SHUT_RD) {
6965 CFIL_LOG(LOG_ERR, "CFIL: INFO FLAG - SHUT_RD");
6966 }
6967 }
6968
6969 CFIL_LOG(LOG_ERR, "CFIL: INFO SHOW: total cfil_info shown: %d", count);
6970
6971 cfil_rw_unlock_shared(&cfil_lck_rw);
6972 }
6973
6974 bool
6975 cfil_info_idle_timed_out(struct cfil_info *cfil_info, int timeout, u_int64_t current_time)
6976 {
6977 if (cfil_info && cfil_info->cfi_hash_entry &&
6978 (current_time - cfil_info->cfi_hash_entry->cfentry_lastused >= (u_int64_t)timeout)) {
6979 #if GC_DEBUG
6980 cfil_info_log(LOG_ERR, cfil_info, "CFIL: flow IDLE timeout expired");
6981 #endif
6982 return true;
6983 }
6984 return false;
6985 }
6986
6987 bool
6988 cfil_info_action_timed_out(struct cfil_info *cfil_info, int timeout)
6989 {
6990 struct cfil_entry *entry;
6991 struct timeval current_tv;
6992 struct timeval diff_time;
6993
6994 if (cfil_info == NULL) {
6995 return false;
6996 }
6997
6998 /*
6999 * If we have queued up more data than the pass offset and we haven't received
7000 * an action from user space for a while (the user space filter might have crashed),
7001 * return action timed out.
7002 */
7003 if (cfil_info->cfi_snd.cfi_pending_last > cfil_info->cfi_snd.cfi_pass_offset ||
7004 cfil_info->cfi_rcv.cfi_pending_last > cfil_info->cfi_rcv.cfi_pass_offset) {
7005 microuptime(&current_tv);
7006
7007 for (int kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
7008 entry = &cfil_info->cfi_entries[kcunit - 1];
7009
7010 if (entry->cfe_filter == NULL) {
7011 continue;
7012 }
7013
7014 if (cfil_info->cfi_snd.cfi_pending_last > entry->cfe_snd.cfe_pass_offset ||
7015 cfil_info->cfi_rcv.cfi_pending_last > entry->cfe_rcv.cfe_pass_offset) {
7016 // haven't gotten an action from this filter, check timeout
7017 timersub(&current_tv, &entry->cfe_last_action, &diff_time);
7018 if (diff_time.tv_sec >= timeout) {
7019 #if GC_DEBUG
7020 cfil_info_log(LOG_ERR, cfil_info, "CFIL: flow ACTION timeout expired");
7021 #endif
7022 return true;
7023 }
7024 }
7025 }
7026 }
7027 return false;
7028 }
7029
7030 bool
7031 cfil_info_buffer_threshold_exceeded(struct cfil_info *cfil_info)
7032 {
7033 if (cfil_info == NULL) {
7034 return false;
7035 }
7036
7037 /*
7038 * Clean up flow if it exceeded queue thresholds
7039 */
7040 if (cfil_info->cfi_snd.cfi_tail_drop_cnt ||
7041 cfil_info->cfi_rcv.cfi_tail_drop_cnt) {
7042 #if GC_DEBUG
7043 CFIL_LOG(LOG_ERR, "CFIL: queue threshold exceeded: mbuf max <count: %d bytes: %d> tail drop count <OUT: %d IN: %d>",
7044 cfil_udp_gc_mbuf_num_max,
7045 cfil_udp_gc_mbuf_cnt_max,
7046 cfil_info->cfi_snd.cfi_tail_drop_cnt,
7047 cfil_info->cfi_rcv.cfi_tail_drop_cnt);
7048 cfil_info_log(LOG_ERR, cfil_info, "CFIL: queue threshold exceeded");
7049 #endif
7050 return true;
7051 }
7052
7053 return false;
7054 }
7055
7056 static void
7057 cfil_udp_gc_thread_sleep(bool forever)
7058 {
7059 if (forever) {
7060 (void) assert_wait((event_t) &cfil_sock_udp_attached_count,
7061 THREAD_INTERRUPTIBLE);
7062 } else {
7063 uint64_t deadline = 0;
7064 nanoseconds_to_absolutetime(UDP_FLOW_GC_RUN_INTERVAL_NSEC, &deadline);
7065 clock_absolutetime_interval_to_deadline(deadline, &deadline);
7066
7067 (void) assert_wait_deadline(&cfil_sock_udp_attached_count,
7068 THREAD_INTERRUPTIBLE, deadline);
7069 }
7070 }
7071
7072 static void
7073 cfil_udp_gc_thread_func(void *v, wait_result_t w)
7074 {
7075 #pragma unused(v, w)
7076
7077 ASSERT(cfil_udp_gc_thread == current_thread());
7078 thread_set_thread_name(current_thread(), "CFIL_UDP_GC");
7079
7080 // Kick off gc shortly
7081 cfil_udp_gc_thread_sleep(false);
7082 thread_block_parameter((thread_continue_t) cfil_info_udp_expire, NULL);
7083 /* NOTREACHED */
7084 }
7085
7086 static void
7087 cfil_info_udp_expire(void *v, wait_result_t w)
7088 {
7089 #pragma unused(v, w)
7090
7091 static uint64_t expired_array[UDP_FLOW_GC_MAX_COUNT];
7092 static uint32_t expired_count = 0;
7093
7094 struct cfil_info *cfil_info;
7095 struct cfil_hash_entry *hash_entry;
7096 struct cfil_db *db;
7097 struct socket *so;
7098 u_int64_t current_time = 0;
7099
7100 current_time = net_uptime();
7101
7102 // Get all expired UDP flow ids
7103 cfil_rw_lock_shared(&cfil_lck_rw);
7104
7105 if (cfil_sock_udp_attached_count == 0) {
7106 cfil_rw_unlock_shared(&cfil_lck_rw);
7107 goto go_sleep;
7108 }
7109
7110 TAILQ_FOREACH(cfil_info, &cfil_sock_head, cfi_link) {
7111 if (expired_count >= UDP_FLOW_GC_MAX_COUNT) {
7112 break;
7113 }
7114
7115 if (IS_IP_DGRAM(cfil_info->cfi_so)) {
7116 if (cfil_info_idle_timed_out(cfil_info, UDP_FLOW_GC_IDLE_TO, current_time) ||
7117 cfil_info_action_timed_out(cfil_info, UDP_FLOW_GC_ACTION_TO) ||
7118 cfil_info_buffer_threshold_exceeded(cfil_info)) {
7119 expired_array[expired_count] = cfil_info->cfi_sock_id;
7120 expired_count++;
7121 }
7122 }
7123 }
7124 cfil_rw_unlock_shared(&cfil_lck_rw);
7125
7126 if (expired_count == 0) {
7127 goto go_sleep;
7128 }
7129
7130 for (uint32_t i = 0; i < expired_count; i++) {
7131 // Look up the socket (UDP only) and lock it
7132 so = cfil_socket_from_sock_id(expired_array[i], true);
7133 if (so == NULL) {
7134 continue;
7135 }
7136
7137 cfil_info = cfil_db_get_cfil_info(so->so_cfil_db, expired_array[i]);
7138 if (cfil_info == NULL) {
7139 goto unlock;
7140 }
7141
7142 db = so->so_cfil_db;
7143 hash_entry = cfil_info->cfi_hash_entry;
7144
7145 if (db == NULL || hash_entry == NULL) {
7146 goto unlock;
7147 }
7148
7149 #if GC_DEBUG || LIFECYCLE_DEBUG
7150 cfil_info_log(LOG_ERR, cfil_info, "CFIL: LIFECYCLE: GC CLEAN UP");
7151 #endif
7152
7153 cfil_db_delete_entry(db, hash_entry);
7154 CFIL_INFO_FREE(cfil_info);
7155 OSIncrementAtomic(&cfil_stats.cfs_sock_detached);
7156
7157 if (so->so_flags & SOF_CONTENT_FILTER) {
7158 if (db->cfdb_count == 0) {
7159 so->so_flags &= ~SOF_CONTENT_FILTER;
7160 }
7161 VERIFY(so->so_usecount > 0);
7162 so->so_usecount--;
7163 }
7164 unlock:
7165 socket_unlock(so, 1);
7166 }
7167
7168 #if GC_DEBUG
7169 CFIL_LOG(LOG_ERR, "CFIL: UDP flow idle timeout check: expired %d idle flows", expired_count);
7170 #endif
7171 expired_count = 0;
7172
7173 go_sleep:
7174
7175 	// Sleep forever (until woken up) if there are no more UDP flows to clean
7176 	cfil_rw_lock_shared(&cfil_lck_rw);
7177 	cfil_udp_gc_thread_sleep(cfil_sock_udp_attached_count == 0);
7178 cfil_rw_unlock_shared(&cfil_lck_rw);
7179 thread_block_parameter((thread_continue_t)cfil_info_udp_expire, NULL);
7180 /* NOTREACHED */
7181 }
7182
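/*
 * Snapshot the socket state a datagram needs to carry while it is held by
 * the content filter: the socket state-change count, socket options, inpcb
 * flags and the flow's foreign address. The snapshot is stored in a
 * KERNEL_TAG_TYPE_CFIL_UDP mbuf tag prepended to the packet so the state at
 * save time can be recovered later.
 */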
7183 struct m_tag *
7184 cfil_dgram_save_socket_state(struct cfil_info *cfil_info, struct mbuf *m)
7185 {
7186 struct m_tag *tag = NULL;
7187 struct cfil_tag *ctag = NULL;
7188 struct cfil_hash_entry *hash_entry = NULL;
7189 struct inpcb *inp = NULL;
7190
7191 if (cfil_info == NULL || cfil_info->cfi_so == NULL ||
7192 cfil_info->cfi_hash_entry == NULL || m == NULL || !(m->m_flags & M_PKTHDR)) {
7193 return NULL;
7194 }
7195
7196 inp = sotoinpcb(cfil_info->cfi_so);
7197
7198 /* Allocate a tag */
7199 tag = m_tag_create(KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_CFIL_UDP,
7200 sizeof(struct cfil_tag), M_DONTWAIT, m);
7201
7202 if (tag) {
7203 ctag = (struct cfil_tag*)(tag + 1);
7204 ctag->cfil_so_state_change_cnt = cfil_info->cfi_so->so_state_change_cnt;
7205 ctag->cfil_so_options = cfil_info->cfi_so->so_options;
7206 ctag->cfil_inp_flags = inp ? inp->inp_flags : 0;
7207
7208 hash_entry = cfil_info->cfi_hash_entry;
7209 if (hash_entry->cfentry_family == AF_INET6) {
7210 fill_ip6_sockaddr_4_6(&ctag->cfil_faddr,
7211 &hash_entry->cfentry_faddr.addr6,
7212 hash_entry->cfentry_fport);
7213 } else if (hash_entry->cfentry_family == AF_INET) {
7214 fill_ip_sockaddr_4_6(&ctag->cfil_faddr,
7215 hash_entry->cfentry_faddr.addr46.ia46_addr4,
7216 hash_entry->cfentry_fport);
7217 }
7218 m_tag_prepend(m, tag);
7219 return tag;
7220 }
7221 return NULL;
7222 }
7223
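/*
 * Retrieve the CFIL UDP state tag from an mbuf and copy out whichever of
 * the saved fields the caller asked for. The tag is unlinked from the mbuf
 * and returned; the caller owns it and must free it.
 */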
7224 struct m_tag *
7225 cfil_dgram_get_socket_state(struct mbuf *m, uint32_t *state_change_cnt, uint32_t *options,
7226 struct sockaddr **faddr, int *inp_flags)
7227 {
7228 struct m_tag *tag = NULL;
7229 struct cfil_tag *ctag = NULL;
7230
7231 tag = m_tag_locate(m, KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_CFIL_UDP, NULL);
7232 if (tag) {
7233 ctag = (struct cfil_tag *)(tag + 1);
7234 if (state_change_cnt) {
7235 *state_change_cnt = ctag->cfil_so_state_change_cnt;
7236 }
7237 if (options) {
7238 *options = ctag->cfil_so_options;
7239 }
7240 if (faddr) {
7241 *faddr = (struct sockaddr *) &ctag->cfil_faddr;
7242 }
7243 if (inp_flags) {
7244 *inp_flags = ctag->cfil_inp_flags;
7245 }
7246
7247 		/*
7248 		 * Unlink the tag and hand it over to the caller.
7249 		 * The caller is responsible for freeing it.
7250 		 */
7251 m_tag_unlink(m, tag);
7252 return tag;
7253 }
7254 return NULL;
7255 }
7256
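/*
 * Non-destructive variant: report whether an mbuf carries a CFIL UDP state
 * tag and, if so, return the saved inpcb flags without unlinking the tag.
 */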
7257 boolean_t
7258 cfil_dgram_peek_socket_state(struct mbuf *m, int *inp_flags)
7259 {
7260 struct m_tag *tag = NULL;
7261 struct cfil_tag *ctag = NULL;
7262
7263 tag = m_tag_locate(m, KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_CFIL_UDP, NULL);
7264 if (tag) {
7265 ctag = (struct cfil_tag *)(tag + 1);
7266 if (inp_flags) {
7267 *inp_flags = ctag->cfil_inp_flags;
7268 }
7269 return true;
7270 }
7271 return false;
7272 }
7273
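/*
 * Enqueue an accumulated stats report buffer on the kernel control socket
 * of the filter at "kcunit". Called with the cfil lock held shared; if the
 * control socket has no buffer space, the filter is marked flow controlled
 * (taking the lock exclusive to set CFF_FLOW_CONTROLLED).
 */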
7274 static int
7275 cfil_dispatch_stats_event_locked(int kcunit, struct cfil_stats_report_buffer *buffer, uint32_t stats_count)
7276 {
7277 struct content_filter *cfc = NULL;
7278 errno_t error = 0;
7279 size_t msgsize = 0;
7280
7281 if (buffer == NULL || stats_count == 0) {
7282 return error;
7283 }
7284
7285 if (content_filters == NULL || kcunit > MAX_CONTENT_FILTER) {
7286 return error;
7287 }
7288
7289 cfc = content_filters[kcunit - 1];
7290 if (cfc == NULL) {
7291 return error;
7292 }
7293
7294 /* Would be wasteful to try */
7295 if (cfc->cf_flags & CFF_FLOW_CONTROLLED) {
7296 error = ENOBUFS;
7297 goto done;
7298 }
7299
7300 msgsize = sizeof(struct cfil_msg_stats_report) + (sizeof(struct cfil_msg_sock_stats) * stats_count);
7301 buffer->msghdr.cfm_len = (uint32_t)msgsize;
7302 buffer->msghdr.cfm_version = 1;
7303 buffer->msghdr.cfm_type = CFM_TYPE_EVENT;
7304 buffer->msghdr.cfm_op = CFM_OP_STATS;
7305 buffer->msghdr.cfm_sock_id = 0;
7306 buffer->count = stats_count;
7307
7308 #if STATS_DEBUG
7309 CFIL_LOG(LOG_ERR, "STATS (kcunit %d): msg size %lu - %lu %lu %lu",
7310 kcunit,
7311 (unsigned long)msgsize,
7312 (unsigned long)sizeof(struct cfil_msg_stats_report),
7313 (unsigned long)sizeof(struct cfil_msg_sock_stats),
7314 (unsigned long)stats_count);
7315 #endif
7316
7317 error = ctl_enqueuedata(cfc->cf_kcref, cfc->cf_kcunit,
7318 buffer,
7319 msgsize,
7320 CTL_DATA_EOR);
7321 if (error != 0) {
7322 CFIL_LOG(LOG_ERR, "ctl_enqueuedata() failed: %d", error);
7323 goto done;
7324 }
7325 OSIncrementAtomic(&cfil_stats.cfs_stats_event_ok);
7326
7327 #if STATS_DEBUG
7328 CFIL_LOG(LOG_ERR, "CFIL: STATS REPORT: send msg to %d", kcunit);
7329 #endif
7330
7331 done:
7332
7333 if (error == ENOBUFS) {
7334 OSIncrementAtomic(
7335 &cfil_stats.cfs_stats_event_flow_control);
7336
7337 if (!cfil_rw_lock_shared_to_exclusive(&cfil_lck_rw)) {
7338 cfil_rw_lock_exclusive(&cfil_lck_rw);
7339 }
7340
7341 cfc->cf_flags |= CFF_FLOW_CONTROLLED;
7342
7343 cfil_rw_unlock_exclusive(&cfil_lck_rw);
7344 } else if (error != 0) {
7345 OSIncrementAtomic(&cfil_stats.cfs_stats_event_fail);
7346 }
7347
7348 return error;
7349 }
7350
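/*
 * Park the stats report thread: wait forever on the
 * cfil_sock_attached_stats_count event, or arm a
 * CFIL_STATS_REPORT_RUN_INTERVAL_NSEC deadline for the next report pass.
 */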
7351 static void
7352 cfil_stats_report_thread_sleep(bool forever)
7353 {
7354 #if STATS_DEBUG
7355 CFIL_LOG(LOG_ERR, "CFIL: STATS COLLECTION SLEEP");
7356 #endif
7357
7358 if (forever) {
7359 (void) assert_wait((event_t) &cfil_sock_attached_stats_count,
7360 THREAD_INTERRUPTIBLE);
7361 } else {
7362 uint64_t deadline = 0;
7363 nanoseconds_to_absolutetime(CFIL_STATS_REPORT_RUN_INTERVAL_NSEC, &deadline);
7364 clock_absolutetime_interval_to_deadline(deadline, &deadline);
7365
7366 (void) assert_wait_deadline(&cfil_sock_attached_stats_count,
7367 THREAD_INTERRUPTIBLE, deadline);
7368 }
7369 }
7370
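/*
 * Entry point of the stats report kernel thread: name the thread, arm the
 * first report interval, and block with cfil_stats_report() as the
 * continuation.
 */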
7371 static void
7372 cfil_stats_report_thread_func(void *v, wait_result_t w)
7373 {
7374 #pragma unused(v, w)
7375
7376 ASSERT(cfil_stats_report_thread == current_thread());
7377 thread_set_thread_name(current_thread(), "CFIL_STATS_REPORT");
7378
7379 	// Kick off the first stats report shortly
7380 cfil_stats_report_thread_sleep(false);
7381 thread_block_parameter((thread_continue_t) cfil_stats_report, NULL);
7382 /* NOTREACHED */
7383 }
7384
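/*
 * For one (flow, filter) pair: if the filter asked for periodic stats and
 * its report interval has elapsed, and the flow has unreported inbound or
 * outbound bytes, append a cfil_msg_sock_stats record to that filter's
 * global report buffer and update the per-entry reported counters.
 * Returns true when a record was added.
 */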
7385 static bool
7386 cfil_stats_collect_flow_stats_for_filter(int kcunit,
7387 struct cfil_info *cfil_info,
7388 struct cfil_entry *entry,
7389 struct timeval current_tv)
7390 {
7391 struct cfil_stats_report_buffer *buffer = NULL;
7392 struct cfil_msg_sock_stats *flow_array = NULL;
7393 struct cfil_msg_sock_stats *stats = NULL;
7394 struct inpcb *inp = NULL;
7395 struct timeval diff_time;
7396 uint64_t diff_time_usecs;
7397 int index = 0;
7398
7399 if (entry->cfe_stats_report_frequency == 0) {
7400 return false;
7401 }
7402
7403 buffer = global_cfil_stats_report_buffers[kcunit - 1];
7404 if (buffer == NULL) {
7405 #if STATS_DEBUG
7406 CFIL_LOG(LOG_ERR, "CFIL: STATS: no buffer");
7407 #endif
7408 return false;
7409 }
7410
7411 timersub(&current_tv, &entry->cfe_stats_report_ts, &diff_time);
7412 diff_time_usecs = (diff_time.tv_sec * USEC_PER_SEC) + diff_time.tv_usec;
7413
7414 #if STATS_DEBUG
7415 CFIL_LOG(LOG_ERR, "CFIL: STATS REPORT - elapsed time - ts %llu %llu cur ts %llu %llu diff %llu %llu (usecs %llu) @freq %llu usecs sockID %llu",
7416 (unsigned long long)entry->cfe_stats_report_ts.tv_sec,
7417 (unsigned long long)entry->cfe_stats_report_ts.tv_usec,
7418 (unsigned long long)current_tv.tv_sec,
7419 (unsigned long long)current_tv.tv_usec,
7420 (unsigned long long)diff_time.tv_sec,
7421 (unsigned long long)diff_time.tv_usec,
7422 (unsigned long long)diff_time_usecs,
7423 (unsigned long long)((entry->cfe_stats_report_frequency * NSEC_PER_MSEC) / NSEC_PER_USEC),
7424 cfil_info->cfi_sock_id);
7425 #endif
7426
7427 // Compare elapsed time in usecs
7428 if (diff_time_usecs >= (entry->cfe_stats_report_frequency * NSEC_PER_MSEC) / NSEC_PER_USEC) {
7429 #if STATS_DEBUG
7430 CFIL_LOG(LOG_ERR, "CFIL: STATS REPORT - in %llu reported %llu",
7431 cfil_info->cfi_byte_inbound_count,
7432 entry->cfe_byte_inbound_count_reported);
7433 CFIL_LOG(LOG_ERR, "CFIL: STATS REPORT - out %llu reported %llu",
7434 cfil_info->cfi_byte_outbound_count,
7435 entry->cfe_byte_outbound_count_reported);
7436 #endif
7437 // Check if flow has new bytes that have not been reported
7438 if (entry->cfe_byte_inbound_count_reported < cfil_info->cfi_byte_inbound_count ||
7439 entry->cfe_byte_outbound_count_reported < cfil_info->cfi_byte_outbound_count) {
7440 flow_array = (struct cfil_msg_sock_stats *)&buffer->stats;
7441 index = global_cfil_stats_counts[kcunit - 1];
7442
7443 stats = &flow_array[index];
7444 stats->cfs_sock_id = cfil_info->cfi_sock_id;
7445 stats->cfs_byte_inbound_count = cfil_info->cfi_byte_inbound_count;
7446 stats->cfs_byte_outbound_count = cfil_info->cfi_byte_outbound_count;
7447
7448 if (entry->cfe_laddr_sent == false) {
7449 /* cache it if necessary */
7450 if (cfil_info->cfi_so_attach_laddr.sa.sa_len == 0) {
7451 inp = cfil_info->cfi_so ? sotoinpcb(cfil_info->cfi_so) : NULL;
7452 if (inp != NULL) {
7453 boolean_t outgoing = (cfil_info->cfi_dir == CFS_CONNECTION_DIR_OUT);
7454 union sockaddr_in_4_6 *src = outgoing ? &cfil_info->cfi_so_attach_laddr : NULL;
7455 union sockaddr_in_4_6 *dst = outgoing ? NULL : &cfil_info->cfi_so_attach_laddr;
7456 cfil_fill_event_msg_addresses(cfil_info->cfi_hash_entry, inp,
7457 src, dst, !IS_INP_V6(inp), outgoing);
7458 }
7459 }
7460
7461 if (cfil_info->cfi_so_attach_laddr.sa.sa_len != 0) {
7462 stats->cfs_laddr.sin6 = cfil_info->cfi_so_attach_laddr.sin6;
7463 entry->cfe_laddr_sent = true;
7464 }
7465 }
7466
7467 global_cfil_stats_counts[kcunit - 1]++;
7468
7469 entry->cfe_stats_report_ts = current_tv;
7470 entry->cfe_byte_inbound_count_reported = cfil_info->cfi_byte_inbound_count;
7471 entry->cfe_byte_outbound_count_reported = cfil_info->cfi_byte_outbound_count;
7472 #if STATS_DEBUG
7473 cfil_info_log(LOG_ERR, cfil_info, "CFIL: LIFECYCLE: STATS COLLECTED");
7474 #endif
7475 CFI_ADD_TIME_LOG(cfil_info, &current_tv, &cfil_info->cfi_first_event, CFM_OP_STATS);
7476 return true;
7477 }
7478 }
7479 return false;
7480 }
7481
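/*
 * Stats report continuation. Each pass walks cfil_sock_head_stats under the
 * shared cfil lock, examining at most CFIL_STATS_REPORT_MAX_COUNT flows;
 * saved_next_sock_id remembers where to resume when the list is longer than
 * one batch. Populated per-filter buffers are then dispatched to the
 * corresponding kernel control sockets before the thread goes back to sleep.
 */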
7482 static void
7483 cfil_stats_report(void *v, wait_result_t w)
7484 {
7485 #pragma unused(v, w)
7486
7487 struct cfil_info *cfil_info = NULL;
7488 struct cfil_entry *entry = NULL;
7489 struct timeval current_tv;
7490 uint32_t flow_count = 0;
7491 	uint64_t saved_next_sock_id = 0; // Sock id to resume from on the next pass
7492 bool flow_reported = false;
7493
7494 #if STATS_DEBUG
7495 CFIL_LOG(LOG_ERR, "CFIL: STATS COLLECTION RUNNING");
7496 #endif
7497
7498 do {
7499 		// Collect the sock ids of all flows that have new stats
7500 cfil_rw_lock_shared(&cfil_lck_rw);
7501
7502 if (cfil_sock_attached_stats_count == 0) {
7503 #if STATS_DEBUG
7504 CFIL_LOG(LOG_ERR, "CFIL: STATS: no flow");
7505 #endif
7506 cfil_rw_unlock_shared(&cfil_lck_rw);
7507 goto go_sleep;
7508 }
7509
7510 for (int kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
7511 if (global_cfil_stats_report_buffers[kcunit - 1] != NULL) {
7512 memset(global_cfil_stats_report_buffers[kcunit - 1], 0, sizeof(struct cfil_stats_report_buffer));
7513 }
7514 global_cfil_stats_counts[kcunit - 1] = 0;
7515 }
7516
7517 microuptime(&current_tv);
7518 flow_count = 0;
7519
7520 TAILQ_FOREACH(cfil_info, &cfil_sock_head_stats, cfi_link_stats) {
7521 if (saved_next_sock_id != 0 &&
7522 saved_next_sock_id == cfil_info->cfi_sock_id) {
7523 // Here is where we left off previously, start accumulating
7524 saved_next_sock_id = 0;
7525 }
7526
7527 if (saved_next_sock_id == 0) {
7528 if (flow_count >= CFIL_STATS_REPORT_MAX_COUNT) {
7529 					// Examine a fixed number of flows each round. Remember the current flow
7530 					// so we can resume from here on the next loop
7531 saved_next_sock_id = cfil_info->cfi_sock_id;
7532 break;
7533 }
7534
7535 flow_reported = false;
7536 for (int kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
7537 entry = &cfil_info->cfi_entries[kcunit - 1];
7538 if (entry->cfe_filter == NULL) {
7539 #if STATS_DEBUG
7540 CFIL_LOG(LOG_NOTICE, "CFIL: STATS REPORT - so %llx no filter",
7541 cfil_info->cfi_so ? (uint64_t)VM_KERNEL_ADDRPERM(cfil_info->cfi_so) : 0);
7542 #endif
7543 continue;
7544 }
7545
7546 if ((entry->cfe_stats_report_frequency > 0) &&
7547 cfil_stats_collect_flow_stats_for_filter(kcunit, cfil_info, entry, current_tv) == true) {
7548 flow_reported = true;
7549 }
7550 }
7551 if (flow_reported == true) {
7552 flow_count++;
7553 }
7554 }
7555 }
7556
7557 if (flow_count > 0) {
7558 #if STATS_DEBUG
7559 CFIL_LOG(LOG_ERR, "CFIL: STATS reporting for %d flows", flow_count);
7560 #endif
7561 for (int kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
7562 if (global_cfil_stats_report_buffers[kcunit - 1] != NULL &&
7563 global_cfil_stats_counts[kcunit - 1] > 0) {
7564 cfil_dispatch_stats_event_locked(kcunit,
7565 global_cfil_stats_report_buffers[kcunit - 1],
7566 global_cfil_stats_counts[kcunit - 1]);
7567 }
7568 }
7569 } else {
7570 cfil_rw_unlock_shared(&cfil_lck_rw);
7571 goto go_sleep;
7572 }
7573
7574 cfil_rw_unlock_shared(&cfil_lck_rw);
7575
7576 // Loop again if we haven't finished the whole cfil_info list
7577 } while (saved_next_sock_id != 0);
7578
7579 go_sleep:
7580
7581 	// Sleep forever (until woken up) if there are no more flows to report
7582 	cfil_rw_lock_shared(&cfil_lck_rw);
7583 	cfil_stats_report_thread_sleep(cfil_sock_attached_stats_count == 0);
7584 cfil_rw_unlock_shared(&cfil_lck_rw);
7585 thread_block_parameter((thread_continue_t) cfil_stats_report, NULL);
7586 /* NOTREACHED */
7587 }