1 /*
2 * Copyright (c) 2013-2019 Apple Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. Please obtain a copy of the License at
10 * http://www.opensource.apple.com/apsl/ and read it before using this
11 * file.
12 *
13 * The Original Code and all software distributed under the License are
14 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
18 * Please see the License for the specific language governing rights and
19 * limitations under the License.
20 *
21 * @APPLE_LICENSE_HEADER_END@
22 */
23
24 /*
25 * THEORY OF OPERATION
26 *
27 * The socket content filter subsystem provides a way for user space agents to
28 * make filtering decisions based on the content of the data being sent and
29 * received by TCP/IP sockets.
30 *
31 * A content filter user space agent gets a copy of the data and the data is
32 * also kept in a kernel buffer until the user space agent makes a pass or drop
33 * decision. This unidirectional flow of content avoids unnecessary data copies
34 * back to the kernel.
35 *
36 * A user space filter agent opens a kernel control socket with the name
37 * CONTENT_FILTER_CONTROL_NAME to attach to the socket content filter subsystem.
38 * When connected, a "struct content_filter" is created and set as the
39 * "unitinfo" of the corresponding kernel control socket instance.
40 *
41 * The socket content filter subsystem exchanges messages with the user space
42 * filter agent until an ultimate pass or drop decision is made by the
43 * user space filter agent.
44 *
45 * It should be noted that messages about many TCP/IP sockets can be multiplexed
46 * over a single kernel control socket.
47 *
48 * Notes:
49 * - The current implementation is limited to TCP sockets.
50 * - The current implementation supports up to MAX_CONTENT_FILTER simultaneous
51 * content filters for the sake of simplicity of the implementation.
52 *
53 *
54 * NECP FILTER CONTROL UNIT
55 *
56 * A user space filter agent uses the Network Extension Control Policy (NECP)
57 * database to specify which TCP/IP sockets need to be filtered. The NECP
58 * criteria may be based on a variety of properties like user ID or proc UUID.
59 *
60 * The NECP "filter control unit" is used by the socket content filter subsystem
61 * to deliver the relevant TCP/IP content information to the appropriate
62 * user space filter agent via its kernel control socket instance.
63 * This works as follows:
64 *
65 * 1) The user space filter agent specifies an NECP filter control unit when
66 * it adds its filtering rules to the NECP database.
67 *
68 * 2) The user space filter agent also sets its NECP filter control unit on the
69 * content filter kernel control socket via the socket option
70 * CFIL_OPT_NECP_CONTROL_UNIT.
71 *
72 * 3) The NECP database is consulted to find out if a given TCP/IP socket
73 * needs to be subjected to content filtering and returns the corresponding
74 * NECP filter control unit -- the NECP filter control unit is actually
75 * stored in the TCP/IP socket structure so the NECP lookup is really simple.
76 *
77 * 4) The NECP filter control unit is then used to find the corresponding
78 * kernel control socket instance.
79 *
80 * Note: NECP currently supports a single filter control unit per TCP/IP socket
81 * but this restriction may soon be lifted.
82 *
83 *
84 * THE MESSAGING PROTOCOL
85 *
86 * The socket content filter subsystem and a user space filter agent
87 * communicate over the kernel control socket via an asynchronous
88 * messaging protocol (this is not a request-response protocol).
89 * The socket content filter subsystem sends event messages to the user
90 * space filter agent about the TCP/IP sockets it is interested to filter.
91 * The user space filter agent sends action messages to either allow
92 * data to pass or to disallow the data flow (and drop the connection).
93 *
94 * All messages over a content filter kernel control socket share the same
95 * common header of type "struct cfil_msg_hdr". The message type tells if
96 * it's an event message "CFM_TYPE_EVENT" or an action message "CFM_TYPE_ACTION".
97 * The message header field "cfm_sock_id" identifies a given TCP/IP socket.
98 * Note the message header length field may be padded for alignment and can
99 * be larger than the actual content of the message.
100 * The field "cfm_op" describes the kind of event or action.
101 *
102 * Here are the kinds of content filter events:
103 * - CFM_OP_SOCKET_ATTACHED: a new TCP/IP socket is being filtered
104 * - CFM_OP_SOCKET_CLOSED: A TCP/IP socket is closed
105 * - CFM_OP_DATA_OUT: A span of data is being sent on a TCP/IP socket
106 * - CFM_OP_DATA_IN: A span of data is being received on a TCP/IP socket
107 *
108 *
109 * EVENT MESSAGES
110 *
111 * The CFM_OP_DATA_OUT and CFM_OP_DATA_IN event messages contain a span of
112 * data that is being sent or received. The position of this span of data
113 * in the data flow is described by a set of start and end offsets. These
114 * are absolute 64-bit offsets. The first byte sent (or received) starts
115 * at offset 0 and ends at offset 1. The length of the content data
116 * is given by the difference between the end offset and the start offset.
117 *
118 * After a CFM_OP_SOCKET_ATTACHED is delivered, CFM_OP_DATA_OUT and
119 * CFM_OP_DATA_IN events are not delivered until a CFM_OP_DATA_UPDATE
120 * action message is sent by the user space filter agent.
121 *
122 * Note: absolute 64-bit offsets should be large enough for the foreseeable
123 * future. A 64-bit counter will wrap after 468 years at 10 Gbit/sec:
124 * 2^64 / ((10 * 10^9 / 8) * 60 * 60 * 24 * 365.25) = 467.63
125 *
126 * There are two kinds of primary content filter actions:
127 * - CFM_OP_DATA_UPDATE: to update pass or peek offsets for each direction.
128 * - CFM_OP_DROP: to shut down the socket and disallow further data flow
129 *
130 * There is also an action to mark a given client flow as already filtered
131 * at a higher level, CFM_OP_BLESS_CLIENT.
132 *
133 *
134 * ACTION MESSAGES
135 *
136 * The CFM_OP_DATA_UPDATE action messages let the user space filter
137 * agent allow data to flow up to the specified pass offset -- there
138 * is a pass offset for outgoing data and a pass offset for incoming data.
139 * When a new TCP/IP socket is attached to the content filter, each pass offset
140 * is initially set to 0 so no data is allowed to pass by default.
141 * When the pass offset is set to CFM_MAX_OFFSET via a CFM_OP_DATA_UPDATE,
142 * the data flow becomes unrestricted.
143 *
144 * Note that pass offsets can only be incremented. A CFM_OP_DATA_UPDATE message
145 * with a pass offset smaller than the pass offset of a previous
146 * CFM_OP_DATA_UPDATE message is silently ignored.
147 *
148 * A user space filter agent also uses CFM_OP_DATA_UPDATE action messages
149 * to tell the kernel how much data it wants to see by using the peek offsets.
150 * Just like pass offsets, there is a peek offset for each direction.
151 * When a new TCP/IP socket is attached to the content filter, each peek offset
152 * is initially set to 0 so no CFM_OP_DATA_OUT and CFM_OP_DATA_IN event
153 * messages are dispatched by default until a CFM_OP_DATA_UPDATE action message
154 * with a greater than 0 peek offset is sent by the user space filter agent.
155 * When the peek offset is set to CFM_MAX_OFFSET via a CFM_OP_DATA_UPDATE,
156 * the flow of data events becomes unrestricted.
157 *
158 * Note that peek offsets cannot be smaller than the corresponding pass offset.
159 * Also a peek offset cannot be smaller than the corresponding end offset
160 * of the last CFM_OP_DATA_OUT/CFM_OP_DATA_IN message dispatched. Trying
161 * to set too small a peek value is silently ignored.
162 *
163 *
164 * PER SOCKET "struct cfil_info"
165 *
166 * As soon as a TCP/IP socket gets attached to a content filter, a
167 * "struct cfil_info" is created to hold the content filtering state for this
168 * socket.
169 *
170 * The content filtering state consists of the following information
171 * for each direction:
172 * - The current pass offset;
173 * - The first and last offsets of the data pending, waiting for a filtering
174 * decision;
175 * - The inject queue for data that passed the filters and that needs
176 * to be re-injected;
177 * - A content filter specific state in a set of "struct cfil_entry"
178 *
179 *
180 * CONTENT FILTER STATE "struct cfil_entry"
181 *
182 * The "struct cfil_entry" maintains the information most relevant to the
183 * message handling over a kernel control socket with a user space filter agent.
184 *
185 * The "struct cfil_entry" holds the NECP filter control unit that corresponds
186 * to the kernel control socket unit it corresponds to and also has a pointer
187 * to the corresponding "struct content_filter".
188 *
189 * For each direction, "struct cfil_entry" maintains the following information:
190 * - The pass offset
191 * - The peek offset
192 * - The offset of the last data peeked at by the filter
193 * - A queue of data that's waiting to be delivered to the user space filter
194 * agent on the kernel control socket
195 * - A queue of data for which event messages have been sent on the kernel
196 * control socket and are pending for a filtering decision.
197 *
198 *
199 * CONTENT FILTER QUEUES
200 *
201 * Data that is being filtered is steered away from the TCP/IP socket buffer
202 * and instead will sit in one of three content filter queues until the data
203 * can be re-injected into the TCP/IP socket buffer.
204 *
205 * A content filter queue is represented by "struct cfil_queue" that contains
206 * a list of mbufs and the start and end offset of the data span of
207 * the list of mbufs.
208 *
209 * The data moves into the three content filter queues according to this
210 * sequence:
211 * a) The "cfe_ctl_q" of "struct cfil_entry"
212 * b) The "cfe_pending_q" of "struct cfil_entry"
213 * c) The "cfi_inject_q" of "struct cfil_info"
214 *
215 * Note: The sequence (a),(b) may be repeated several times if there is more
216 * than one content filter attached to the TCP/IP socket.
217 *
218 * The "cfe_ctl_q" queue holds data than cannot be delivered to the
219 * kernel conntrol socket for two reasons:
220 * - The peek offset is less that the end offset of the mbuf data
221 * - The kernel control socket is flow controlled
222 *
223 * The "cfe_pending_q" queue holds data for which CFM_OP_DATA_OUT or
224 * CFM_OP_DATA_IN have been successfully dispatched to the kernel control
225 * socket and are waiting for a pass action message from the user space
226 * filter agent. An mbuf length must be fully allowed to pass to be removed
227 * from the cfe_pending_q.
228 *
229 * The "cfi_inject_q" queue holds data that has been fully allowed to pass
230 * by the user space filter agent and that needs to be re-injected into the
231 * TCP/IP socket.
232 *
233 *
234 * IMPACT ON FLOW CONTROL
235 *
236 * An essential aspect of the content filter subsystem is to minimize the
237 * impact on flow control of the TCP/IP sockets being filtered.
238 *
239 * The processing overhead of the content filtering may have an effect on
240 * flow control by adding noticeable delays and cannot be eliminated --
241 * care must be taken by the user space filter agent to minimize the
242 * processing delays.
243 *
244 * The amount of data being filtered is kept in buffers while waiting for
245 * a decision by the user space filter agent. This amount of data pending
246 * needs to be subtracted from the amount of data available in the
247 * corresponding TCP/IP socket buffer. This is done by modifying
248 * sbspace() and tcp_sbspace() to account for amount of data pending
249 * in the content filter.
250 *
251 *
252 * LOCKING STRATEGY
253 *
254 * The global state of content filter subsystem is protected by a single
255 * read-write lock "cfil_lck_rw". The data flow can be done with the
256 * cfil read-write lock held as shared so it can be re-entered from multiple
257 * threads.
258 *
259 * The per TCP/IP socket content filter state -- "struct cfil_info" -- is
260 * protected by the socket lock.
261 *
262 * A TCP/IP socket lock cannot be taken while the cfil read-write lock
263 * is held. That's why we have some sequences where we drop the cfil read-write
264 * lock before taking the TCP/IP lock.
265 *
266 * It is also important to lock the TCP/IP socket buffer while the content
267 * filter is modifying the amount of pending data. Otherwise the calculations
268 * in sbspace() and tcp_sbspace() could be wrong.
269 *
270 * The "cfil_lck_rw" protects "struct content_filter" and also the fields
271 * "cfe_link" and "cfe_filter" of "struct cfil_entry".
272 *
273 * Actually "cfe_link" and "cfe_filter" are protected by both by
274 * "cfil_lck_rw" and the socket lock: they may be modified only when
275 * "cfil_lck_rw" is exclusive and the socket is locked.
276 *
277 * To read the other fields of "struct content_filter" we have to take
278 * "cfil_lck_rw" in shared mode.
279 *
280 *
281 * LIMITATIONS
282 *
283 * - For TCP sockets only
284 *
285 * - Does not support TCP unordered messages
286 */
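
/*
 * ILLUSTRATIVE USER SPACE SKETCH (not compiled into the kernel)
 *
 * A minimal sketch of the user space side of the messaging protocol
 * described above: attach to the subsystem through the kernel control
 * socket, bind it to an NECP filter control unit, and answer events with
 * a CFM_OP_DATA_UPDATE action. It assumes the exported definitions from
 * <net/content_filter.h> and the kernel control APIs from
 * <sys/kern_control.h>; names local to the sketch (cfil_agent_attach,
 * cfil_agent_pass_all) are hypothetical and error handling is omitted.
 */
#if 0
#include <sys/socket.h>
#include <sys/ioctl.h>
#include <sys/kern_control.h>
#include <sys/sys_domain.h>
#include <net/content_filter.h>
#include <string.h>
#include <unistd.h>

/* Open the content filter kernel control socket and set the NECP unit */
static int
cfil_agent_attach(uint32_t necp_control_unit)
{
	struct ctl_info info;
	struct sockaddr_ctl addr;
	int fd = socket(PF_SYSTEM, SOCK_DGRAM, SYSPROTO_CONTROL);

	memset(&info, 0, sizeof(info));
	strlcpy(info.ctl_name, CONTENT_FILTER_CONTROL_NAME, sizeof(info.ctl_name));
	ioctl(fd, CTLIOCGINFO, &info);          /* resolve the dynamic control ID */

	memset(&addr, 0, sizeof(addr));
	addr.sc_len = sizeof(addr);
	addr.sc_family = AF_SYSTEM;
	addr.ss_sysaddr = AF_SYS_CONTROL;
	addr.sc_id = info.ctl_id;
	addr.sc_unit = 0;                       /* let the kctl layer pick a unit */
	connect(fd, (struct sockaddr *)&addr, sizeof(addr));

	/* Tie this control socket to the NECP filter control unit */
	setsockopt(fd, SYSPROTO_CONTROL, CFIL_OPT_NECP_CONTROL_UNIT,
	    &necp_control_unit, sizeof(necp_control_unit));
	return fd;
}

/* Allow all pending and future data on one flow, in both directions */
static void
cfil_agent_pass_all(int fd, cfil_sock_id_t sock_id)
{
	struct cfil_msg_action action;

	memset(&action, 0, sizeof(action));
	action.cfa_msghdr.cfm_len = sizeof(action);
	action.cfa_msghdr.cfm_version = CFM_VERSION_CURRENT;
	action.cfa_msghdr.cfm_type = CFM_TYPE_ACTION;
	action.cfa_msghdr.cfm_op = CFM_OP_DATA_UPDATE;
	action.cfa_msghdr.cfm_sock_id = sock_id;
	action.cfa_in_pass_offset = CFM_MAX_OFFSET;
	action.cfa_in_peek_offset = CFM_MAX_OFFSET;
	action.cfa_out_pass_offset = CFM_MAX_OFFSET;
	action.cfa_out_peek_offset = CFM_MAX_OFFSET;
	send(fd, &action, sizeof(action), 0);
}
#endif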
287
288 /*
289 * TO DO LIST
290 *
291 * SOONER:
292 *
293 * Deal with OOB
294 *
295 * LATER:
296 *
297 * If datagrams are supported, enqueue control and address mbufs as well
298 */
299
300 #include <sys/types.h>
301 #include <sys/kern_control.h>
302 #include <sys/queue.h>
303 #include <sys/domain.h>
304 #include <sys/protosw.h>
305 #include <sys/syslog.h>
306 #include <sys/systm.h>
307 #include <sys/param.h>
308 #include <sys/mbuf.h>
309
310 #include <kern/locks.h>
311 #include <kern/zalloc.h>
312 #include <kern/debug.h>
313
314 #include <net/content_filter.h>
315 #include <net/content_filter_crypto.h>
316
317 #include <netinet/in_pcb.h>
318 #include <netinet/tcp.h>
319 #include <netinet/tcp_var.h>
320 #include <netinet/udp.h>
321 #include <netinet/udp_var.h>
322
323 #include <string.h>
324 #include <libkern/libkern.h>
325 #include <kern/sched_prim.h>
326 #include <kern/task.h>
327 #include <mach/task_info.h>
328
329 #if !TARGET_OS_OSX && !defined(XNU_TARGET_OS_OSX)
330 #define MAX_CONTENT_FILTER 2
331 #else
332 #define MAX_CONTENT_FILTER 8
333 #endif
334
335 struct cfil_entry;
336
337 /*
338 * The structure content_filter represents a user space content filter
339 * It's created and associated with a kernel control socket instance
340 */
341 struct content_filter {
342 kern_ctl_ref cf_kcref;
343 u_int32_t cf_kcunit;
344 u_int32_t cf_flags;
345
346 uint32_t cf_necp_control_unit;
347
348 uint32_t cf_sock_count;
349 TAILQ_HEAD(, cfil_entry) cf_sock_entries;
350
351 cfil_crypto_state_t cf_crypto_state;
352 };
353
354 #define CFF_ACTIVE 0x01
355 #define CFF_DETACHING 0x02
356 #define CFF_FLOW_CONTROLLED 0x04
357
358 struct content_filter **content_filters = NULL;
359 uint32_t cfil_active_count = 0; /* Number of active content filters */
360 uint32_t cfil_sock_attached_count = 0; /* Number of socket attachments */
361 uint32_t cfil_sock_udp_attached_count = 0; /* Number of UDP socket attachments */
362 uint32_t cfil_sock_attached_stats_count = 0; /* Number of sockets requested periodic stats report */
363 uint32_t cfil_close_wait_timeout = 1000; /* in milliseconds */
364
365 static kern_ctl_ref cfil_kctlref = NULL;
366
367 static lck_grp_attr_t *cfil_lck_grp_attr = NULL;
368 static lck_attr_t *cfil_lck_attr = NULL;
369 static lck_grp_t *cfil_lck_grp = NULL;
370 decl_lck_rw_data(static, cfil_lck_rw);
371
372 #define CFIL_RW_LCK_MAX 8
373
374 int cfil_rw_nxt_lck = 0;
375 void* cfil_rw_lock_history[CFIL_RW_LCK_MAX];
376
377 int cfil_rw_nxt_unlck = 0;
378 void* cfil_rw_unlock_history[CFIL_RW_LCK_MAX];
379
380 #define CONTENT_FILTER_ZONE_NAME "content_filter"
381 #define CONTENT_FILTER_ZONE_MAX 10
382 static struct zone *content_filter_zone = NULL; /* zone for content_filter */
383
384
385 #define CFIL_INFO_ZONE_NAME "cfil_info"
386 #define CFIL_INFO_ZONE_MAX 1024
387 static struct zone *cfil_info_zone = NULL; /* zone for cfil_info */
388
389 MBUFQ_HEAD(cfil_mqhead);
390
391 struct cfil_queue {
392 uint64_t q_start; /* offset of first byte in queue */
393 uint64_t q_end; /* offset of last byte in queue */
394 struct cfil_mqhead q_mq;
395 };
396
397 /*
398 * struct cfil_entry
399 *
400 * There is one entry per content filter
401 */
402 struct cfil_entry {
403 TAILQ_ENTRY(cfil_entry) cfe_link;
404 SLIST_ENTRY(cfil_entry) cfe_order_link;
405 struct content_filter *cfe_filter;
406
407 struct cfil_info *cfe_cfil_info;
408 uint32_t cfe_flags;
409 uint32_t cfe_necp_control_unit;
410 struct timeval cfe_last_event; /* To user space */
411 struct timeval cfe_last_action; /* From user space */
412 uint64_t cfe_byte_inbound_count_reported; /* stats already been reported */
413 uint64_t cfe_byte_outbound_count_reported; /* stats already been reported */
414 struct timeval cfe_stats_report_ts; /* Timestamp for last stats report */
415 uint32_t cfe_stats_report_frequency; /* Interval for stats report in msecs */
416 boolean_t cfe_laddr_sent;
417
418 struct cfe_buf {
419 /*
420 * cfe_pending_q holds data that has been delivered to
421 * the filter and for which we are waiting for an action
422 */
423 struct cfil_queue cfe_pending_q;
424 /*
425 * This queue is for data that has not been delivered to
426 * the content filter (new data, data past the peek offset, or flow control)
427 */
428 struct cfil_queue cfe_ctl_q;
429
430 uint64_t cfe_pass_offset;
431 uint64_t cfe_peek_offset;
432 uint64_t cfe_peeked;
433 } cfe_snd, cfe_rcv;
434 };
435
436 #define CFEF_CFIL_ATTACHED 0x0001 /* was attached to filter */
437 #define CFEF_SENT_SOCK_ATTACHED 0x0002 /* sock attach event was sent */
438 #define CFEF_DATA_START 0x0004 /* can send data event */
439 #define CFEF_FLOW_CONTROLLED 0x0008 /* wait for flow control lift */
440 #define CFEF_SENT_DISCONNECT_IN 0x0010 /* event was sent */
441 #define CFEF_SENT_DISCONNECT_OUT 0x0020 /* event was sent */
442 #define CFEF_SENT_SOCK_CLOSED 0x0040 /* closed event was sent */
443 #define CFEF_CFIL_DETACHED 0x0080 /* filter was detached */
444
445
446 #define CFI_ADD_TIME_LOG(cfil, t1, t0, op) \
447 struct timeval _tdiff; \
448 if ((cfil)->cfi_op_list_ctr < CFI_MAX_TIME_LOG_ENTRY) { \
449 timersub(t1, t0, &_tdiff); \
450 (cfil)->cfi_op_time[(cfil)->cfi_op_list_ctr] = (uint32_t)(_tdiff.tv_sec * 1000 + _tdiff.tv_usec / 1000);\
451 (cfil)->cfi_op_list[(cfil)->cfi_op_list_ctr] = (unsigned char)op; \
452 (cfil)->cfi_op_list_ctr ++; \
453 }
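
/*
 * Illustrative use of CFI_ADD_TIME_LOG (a sketch, not a verbatim call site
 * from below): record, in milliseconds since the first event on the flow,
 * when a given operation happened. At most CFI_MAX_TIME_LOG_ENTRY entries
 * are kept per cfil_info.
 */
#if 0
	struct timeval now;

	microuptime(&now);
	CFI_ADD_TIME_LOG(cfil_info, &now, &cfil_info->cfi_first_event,
	    CFM_OP_SOCKET_ATTACHED);
#endif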
454
455 struct cfil_hash_entry;
456
457 /*
458 * struct cfil_info
459 *
460 * There is a struct cfil_info per socket
461 */
462 struct cfil_info {
463 TAILQ_ENTRY(cfil_info) cfi_link;
464 TAILQ_ENTRY(cfil_info) cfi_link_stats;
465 struct socket *cfi_so;
466 uint64_t cfi_flags;
467 uint64_t cfi_sock_id;
468 struct timeval64 cfi_first_event;
469 uint32_t cfi_op_list_ctr;
470 uint32_t cfi_op_time[CFI_MAX_TIME_LOG_ENTRY]; /* time interval in microseconds since first event */
471 unsigned char cfi_op_list[CFI_MAX_TIME_LOG_ENTRY];
472 union sockaddr_in_4_6 cfi_so_attach_faddr; /* faddr at the time of attach */
473 union sockaddr_in_4_6 cfi_so_attach_laddr; /* laddr at the time of attach */
474
475 int cfi_dir;
476 uint64_t cfi_byte_inbound_count;
477 uint64_t cfi_byte_outbound_count;
478
479 boolean_t cfi_isSignatureLatest; /* Indicates if signature covers latest flow attributes */
480 struct cfi_buf {
481 /*
482 * cfi_pending_first and cfi_pending_last describe the total
483 * amount of data outstanding for all the filters on
484 * this socket and data in the flow queue
485 * cfi_pending_mbcnt counts in sballoc() "chars of mbufs used"
486 */
487 uint64_t cfi_pending_first;
488 uint64_t cfi_pending_last;
489 uint32_t cfi_pending_mbcnt;
490 uint32_t cfi_pending_mbnum;
491 uint32_t cfi_tail_drop_cnt;
492 /*
493 * cfi_pass_offset is the minimum of all the filters
494 */
495 uint64_t cfi_pass_offset;
496 /*
497 * cfi_inject_q holds data that needs to be re-injected
498 * into the socket after filtering and that can
499 * be queued because of flow control
500 */
501 struct cfil_queue cfi_inject_q;
502 } cfi_snd, cfi_rcv;
503
504 struct cfil_entry cfi_entries[MAX_CONTENT_FILTER];
505 struct cfil_hash_entry *cfi_hash_entry;
506 SLIST_HEAD(, cfil_entry) cfi_ordered_entries;
507 } __attribute__((aligned(8)));
508
509 #define CFIF_DROP 0x0001 /* drop action applied */
510 #define CFIF_CLOSE_WAIT 0x0002 /* waiting for filter to close */
511 #define CFIF_SOCK_CLOSED 0x0004 /* socket is closed */
512 #define CFIF_RETRY_INJECT_IN 0x0010 /* inject in failed */
513 #define CFIF_RETRY_INJECT_OUT 0x0020 /* inject out failed */
514 #define CFIF_SHUT_WR 0x0040 /* shutdown write */
515 #define CFIF_SHUT_RD 0x0080 /* shutdown read */
516 #define CFIF_SOCKET_CONNECTED 0x0100 /* socket is connected */
517 #define CFIF_INITIAL_VERDICT 0x0200 /* received initial verdict */
518
519 #define CFI_MASK_GENCNT 0xFFFFFFFF00000000 /* upper 32 bits */
520 #define CFI_SHIFT_GENCNT 32
521 #define CFI_MASK_FLOWHASH 0x00000000FFFFFFFF /* lower 32 bits */
522 #define CFI_SHIFT_FLOWHASH 0
523
524 #define CFI_ENTRY_KCUNIT(i, e) (((e) - &((i)->cfi_entries[0])) + 1)
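
/*
 * Illustrative sketch (an assumption about the encoding, not a helper used
 * below): for TCP flows the 64-bit cfm_sock_id carries the socket
 * generation count in its upper 32 bits and the inp flow hash in its lower
 * 32 bits, which is what the masks and shifts above express.
 * cfil_socket_from_sock_id() performs the reverse lookup.
 */
#if 0
static inline cfil_sock_id_t
cfil_sock_id_compose(so_gen_t gencnt, u_int32_t flowhash)
{
	return ((cfil_sock_id_t)gencnt << CFI_SHIFT_GENCNT) |
	       ((cfil_sock_id_t)flowhash & CFI_MASK_FLOWHASH);
}
#endif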
525
526 TAILQ_HEAD(cfil_sock_head, cfil_info) cfil_sock_head;
527 TAILQ_HEAD(cfil_sock_head_stats, cfil_info) cfil_sock_head_stats;
528
529 #define CFIL_QUEUE_VERIFY(x) if (cfil_debug) cfil_queue_verify(x)
530 #define CFIL_INFO_VERIFY(x) if (cfil_debug) cfil_info_verify(x)
531
532 /*
533 * UDP Socket Support
534 */
535 LIST_HEAD(cfilhashhead, cfil_hash_entry);
536 #define CFILHASHSIZE 16
537 #define CFIL_HASH(laddr, faddr, lport, fport) ((faddr) ^ ((laddr) >> 16) ^ (fport) ^ (lport))
538 #define IS_UDP(so) (so && so->so_proto && so->so_proto->pr_type == SOCK_DGRAM && so->so_proto->pr_protocol == IPPROTO_UDP)
539 #define UNCONNECTED(inp) (inp && (((inp->inp_vflag & INP_IPV4) && (inp->inp_faddr.s_addr == INADDR_ANY)) || \
540 ((inp->inp_vflag & INP_IPV6) && IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr))))
541 #define IS_ENTRY_ATTACHED(cfil_info, kcunit) (cfil_info != NULL && (kcunit <= MAX_CONTENT_FILTER) && \
542 cfil_info->cfi_entries[kcunit - 1].cfe_filter != NULL)
543 #define IS_DNS(local, remote) (check_port(local, 53) || check_port(remote, 53) || check_port(local, 5353) || check_port(remote, 5353))
544 #define IS_INITIAL_TFO_DATA(so) (so && (so->so_flags1 & SOF1_PRECONNECT_DATA) && (so->so_state & SS_ISCONNECTING))
545 #define NULLADDRESS(addr) ((addr.sa.sa_len == 0) || \
546 (addr.sa.sa_family == AF_INET && addr.sin.sin_addr.s_addr == 0) || \
547 (addr.sa.sa_family == AF_INET6 && IN6_IS_ADDR_UNSPECIFIED(&addr.sin6.sin6_addr)))
548
549 /*
550 * Periodic Statistics Report:
551 */
552 static struct thread *cfil_stats_report_thread;
553 #define CFIL_STATS_REPORT_INTERVAL_MIN_MSEC 500 // Highest report frequency
554 #define CFIL_STATS_REPORT_RUN_INTERVAL_NSEC (CFIL_STATS_REPORT_INTERVAL_MIN_MSEC * NSEC_PER_MSEC)
555 #define CFIL_STATS_REPORT_MAX_COUNT 50 // Max stats to be reported per run
556
557 /* This buffer must have same layout as struct cfil_msg_stats_report */
558 struct cfil_stats_report_buffer {
559 struct cfil_msg_hdr msghdr;
560 uint32_t count;
561 struct cfil_msg_sock_stats stats[CFIL_STATS_REPORT_MAX_COUNT];
562 };
563 static struct cfil_stats_report_buffer *global_cfil_stats_report_buffers[MAX_CONTENT_FILTER];
564 static uint32_t global_cfil_stats_counts[MAX_CONTENT_FILTER];
565
566 /*
567 * UDP Garbage Collection:
568 */
569 static struct thread *cfil_udp_gc_thread;
570 #define UDP_FLOW_GC_IDLE_TO 30 // Flow Idle Timeout in seconds
571 #define UDP_FLOW_GC_ACTION_TO 10 // Flow Action Timeout (no action from user space) in seconds
572 #define UDP_FLOW_GC_MAX_COUNT 100 // Max UDP flows to be handled per run
573 #define UDP_FLOW_GC_RUN_INTERVAL_NSEC (10 * NSEC_PER_SEC) // GC wakes up every 10 seconds
574
575 /*
576 * UDP flow queue thresholds
577 */
578 #define UDP_FLOW_GC_MBUF_CNT_MAX (2 << MBSHIFT) // Max mbuf byte count in flow queue (2MB)
579 #define UDP_FLOW_GC_MBUF_NUM_MAX (UDP_FLOW_GC_MBUF_CNT_MAX >> MCLSHIFT) // Max mbuf count in flow queue (1K)
580 #define UDP_FLOW_GC_MBUF_SHIFT 5 // Shift to get 1/32 of platform limits
581 /*
582 * UDP flow queue threshold globals:
583 */
584 static unsigned int cfil_udp_gc_mbuf_num_max = UDP_FLOW_GC_MBUF_NUM_MAX;
585 static unsigned int cfil_udp_gc_mbuf_cnt_max = UDP_FLOW_GC_MBUF_CNT_MAX;
586
587 /*
588 * struct cfil_hash_entry
589 *
590 * Hash entry for cfil_info
591 */
592 struct cfil_hash_entry {
593 LIST_ENTRY(cfil_hash_entry) cfentry_link;
594 struct cfil_info *cfentry_cfil;
595 u_short cfentry_fport;
596 u_short cfentry_lport;
597 sa_family_t cfentry_family;
598 u_int32_t cfentry_flowhash;
599 u_int64_t cfentry_lastused;
600 union {
601 /* foreign host table entry */
602 struct in_addr_4in6 addr46;
603 struct in6_addr addr6;
604 } cfentry_faddr;
605 union {
606 /* local host table entry */
607 struct in_addr_4in6 addr46;
608 struct in6_addr addr6;
609 } cfentry_laddr;
610 };
611
612 /*
613 * struct cfil_db
614 *
615 * For each UDP socket, this is a hash table maintaining all cfil_info structs
616 * keyed by the flow 4-tuple <lport,fport,laddr,faddr>.
617 */
618 struct cfil_db {
619 struct socket *cfdb_so;
620 uint32_t cfdb_count; /* Number of total content filters */
621 struct cfilhashhead *cfdb_hashbase;
622 u_long cfdb_hashmask;
623 struct cfil_hash_entry *cfdb_only_entry; /* Optimization for connected UDP */
624 };
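
/*
 * Illustrative bucket selection sketch (assumptions, not an excerpt of the
 * lookup code below): CFIL_HASH() mixes the flow 4-tuple and the result is
 * masked with cfdb_hashmask to select a bucket in cfdb_hashbase, where the
 * cfil_hash_entry list is then walked for an exact match.
 */
#if 0
	u_int32_t hashkey;
	struct cfilhashhead *bucket;

	hashkey = CFIL_HASH(laddr, faddr, lport, fport);
	bucket = &db->cfdb_hashbase[hashkey & db->cfdb_hashmask];
#endif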
625
626 /*
627 * CFIL specific mbuf tag:
628 * Save state of socket at the point of data entry into cfil.
629 * Use saved state for reinjection at protocol layer.
630 */
631 struct cfil_tag {
632 union sockaddr_in_4_6 cfil_faddr;
633 uint32_t cfil_so_state_change_cnt;
634 short cfil_so_options;
635 };
636
637 #define CFIL_HASH_ENTRY_ZONE_NAME "cfil_entry_hash"
638 #define CFIL_HASH_ENTRY_ZONE_MAX 1024
639 static struct zone *cfil_hash_entry_zone = NULL;
640
641 #define CFIL_DB_ZONE_NAME "cfil_db"
642 #define CFIL_DB_ZONE_MAX 1024
643 static struct zone *cfil_db_zone = NULL;
644
645 /*
646 * Statistics
647 */
648
649 struct cfil_stats cfil_stats;
650
651 /*
652 * For troubleshooting
653 */
654 int cfil_log_level = LOG_ERR;
655 int cfil_debug = 1;
656
657 // Debug controls added for selective debugging.
658 // Disabled for production. If enabled,
659 // these will have performance impact
660 #define LIFECYCLE_DEBUG 0
661 #define VERDICT_DEBUG 0
662 #define DATA_DEBUG 0
663 #define SHOW_DEBUG 0
664 #define GC_DEBUG 0
665 #define STATS_DEBUG 0
666
667 /*
668 * Sysctls for logs and statistics
669 */
670 static int sysctl_cfil_filter_list(struct sysctl_oid *, void *, int,
671 struct sysctl_req *);
672 static int sysctl_cfil_sock_list(struct sysctl_oid *, void *, int,
673 struct sysctl_req *);
674
675 SYSCTL_NODE(_net, OID_AUTO, cfil, CTLFLAG_RW | CTLFLAG_LOCKED, 0, "cfil");
676
677 SYSCTL_INT(_net_cfil, OID_AUTO, log, CTLFLAG_RW | CTLFLAG_LOCKED,
678 &cfil_log_level, 0, "");
679
680 SYSCTL_INT(_net_cfil, OID_AUTO, debug, CTLFLAG_RW | CTLFLAG_LOCKED,
681 &cfil_debug, 0, "");
682
683 SYSCTL_UINT(_net_cfil, OID_AUTO, sock_attached_count, CTLFLAG_RD | CTLFLAG_LOCKED,
684 &cfil_sock_attached_count, 0, "");
685
686 SYSCTL_UINT(_net_cfil, OID_AUTO, active_count, CTLFLAG_RD | CTLFLAG_LOCKED,
687 &cfil_active_count, 0, "");
688
689 SYSCTL_UINT(_net_cfil, OID_AUTO, close_wait_timeout, CTLFLAG_RW | CTLFLAG_LOCKED,
690 &cfil_close_wait_timeout, 0, "");
691
692 static int cfil_sbtrim = 1;
693 SYSCTL_UINT(_net_cfil, OID_AUTO, sbtrim, CTLFLAG_RW | CTLFLAG_LOCKED,
694 &cfil_sbtrim, 0, "");
695
696 SYSCTL_PROC(_net_cfil, OID_AUTO, filter_list, CTLFLAG_RD | CTLFLAG_LOCKED,
697 0, 0, sysctl_cfil_filter_list, "S,cfil_filter_stat", "");
698
699 SYSCTL_PROC(_net_cfil, OID_AUTO, sock_list, CTLFLAG_RD | CTLFLAG_LOCKED,
700 0, 0, sysctl_cfil_sock_list, "S,cfil_sock_stat", "");
701
702 SYSCTL_STRUCT(_net_cfil, OID_AUTO, stats, CTLFLAG_RD | CTLFLAG_LOCKED,
703 &cfil_stats, cfil_stats, "");
704
705 /*
706 * Forward declarations to appease the compiler
707 */
708 static int cfil_action_data_pass(struct socket *, struct cfil_info *, uint32_t, int,
709 uint64_t, uint64_t);
710 static int cfil_action_drop(struct socket *, struct cfil_info *, uint32_t);
711 static int cfil_action_bless_client(uint32_t, struct cfil_msg_hdr *);
712 static int cfil_action_set_crypto_key(uint32_t, struct cfil_msg_hdr *);
713 static int cfil_dispatch_closed_event(struct socket *, struct cfil_info *, int);
714 static int cfil_data_common(struct socket *, struct cfil_info *, int, struct sockaddr *,
715 struct mbuf *, struct mbuf *, uint32_t);
716 static int cfil_data_filter(struct socket *, struct cfil_info *, uint32_t, int,
717 struct mbuf *, uint64_t);
718 static void fill_ip_sockaddr_4_6(union sockaddr_in_4_6 *,
719 struct in_addr, u_int16_t);
720 static void fill_ip6_sockaddr_4_6(union sockaddr_in_4_6 *,
721 struct in6_addr *, u_int16_t);
722
723 static int cfil_dispatch_attach_event(struct socket *, struct cfil_info *, uint32_t, int);
724 static void cfil_info_free(struct cfil_info *);
725 static struct cfil_info * cfil_info_alloc(struct socket *, struct cfil_hash_entry *);
726 static int cfil_info_attach_unit(struct socket *, uint32_t, struct cfil_info *);
727 static struct socket * cfil_socket_from_sock_id(cfil_sock_id_t, bool);
728 static struct socket * cfil_socket_from_client_uuid(uuid_t, bool *);
729 static int cfil_service_pending_queue(struct socket *, struct cfil_info *, uint32_t, int);
730 static int cfil_data_service_ctl_q(struct socket *, struct cfil_info *, uint32_t, int);
731 static void cfil_info_verify(struct cfil_info *);
732 static int cfil_update_data_offsets(struct socket *, struct cfil_info *, uint32_t, int,
733 uint64_t, uint64_t);
734 static int cfil_acquire_sockbuf(struct socket *, struct cfil_info *, int);
735 static void cfil_release_sockbuf(struct socket *, int);
736 static int cfil_filters_attached(struct socket *);
737
738 static void cfil_rw_lock_exclusive(lck_rw_t *);
739 static void cfil_rw_unlock_exclusive(lck_rw_t *);
740 static void cfil_rw_lock_shared(lck_rw_t *);
741 static void cfil_rw_unlock_shared(lck_rw_t *);
742 static boolean_t cfil_rw_lock_shared_to_exclusive(lck_rw_t *);
743 static void cfil_rw_lock_exclusive_to_shared(lck_rw_t *);
744
745 static unsigned int cfil_data_length(struct mbuf *, int *, int *);
746 static errno_t cfil_db_init(struct socket *);
747 static void cfil_db_free(struct socket *so);
748 struct cfil_hash_entry *cfil_db_lookup_entry(struct cfil_db *, struct sockaddr *, struct sockaddr *);
749 struct cfil_hash_entry *cfil_db_lookup_entry_with_sockid(struct cfil_db *, u_int64_t);
750 struct cfil_hash_entry *cfil_db_add_entry(struct cfil_db *, struct sockaddr *, struct sockaddr *);
751 void cfil_db_delete_entry(struct cfil_db *, struct cfil_hash_entry *);
752 struct cfil_hash_entry *cfil_sock_udp_get_flow(struct socket *, uint32_t, bool, struct sockaddr *, struct sockaddr *);
753 struct cfil_info *cfil_db_get_cfil_info(struct cfil_db *, cfil_sock_id_t);
754 static errno_t cfil_sock_udp_handle_data(bool, struct socket *, struct sockaddr *, struct sockaddr *,
755 struct mbuf *, struct mbuf *, uint32_t);
756 static int32_t cfil_sock_udp_data_pending(struct sockbuf *, bool);
757 static void cfil_sock_udp_is_closed(struct socket *);
758 static int cfil_sock_udp_notify_shutdown(struct socket *, int, int, int);
759 static int cfil_sock_udp_shutdown(struct socket *, int *);
760 static void cfil_sock_udp_close_wait(struct socket *);
761 static void cfil_sock_udp_buf_update(struct sockbuf *);
762 static int cfil_filters_udp_attached(struct socket *, bool);
763 static void cfil_get_flow_address_v6(struct cfil_hash_entry *, struct inpcb *,
764 struct in6_addr **, struct in6_addr **,
765 u_int16_t *, u_int16_t *);
766 static void cfil_get_flow_address(struct cfil_hash_entry *, struct inpcb *,
767 struct in_addr *, struct in_addr *,
768 u_int16_t *, u_int16_t *);
769 static void cfil_info_log(int, struct cfil_info *, const char *);
770 void cfil_filter_show(u_int32_t);
771 void cfil_info_show(void);
772 bool cfil_info_idle_timed_out(struct cfil_info *, int, u_int32_t);
773 bool cfil_info_action_timed_out(struct cfil_info *, int);
774 bool cfil_info_buffer_threshold_exceeded(struct cfil_info *);
775 struct m_tag *cfil_udp_save_socket_state(struct cfil_info *, struct mbuf *);
776 static void cfil_udp_gc_thread_func(void *, wait_result_t);
777 static void cfil_info_udp_expire(void *, wait_result_t);
778 static bool fill_cfil_hash_entry_from_address(struct cfil_hash_entry *, bool, struct sockaddr *);
779 static void cfil_sock_received_verdict(struct socket *so);
780 static void cfil_fill_event_msg_addresses(struct cfil_hash_entry *, struct inpcb *,
781 union sockaddr_in_4_6 *, union sockaddr_in_4_6 *,
782 boolean_t, boolean_t);
783 static void cfil_stats_report_thread_func(void *, wait_result_t);
784 static void cfil_stats_report(void *v, wait_result_t w);
785
786 bool check_port(struct sockaddr *, u_short);
787
788 /*
789 * Content filter global read write lock
790 */
791
792 static void
793 cfil_rw_lock_exclusive(lck_rw_t *lck)
794 {
795 void *lr_saved;
796
797 lr_saved = __builtin_return_address(0);
798
799 lck_rw_lock_exclusive(lck);
800
801 cfil_rw_lock_history[cfil_rw_nxt_lck] = lr_saved;
802 cfil_rw_nxt_lck = (cfil_rw_nxt_lck + 1) % CFIL_RW_LCK_MAX;
803 }
804
805 static void
806 cfil_rw_unlock_exclusive(lck_rw_t *lck)
807 {
808 void *lr_saved;
809
810 lr_saved = __builtin_return_address(0);
811
812 lck_rw_unlock_exclusive(lck);
813
814 cfil_rw_unlock_history[cfil_rw_nxt_unlck] = lr_saved;
815 cfil_rw_nxt_unlck = (cfil_rw_nxt_unlck + 1) % CFIL_RW_LCK_MAX;
816 }
817
818 static void
819 cfil_rw_lock_shared(lck_rw_t *lck)
820 {
821 void *lr_saved;
822
823 lr_saved = __builtin_return_address(0);
824
825 lck_rw_lock_shared(lck);
826
827 cfil_rw_lock_history[cfil_rw_nxt_lck] = lr_saved;
828 cfil_rw_nxt_lck = (cfil_rw_nxt_lck + 1) % CFIL_RW_LCK_MAX;
829 }
830
831 static void
832 cfil_rw_unlock_shared(lck_rw_t *lck)
833 {
834 void *lr_saved;
835
836 lr_saved = __builtin_return_address(0);
837
838 lck_rw_unlock_shared(lck);
839
840 cfil_rw_unlock_history[cfil_rw_nxt_unlck] = lr_saved;
841 cfil_rw_nxt_unlck = (cfil_rw_nxt_unlck + 1) % CFIL_RW_LCK_MAX;
842 }
843
844 static boolean_t
845 cfil_rw_lock_shared_to_exclusive(lck_rw_t *lck)
846 {
847 void *lr_saved;
848 boolean_t upgraded;
849
850 lr_saved = __builtin_return_address(0);
851
852 upgraded = lck_rw_lock_shared_to_exclusive(lck);
853 if (upgraded) {
854 cfil_rw_unlock_history[cfil_rw_nxt_unlck] = lr_saved;
855 cfil_rw_nxt_unlck = (cfil_rw_nxt_unlck + 1) % CFIL_RW_LCK_MAX;
856 }
857 return upgraded;
858 }
859
860 static void
861 cfil_rw_lock_exclusive_to_shared(lck_rw_t *lck)
862 {
863 void *lr_saved;
864
865 lr_saved = __builtin_return_address(0);
866
867 lck_rw_lock_exclusive_to_shared(lck);
868
869 cfil_rw_lock_history[cfil_rw_nxt_lck] = lr_saved;
870 cfil_rw_nxt_lck = (cfil_rw_nxt_lck + 1) % CFIL_RW_LCK_MAX;
871 }
872
873 static void
874 cfil_rw_lock_assert_held(lck_rw_t *lck, int exclusive)
875 {
876 #if !MACH_ASSERT
877 #pragma unused(lck, exclusive)
878 #endif
879 LCK_RW_ASSERT(lck,
880 exclusive ? LCK_RW_ASSERT_EXCLUSIVE : LCK_RW_ASSERT_HELD);
881 }
882
883 /*
884 * Return the number of bytes in the mbuf chain using the same
885 * method as m_length() or sballoc()
886 *
887 * Returns data len - starting from PKT start
888 * - retmbcnt - optional param to get total mbuf bytes in chain
889 * - retmbnum - optional param to get number of mbufs in chain
890 */
891 static unsigned int
892 cfil_data_length(struct mbuf *m, int *retmbcnt, int *retmbnum)
893 {
894 struct mbuf *m0;
895 unsigned int pktlen = 0;
896 int mbcnt;
897 int mbnum;
898
899 // Locate the start of data
900 for (m0 = m; m0 != NULL; m0 = m0->m_next) {
901 if (m0->m_flags & M_PKTHDR) {
902 break;
903 }
904 }
905 if (m0 == NULL) {
906 CFIL_LOG(LOG_ERR, "cfil_data_length: no M_PKTHDR");
907 return 0;
908 }
909 m = m0;
910
911 if (retmbcnt == NULL && retmbnum == NULL) {
912 return m_length(m);
913 }
914
915 pktlen = 0;
916 mbcnt = 0;
917 mbnum = 0;
918 for (m0 = m; m0 != NULL; m0 = m0->m_next) {
919 pktlen += m0->m_len;
920 mbnum++;
921 mbcnt += MSIZE;
922 if (m0->m_flags & M_EXT) {
923 mbcnt += m0->m_ext.ext_size;
924 }
925 }
926 if (retmbcnt) {
927 *retmbcnt = mbcnt;
928 }
929 if (retmbnum) {
930 *retmbnum = mbnum;
931 }
932 return pktlen;
933 }
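
/*
 * Typical use of cfil_data_length() (illustrative sketch): callers that
 * also account for socket buffer space pass the two optional out
 * parameters to get the sballoc()-style byte count and the number of
 * mbufs in the chain in one pass.
 */
#if 0
	int mbcnt = 0, mbnum = 0;
	unsigned int datalen;

	datalen = cfil_data_length(data, &mbcnt, &mbnum);
#endif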
934
935 static struct mbuf *
936 cfil_data_start(struct mbuf *m)
937 {
938 struct mbuf *m0;
939
940 // Locate the start of data
941 for (m0 = m; m0 != NULL; m0 = m0->m_next) {
942 if (m0->m_flags & M_PKTHDR) {
943 break;
944 }
945 }
946 return m0;
947 }
948
949 /*
950 * Common mbuf queue utilities
951 */
952
953 static inline void
954 cfil_queue_init(struct cfil_queue *cfq)
955 {
956 cfq->q_start = 0;
957 cfq->q_end = 0;
958 MBUFQ_INIT(&cfq->q_mq);
959 }
960
961 static inline uint64_t
962 cfil_queue_drain(struct cfil_queue *cfq)
963 {
964 uint64_t drained = cfq->q_end - cfq->q_start;
965 cfq->q_start = 0;
966 cfq->q_end = 0;
967 MBUFQ_DRAIN(&cfq->q_mq);
968
969 return drained;
970 }
971
972 /* Return 1 when empty, 0 otherwise */
973 static inline int
974 cfil_queue_empty(struct cfil_queue *cfq)
975 {
976 return MBUFQ_EMPTY(&cfq->q_mq);
977 }
978
979 static inline uint64_t
980 cfil_queue_offset_first(struct cfil_queue *cfq)
981 {
982 return cfq->q_start;
983 }
984
985 static inline uint64_t
986 cfil_queue_offset_last(struct cfil_queue *cfq)
987 {
988 return cfq->q_end;
989 }
990
991 static inline uint64_t
992 cfil_queue_len(struct cfil_queue *cfq)
993 {
994 return cfq->q_end - cfq->q_start;
995 }
996
997 /*
998 * Routines to verify some fundamental assumptions
999 */
1000
1001 static void
1002 cfil_queue_verify(struct cfil_queue *cfq)
1003 {
1004 mbuf_t chain;
1005 mbuf_t m;
1006 mbuf_t n;
1007 uint64_t queuesize = 0;
1008
1009 /* Verify offsets are ordered */
1010 VERIFY(cfq->q_start <= cfq->q_end);
1011
1012 /*
1013 * When the queue is empty, the offsets are equal; otherwise the offsets
1014 * are different
1015 */
1016 VERIFY((MBUFQ_EMPTY(&cfq->q_mq) && cfq->q_start == cfq->q_end) ||
1017 (!MBUFQ_EMPTY(&cfq->q_mq) &&
1018 cfq->q_start != cfq->q_end));
1019
1020 MBUFQ_FOREACH(chain, &cfq->q_mq) {
1021 size_t chainsize = 0;
1022 m = chain;
1023 unsigned int mlen = cfil_data_length(m, NULL, NULL);
1024 // skip the addr and control stuff if present
1025 m = cfil_data_start(m);
1026
1027 if (m == NULL ||
1028 m == (void *)M_TAG_FREE_PATTERN ||
1029 m->m_next == (void *)M_TAG_FREE_PATTERN ||
1030 m->m_nextpkt == (void *)M_TAG_FREE_PATTERN) {
1031 panic("%s - mq %p is free at %p", __func__,
1032 &cfq->q_mq, m);
1033 }
1034 for (n = m; n != NULL; n = n->m_next) {
1035 if (n->m_type != MT_DATA &&
1036 n->m_type != MT_HEADER &&
1037 n->m_type != MT_OOBDATA) {
1038 panic("%s - %p unsupported type %u", __func__,
1039 n, n->m_type);
1040 }
1041 chainsize += n->m_len;
1042 }
1043 if (mlen != chainsize) {
1044 panic("%s - %p m_length() %u != chainsize %lu",
1045 __func__, m, mlen, chainsize);
1046 }
1047 queuesize += chainsize;
1048 }
1049 if (queuesize != cfq->q_end - cfq->q_start) {
1050 panic("%s - %p queuesize %llu != offsetdiffs %llu", __func__,
1051 m, queuesize, cfq->q_end - cfq->q_start);
1052 }
1053 }
1054
1055 static void
1056 cfil_queue_enqueue(struct cfil_queue *cfq, mbuf_t m, size_t len)
1057 {
1058 CFIL_QUEUE_VERIFY(cfq);
1059
1060 MBUFQ_ENQUEUE(&cfq->q_mq, m);
1061 cfq->q_end += len;
1062
1063 CFIL_QUEUE_VERIFY(cfq);
1064 }
1065
1066 static void
1067 cfil_queue_remove(struct cfil_queue *cfq, mbuf_t m, size_t len)
1068 {
1069 CFIL_QUEUE_VERIFY(cfq);
1070
1071 VERIFY(cfil_data_length(m, NULL, NULL) == len);
1072
1073 MBUFQ_REMOVE(&cfq->q_mq, m);
1074 MBUFQ_NEXT(m) = NULL;
1075 cfq->q_start += len;
1076
1077 CFIL_QUEUE_VERIFY(cfq);
1078 }
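
/*
 * Offset bookkeeping sketch for the queue helpers above (illustrative):
 * q_start and q_end are absolute stream offsets, so enqueueing a chain of
 * length len advances q_end and removing it later advances q_start; the
 * difference always equals the number of bytes currently queued.
 */
#if 0
	cfil_queue_enqueue(cfq, m, len);        /* q_end += len */
	VERIFY(cfil_queue_len(cfq) == cfq->q_end - cfq->q_start);
	cfil_queue_remove(cfq, m, len);         /* q_start += len */
#endif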
1079
1080 static mbuf_t
1081 cfil_queue_first(struct cfil_queue *cfq)
1082 {
1083 return MBUFQ_FIRST(&cfq->q_mq);
1084 }
1085
1086 static mbuf_t
1087 cfil_queue_next(struct cfil_queue *cfq, mbuf_t m)
1088 {
1089 #pragma unused(cfq)
1090 return MBUFQ_NEXT(m);
1091 }
1092
1093 static void
1094 cfil_entry_buf_verify(struct cfe_buf *cfe_buf)
1095 {
1096 CFIL_QUEUE_VERIFY(&cfe_buf->cfe_ctl_q);
1097 CFIL_QUEUE_VERIFY(&cfe_buf->cfe_pending_q);
1098
1099 /* Verify the queues are ordered so that pending is before ctl */
1100 VERIFY(cfe_buf->cfe_ctl_q.q_start >= cfe_buf->cfe_pending_q.q_end);
1101
1102 /* The peek offset cannot be less than the pass offset */
1103 VERIFY(cfe_buf->cfe_peek_offset >= cfe_buf->cfe_pass_offset);
1104
1105 /* Make sure we've updated the offset we peeked at */
1106 VERIFY(cfe_buf->cfe_ctl_q.q_start <= cfe_buf->cfe_peeked);
1107 }
1108
1109 static void
1110 cfil_entry_verify(struct cfil_entry *entry)
1111 {
1112 cfil_entry_buf_verify(&entry->cfe_snd);
1113 cfil_entry_buf_verify(&entry->cfe_rcv);
1114 }
1115
1116 static void
1117 cfil_info_buf_verify(struct cfi_buf *cfi_buf)
1118 {
1119 CFIL_QUEUE_VERIFY(&cfi_buf->cfi_inject_q);
1120
1121 VERIFY(cfi_buf->cfi_pending_first <= cfi_buf->cfi_pending_last);
1122 }
1123
1124 static void
1125 cfil_info_verify(struct cfil_info *cfil_info)
1126 {
1127 int i;
1128
1129 if (cfil_info == NULL) {
1130 return;
1131 }
1132
1133 cfil_info_buf_verify(&cfil_info->cfi_snd);
1134 cfil_info_buf_verify(&cfil_info->cfi_rcv);
1135
1136 for (i = 0; i < MAX_CONTENT_FILTER; i++) {
1137 cfil_entry_verify(&cfil_info->cfi_entries[i]);
1138 }
1139 }
1140
1141 static void
1142 verify_content_filter(struct content_filter *cfc)
1143 {
1144 struct cfil_entry *entry;
1145 uint32_t count = 0;
1146
1147 VERIFY(cfc->cf_sock_count >= 0);
1148
1149 TAILQ_FOREACH(entry, &cfc->cf_sock_entries, cfe_link) {
1150 count++;
1151 VERIFY(cfc == entry->cfe_filter);
1152 }
1153 VERIFY(count == cfc->cf_sock_count);
1154 }
1155
1156 /*
1157 * Kernel control socket callbacks
1158 */
1159 static errno_t
1160 cfil_ctl_connect(kern_ctl_ref kctlref, struct sockaddr_ctl *sac,
1161 void **unitinfo)
1162 {
1163 errno_t error = 0;
1164 struct content_filter *cfc = NULL;
1165
1166 CFIL_LOG(LOG_NOTICE, "");
1167
1168 cfc = zalloc(content_filter_zone);
1169 if (cfc == NULL) {
1170 CFIL_LOG(LOG_ERR, "zalloc failed");
1171 error = ENOMEM;
1172 goto done;
1173 }
1174 bzero(cfc, sizeof(struct content_filter));
1175
1176 cfil_rw_lock_exclusive(&cfil_lck_rw);
1177 if (content_filters == NULL) {
1178 struct content_filter **tmp;
1179
1180 cfil_rw_unlock_exclusive(&cfil_lck_rw);
1181
1182 MALLOC(tmp,
1183 struct content_filter **,
1184 MAX_CONTENT_FILTER * sizeof(struct content_filter *),
1185 M_TEMP,
1186 M_WAITOK | M_ZERO);
1187
1188 cfil_rw_lock_exclusive(&cfil_lck_rw);
1189
1190 if (tmp == NULL && content_filters == NULL) {
1191 error = ENOMEM;
1192 cfil_rw_unlock_exclusive(&cfil_lck_rw);
1193 goto done;
1194 }
1195 /* Another thread may have won the race */
1196 if (content_filters != NULL) {
1197 FREE(tmp, M_TEMP);
1198 } else {
1199 content_filters = tmp;
1200 }
1201 }
1202
1203 if (sac->sc_unit == 0 || sac->sc_unit > MAX_CONTENT_FILTER) {
1204 CFIL_LOG(LOG_ERR, "bad sc_unit %u", sac->sc_unit);
1205 error = EINVAL;
1206 } else if (content_filters[sac->sc_unit - 1] != NULL) {
1207 CFIL_LOG(LOG_ERR, "sc_unit %u in use", sac->sc_unit);
1208 error = EADDRINUSE;
1209 } else {
1210 /*
1211 * kernel control socket kcunit numbers start at 1
1212 */
1213 content_filters[sac->sc_unit - 1] = cfc;
1214
1215 cfc->cf_kcref = kctlref;
1216 cfc->cf_kcunit = sac->sc_unit;
1217 TAILQ_INIT(&cfc->cf_sock_entries);
1218
1219 *unitinfo = cfc;
1220 cfil_active_count++;
1221
1222 // Allocate periodic stats buffer for this filter
1223 if (global_cfil_stats_report_buffers[cfc->cf_kcunit - 1] == NULL) {
1224 cfil_rw_unlock_exclusive(&cfil_lck_rw);
1225
1226 struct cfil_stats_report_buffer *buf;
1227
1228 MALLOC(buf,
1229 struct cfil_stats_report_buffer *,
1230 sizeof(struct cfil_stats_report_buffer),
1231 M_TEMP,
1232 M_WAITOK | M_ZERO);
1233
1234 cfil_rw_lock_exclusive(&cfil_lck_rw);
1235
1236 if (buf == NULL) {
1237 error = ENOMEM;
1238 cfil_rw_unlock_exclusive(&cfil_lck_rw);
1239 goto done;
1240 }
1241
1242 /* Another thread may have won the race */
1243 if (global_cfil_stats_report_buffers[cfc->cf_kcunit - 1] != NULL) {
1244 FREE(buf, M_TEMP);
1245 } else {
1246 global_cfil_stats_report_buffers[cfc->cf_kcunit - 1] = buf;
1247 }
1248 }
1249 }
1250 cfil_rw_unlock_exclusive(&cfil_lck_rw);
1251 done:
1252 if (error != 0 && cfc != NULL) {
1253 zfree(content_filter_zone, cfc);
1254 }
1255
1256 if (error == 0) {
1257 OSIncrementAtomic(&cfil_stats.cfs_ctl_connect_ok);
1258 } else {
1259 OSIncrementAtomic(&cfil_stats.cfs_ctl_connect_fail);
1260 }
1261
1262 CFIL_LOG(LOG_INFO, "return %d cfil_active_count %u kcunit %u",
1263 error, cfil_active_count, sac->sc_unit);
1264
1265 return error;
1266 }
1267
1268 static errno_t
1269 cfil_ctl_disconnect(kern_ctl_ref kctlref, u_int32_t kcunit, void *unitinfo)
1270 {
1271 #pragma unused(kctlref)
1272 errno_t error = 0;
1273 struct content_filter *cfc;
1274 struct cfil_entry *entry;
1275 uint64_t sock_flow_id = 0;
1276
1277 CFIL_LOG(LOG_NOTICE, "");
1278
1279 if (content_filters == NULL) {
1280 CFIL_LOG(LOG_ERR, "no content filter");
1281 error = EINVAL;
1282 goto done;
1283 }
1284 if (kcunit > MAX_CONTENT_FILTER) {
1285 CFIL_LOG(LOG_ERR, "kcunit %u > MAX_CONTENT_FILTER (%d)",
1286 kcunit, MAX_CONTENT_FILTER);
1287 error = EINVAL;
1288 goto done;
1289 }
1290
1291 cfc = (struct content_filter *)unitinfo;
1292 if (cfc == NULL) {
1293 goto done;
1294 }
1295
1296 cfil_rw_lock_exclusive(&cfil_lck_rw);
1297 if (content_filters[kcunit - 1] != cfc || cfc->cf_kcunit != kcunit) {
1298 CFIL_LOG(LOG_ERR, "bad unit info %u)",
1299 kcunit);
1300 cfil_rw_unlock_exclusive(&cfil_lck_rw);
1301 goto done;
1302 }
1303 cfc->cf_flags |= CFF_DETACHING;
1304 /*
1305 * Remove all sockets from the filter
1306 */
1307 while ((entry = TAILQ_FIRST(&cfc->cf_sock_entries)) != NULL) {
1308 cfil_rw_lock_assert_held(&cfil_lck_rw, 1);
1309
1310 verify_content_filter(cfc);
1311 /*
1312 * Accept all outstanding data by pushing to next filter
1313 * or back to socket
1314 *
1315 * TBD: Actually we should make sure all data has been pushed
1316 * back to socket
1317 */
1318 if (entry->cfe_cfil_info && entry->cfe_cfil_info->cfi_so) {
1319 struct cfil_info *cfil_info = entry->cfe_cfil_info;
1320 struct socket *so = cfil_info->cfi_so;
1321 sock_flow_id = cfil_info->cfi_sock_id;
1322
1323 /* Need to let data flow immediately */
1324 entry->cfe_flags |= CFEF_SENT_SOCK_ATTACHED |
1325 CFEF_DATA_START;
1326
1327 /*
1328 * Respect locking hierarchy
1329 */
1330 cfil_rw_unlock_exclusive(&cfil_lck_rw);
1331
1332 socket_lock(so, 1);
1333
1334 /*
1335 * When cfe_filter is NULL the filter is detached
1336 * and the entry has been removed from cf_sock_entries
1337 */
1338 if ((so->so_cfil == NULL && so->so_cfil_db == NULL) || entry->cfe_filter == NULL) {
1339 cfil_rw_lock_exclusive(&cfil_lck_rw);
1340 goto release;
1341 }
1342
1343 (void) cfil_action_data_pass(so, cfil_info, kcunit, 1,
1344 CFM_MAX_OFFSET,
1345 CFM_MAX_OFFSET);
1346
1347 (void) cfil_action_data_pass(so, cfil_info, kcunit, 0,
1348 CFM_MAX_OFFSET,
1349 CFM_MAX_OFFSET);
1350
1351 cfil_rw_lock_exclusive(&cfil_lck_rw);
1352
1353 /*
1354 * Check again to make sure the cfil_info is still valid
1355 * as the socket may have been unlocked when calling
1356 * cfil_acquire_sockbuf()
1357 */
1358 if (entry->cfe_filter == NULL ||
1359 (so->so_cfil == NULL && cfil_db_get_cfil_info(so->so_cfil_db, sock_flow_id) == NULL)) {
1360 goto release;
1361 }
1362
1363 /* The filter is now detached */
1364 entry->cfe_flags |= CFEF_CFIL_DETACHED;
1365 #if LIFECYCLE_DEBUG
1366 cfil_info_log(LOG_DEBUG, cfil_info, "CFIL: LIFECYCLE: - FILTER DISCONNECTED");
1367 #endif
1368 CFIL_LOG(LOG_NOTICE, "so %llx detached %u",
1369 (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit);
1370 if ((cfil_info->cfi_flags & CFIF_CLOSE_WAIT) &&
1371 cfil_filters_attached(so) == 0) {
1372 CFIL_LOG(LOG_NOTICE, "so %llx waking",
1373 (uint64_t)VM_KERNEL_ADDRPERM(so));
1374 wakeup((caddr_t)cfil_info);
1375 }
1376
1377 /*
1378 * Remove the filter entry from the content filter
1379 * but leave the rest of the state intact as the queues
1380 * may not be empty yet
1381 */
1382 entry->cfe_filter = NULL;
1383 entry->cfe_necp_control_unit = 0;
1384
1385 TAILQ_REMOVE(&cfc->cf_sock_entries, entry, cfe_link);
1386 cfc->cf_sock_count--;
1387 release:
1388 socket_unlock(so, 1);
1389 }
1390 }
1391 verify_content_filter(cfc);
1392
1393 /* Free the stats buffer for this filter */
1394 if (global_cfil_stats_report_buffers[cfc->cf_kcunit - 1] != NULL) {
1395 FREE(global_cfil_stats_report_buffers[cfc->cf_kcunit - 1], M_TEMP);
1396 global_cfil_stats_report_buffers[cfc->cf_kcunit - 1] = NULL;
1397 }
1398 VERIFY(cfc->cf_sock_count == 0);
1399
1400 /*
1401 * Make filter inactive
1402 */
1403 content_filters[kcunit - 1] = NULL;
1404 cfil_active_count--;
1405 cfil_rw_unlock_exclusive(&cfil_lck_rw);
1406
1407 if (cfc->cf_crypto_state != NULL) {
1408 cfil_crypto_cleanup_state(cfc->cf_crypto_state);
1409 cfc->cf_crypto_state = NULL;
1410 }
1411
1412 zfree(content_filter_zone, cfc);
1413 done:
1414 if (error == 0) {
1415 OSIncrementAtomic(&cfil_stats.cfs_ctl_disconnect_ok);
1416 } else {
1417 OSIncrementAtomic(&cfil_stats.cfs_ctl_disconnect_fail);
1418 }
1419
1420 CFIL_LOG(LOG_INFO, "return %d cfil_active_count %u kcunit %u",
1421 error, cfil_active_count, kcunit);
1422
1423 return error;
1424 }
1425
1426 /*
1427 * cfil_acquire_sockbuf()
1428 *
1429 * Prevent any other thread from acquiring the sockbuf
1430 * We use sb_cfil_thread as a semaphore to prevent other threads from
1431 * messing with the sockbuf -- see sblock()
1432 * Note: We do not set SB_LOCK here because the thread may check or modify
1433 * SB_LOCK several times until it calls cfil_release_sockbuf() -- currently
1434 * sblock(), sbunlock() or sodefunct()
1435 */
1436 static int
1437 cfil_acquire_sockbuf(struct socket *so, struct cfil_info *cfil_info, int outgoing)
1438 {
1439 thread_t tp = current_thread();
1440 struct sockbuf *sb = outgoing ? &so->so_snd : &so->so_rcv;
1441 lck_mtx_t *mutex_held;
1442 int error = 0;
1443
1444 /*
1445 * Wait until no thread is holding the sockbuf and other content
1446 * filter threads have released the sockbuf
1447 */
1448 while ((sb->sb_flags & SB_LOCK) ||
1449 (sb->sb_cfil_thread != NULL && sb->sb_cfil_thread != tp)) {
1450 if (so->so_proto->pr_getlock != NULL) {
1451 mutex_held = (*so->so_proto->pr_getlock)(so, PR_F_WILLUNLOCK);
1452 } else {
1453 mutex_held = so->so_proto->pr_domain->dom_mtx;
1454 }
1455
1456 LCK_MTX_ASSERT(mutex_held, LCK_MTX_ASSERT_OWNED);
1457
1458 sb->sb_wantlock++;
1459 VERIFY(sb->sb_wantlock != 0);
1460
1461 msleep(&sb->sb_flags, mutex_held, PSOCK, "cfil_acquire_sockbuf",
1462 NULL);
1463
1464 VERIFY(sb->sb_wantlock != 0);
1465 sb->sb_wantlock--;
1466 }
1467 /*
1468 * Use reference count for repetitive calls on same thread
1469 */
1470 if (sb->sb_cfil_refs == 0) {
1471 VERIFY(sb->sb_cfil_thread == NULL);
1472 VERIFY((sb->sb_flags & SB_LOCK) == 0);
1473
1474 sb->sb_cfil_thread = tp;
1475 sb->sb_flags |= SB_LOCK;
1476 }
1477 sb->sb_cfil_refs++;
1478
1479 /* We acquire the socket buffer when we need to cleanup */
1480 if (cfil_info == NULL) {
1481 CFIL_LOG(LOG_ERR, "so %llx cfil detached",
1482 (uint64_t)VM_KERNEL_ADDRPERM(so));
1483 error = 0;
1484 } else if (cfil_info->cfi_flags & CFIF_DROP) {
1485 CFIL_LOG(LOG_ERR, "so %llx drop set",
1486 (uint64_t)VM_KERNEL_ADDRPERM(so));
1487 error = EPIPE;
1488 }
1489
1490 return error;
1491 }
1492
1493 static void
1494 cfil_release_sockbuf(struct socket *so, int outgoing)
1495 {
1496 struct sockbuf *sb = outgoing ? &so->so_snd : &so->so_rcv;
1497 thread_t tp = current_thread();
1498
1499 socket_lock_assert_owned(so);
1500
1501 if (sb->sb_cfil_thread != NULL && sb->sb_cfil_thread != tp) {
1502 panic("%s sb_cfil_thread %p not current %p", __func__,
1503 sb->sb_cfil_thread, tp);
1504 }
1505 /*
1506 * Don't panic if we are defunct because SB_LOCK has
1507 * been cleared by sodefunct()
1508 */
1509 if (!(so->so_flags & SOF_DEFUNCT) && !(sb->sb_flags & SB_LOCK)) {
1510 panic("%s SB_LOCK not set on %p", __func__,
1511 sb);
1512 }
1513 /*
1514 * We can unlock when the thread unwinds to the last reference
1515 */
1516 sb->sb_cfil_refs--;
1517 if (sb->sb_cfil_refs == 0) {
1518 sb->sb_cfil_thread = NULL;
1519 sb->sb_flags &= ~SB_LOCK;
1520
1521 if (sb->sb_wantlock > 0) {
1522 wakeup(&sb->sb_flags);
1523 }
1524 }
1525 }
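
/*
 * Illustrative caller pattern for the two routines above (a sketch of the
 * usage, not a definitive excerpt): cfil_release_sockbuf() is called
 * unconditionally so the reference count and SB_LOCK are always unwound,
 * even when the acquire failed because the flow was marked for drop.
 */
#if 0
	error = cfil_acquire_sockbuf(so, cfil_info, outgoing);
	if (error == 0) {
		error = cfil_data_service_ctl_q(so, cfil_info, kcunit, outgoing);
	}
	cfil_release_sockbuf(so, outgoing);
#endif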
1526
1527 cfil_sock_id_t
1528 cfil_sock_id_from_socket(struct socket *so)
1529 {
1530 if ((so->so_flags & SOF_CONTENT_FILTER) && so->so_cfil) {
1531 return so->so_cfil->cfi_sock_id;
1532 } else {
1533 return CFIL_SOCK_ID_NONE;
1534 }
1535 }
1536
1537 static bool
1538 cfil_socket_safe_lock(struct inpcb *inp)
1539 {
1540 if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) != WNT_STOPUSING) {
1541 socket_lock(inp->inp_socket, 1);
1542 if (in_pcb_checkstate(inp, WNT_RELEASE, 1) != WNT_STOPUSING) {
1543 return true;
1544 }
1545 socket_unlock(inp->inp_socket, 1);
1546 }
1547 return false;
1548 }
1549
1550 static struct socket *
1551 cfil_socket_from_sock_id(cfil_sock_id_t cfil_sock_id, bool udp_only)
1552 {
1553 struct socket *so = NULL;
1554 u_int64_t gencnt = cfil_sock_id >> 32;
1555 u_int32_t flowhash = (u_int32_t)(cfil_sock_id & 0x0ffffffff);
1556 struct inpcb *inp = NULL;
1557 struct inpcbinfo *pcbinfo = NULL;
1558
1559 #if VERDICT_DEBUG
1560 CFIL_LOG(LOG_ERR, "CFIL: VERDICT: search for socket: id %llu gencnt %llx flowhash %x", cfil_sock_id, gencnt, flowhash);
1561 #endif
1562
1563 if (udp_only) {
1564 goto find_udp;
1565 }
1566
1567 pcbinfo = &tcbinfo;
1568 lck_rw_lock_shared(pcbinfo->ipi_lock);
1569 LIST_FOREACH(inp, pcbinfo->ipi_listhead, inp_list) {
1570 if (inp->inp_state != INPCB_STATE_DEAD &&
1571 inp->inp_socket != NULL &&
1572 inp->inp_flowhash == flowhash &&
1573 (inp->inp_socket->so_gencnt & 0x0ffffffff) == gencnt &&
1574 inp->inp_socket->so_cfil != NULL) {
1575 if (cfil_socket_safe_lock(inp)) {
1576 so = inp->inp_socket;
1577 }
1578 break;
1579 }
1580 }
1581 lck_rw_done(pcbinfo->ipi_lock);
1582 if (so != NULL) {
1583 goto done;
1584 }
1585
1586 find_udp:
1587
1588 pcbinfo = &udbinfo;
1589 lck_rw_lock_shared(pcbinfo->ipi_lock);
1590 LIST_FOREACH(inp, pcbinfo->ipi_listhead, inp_list) {
1591 if (inp->inp_state != INPCB_STATE_DEAD &&
1592 inp->inp_socket != NULL &&
1593 inp->inp_socket->so_cfil_db != NULL &&
1594 (inp->inp_socket->so_gencnt & 0x0ffffffff) == gencnt) {
1595 if (cfil_socket_safe_lock(inp)) {
1596 so = inp->inp_socket;
1597 }
1598 break;
1599 }
1600 }
1601 lck_rw_done(pcbinfo->ipi_lock);
1602
1603 done:
1604 if (so == NULL) {
1605 OSIncrementAtomic(&cfil_stats.cfs_sock_id_not_found);
1606 CFIL_LOG(LOG_DEBUG,
1607 "no socket for sock_id %llx gencnt %llx flowhash %x",
1608 cfil_sock_id, gencnt, flowhash);
1609 }
1610
1611 return so;
1612 }
1613
1614 static struct socket *
1615 cfil_socket_from_client_uuid(uuid_t necp_client_uuid, bool *cfil_attached)
1616 {
1617 struct socket *so = NULL;
1618 struct inpcb *inp = NULL;
1619 struct inpcbinfo *pcbinfo = &tcbinfo;
1620
1621 lck_rw_lock_shared(pcbinfo->ipi_lock);
1622 LIST_FOREACH(inp, pcbinfo->ipi_listhead, inp_list) {
1623 if (inp->inp_state != INPCB_STATE_DEAD &&
1624 inp->inp_socket != NULL &&
1625 uuid_compare(inp->necp_client_uuid, necp_client_uuid) == 0) {
1626 *cfil_attached = (inp->inp_socket->so_cfil != NULL);
1627 if (cfil_socket_safe_lock(inp)) {
1628 so = inp->inp_socket;
1629 }
1630 break;
1631 }
1632 }
1633 lck_rw_done(pcbinfo->ipi_lock);
1634 if (so != NULL) {
1635 goto done;
1636 }
1637
1638 pcbinfo = &udbinfo;
1639 lck_rw_lock_shared(pcbinfo->ipi_lock);
1640 LIST_FOREACH(inp, pcbinfo->ipi_listhead, inp_list) {
1641 if (inp->inp_state != INPCB_STATE_DEAD &&
1642 inp->inp_socket != NULL &&
1643 uuid_compare(inp->necp_client_uuid, necp_client_uuid) == 0) {
1644 *cfil_attached = (inp->inp_socket->so_cfil_db != NULL);
1645 if (cfil_socket_safe_lock(inp)) {
1646 so = inp->inp_socket;
1647 }
1648 break;
1649 }
1650 }
1651 lck_rw_done(pcbinfo->ipi_lock);
1652
1653 done:
1654 return so;
1655 }
1656
1657 static void
1658 cfil_info_stats_toggle(struct cfil_info *cfil_info, struct cfil_entry *entry, uint32_t report_frequency)
1659 {
1660 struct cfil_info *cfil = NULL;
1661 Boolean found = FALSE;
1662 int kcunit;
1663
1664 if (cfil_info == NULL) {
1665 return;
1666 }
1667
1668 if (report_frequency) {
1669 if (entry == NULL) {
1670 return;
1671 }
1672
1673 // Update stats reporting frequency.
1674 if (entry->cfe_stats_report_frequency != report_frequency) {
1675 entry->cfe_stats_report_frequency = report_frequency;
1676 if (entry->cfe_stats_report_frequency < CFIL_STATS_REPORT_INTERVAL_MIN_MSEC) {
1677 entry->cfe_stats_report_frequency = CFIL_STATS_REPORT_INTERVAL_MIN_MSEC;
1678 }
1679 microuptime(&entry->cfe_stats_report_ts);
1680
1681 // Insert cfil_info into the stats list only if it is not already in it.
1682 TAILQ_FOREACH(cfil, &cfil_sock_head_stats, cfi_link_stats) {
1683 if (cfil == cfil_info) {
1684 return;
1685 }
1686 }
1687
1688 TAILQ_INSERT_TAIL(&cfil_sock_head_stats, cfil_info, cfi_link_stats);
1689
1690 // Wake up the stats thread if this is the first flow added
1691 if (cfil_sock_attached_stats_count == 0) {
1692 thread_wakeup((caddr_t)&cfil_sock_attached_stats_count);
1693 }
1694 cfil_sock_attached_stats_count++;
1695 #if STATS_DEBUG
1696 CFIL_LOG(LOG_ERR, "CFIL: VERDICT RECEIVED - STATS FLOW INSERTED: <so %llx sockID %llu> stats frequency %d msecs",
1697 cfil_info->cfi_so ? (uint64_t)VM_KERNEL_ADDRPERM(cfil_info->cfi_so) : 0,
1698 cfil_info->cfi_sock_id,
1699 entry->cfe_stats_report_frequency);
1700 #endif
1701 }
1702 } else {
1703 // Turn off stats reporting for this filter.
1704 if (entry != NULL) {
1705 // Already off, no change.
1706 if (entry->cfe_stats_report_frequency == 0) {
1707 return;
1708 }
1709
1710 entry->cfe_stats_report_frequency = 0;
1711 // If cfil_info still has filter(s) asking for stats, no need to remove from list.
1712 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
1713 if (cfil_info->cfi_entries[kcunit - 1].cfe_stats_report_frequency > 0) {
1714 return;
1715 }
1716 }
1717 }
1718
1719 // No more filters asking for stats for this cfil_info, remove it from the list.
1720 if (!TAILQ_EMPTY(&cfil_sock_head_stats)) {
1721 found = FALSE;
1722 TAILQ_FOREACH(cfil, &cfil_sock_head_stats, cfi_link_stats) {
1723 if (cfil == cfil_info) {
1724 found = TRUE;
1725 break;
1726 }
1727 }
1728 if (found) {
1729 cfil_sock_attached_stats_count--;
1730 TAILQ_REMOVE(&cfil_sock_head_stats, cfil_info, cfi_link_stats);
1731 #if STATS_DEBUG
1732 CFIL_LOG(LOG_ERR, "CFIL: VERDICT RECEIVED - STATS FLOW DELETED: <so %llx sockID %llu> stats frequency reset",
1733 cfil_info->cfi_so ? (uint64_t)VM_KERNEL_ADDRPERM(cfil_info->cfi_so) : 0,
1734 cfil_info->cfi_sock_id);
1735 #endif
1736 }
1737 }
1738 }
1739 }
1740
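/*
 * cfil_ctl_send()
 *
 * Handle an action message sent by the user space filter agent over its
 * kernel control socket: validate the message header, look up the target
 * socket from the sock id, then apply the action (data pass/peek offsets,
 * drop, bless client or crypto key update).
 */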
1741 static errno_t
1742 cfil_ctl_send(kern_ctl_ref kctlref, u_int32_t kcunit, void *unitinfo, mbuf_t m,
1743 int flags)
1744 {
1745 #pragma unused(kctlref, flags)
1746 errno_t error = 0;
1747 struct cfil_msg_hdr *msghdr;
1748 struct content_filter *cfc = (struct content_filter *)unitinfo;
1749 struct socket *so;
1750 struct cfil_msg_action *action_msg;
1751 struct cfil_entry *entry;
1752 struct cfil_info *cfil_info = NULL;
1753 unsigned int data_len = 0;
1754
1755 CFIL_LOG(LOG_INFO, "");
1756
1757 if (content_filters == NULL) {
1758 CFIL_LOG(LOG_ERR, "no content filter");
1759 error = EINVAL;
1760 goto done;
1761 }
1762 if (kcunit > MAX_CONTENT_FILTER) {
1763 CFIL_LOG(LOG_ERR, "kcunit %u > MAX_CONTENT_FILTER (%d)",
1764 kcunit, MAX_CONTENT_FILTER);
1765 error = EINVAL;
1766 goto done;
1767 }
1768 if (m == NULL) {
1769 CFIL_LOG(LOG_ERR, "null mbuf");
1770 error = EINVAL;
1771 goto done;
1772 }
1773 data_len = m_length(m);
1774
1775 if (data_len < sizeof(struct cfil_msg_hdr)) {
1776 CFIL_LOG(LOG_ERR, "too short %u", data_len);
1777 error = EINVAL;
1778 goto done;
1779 }
1780 msghdr = (struct cfil_msg_hdr *)mbuf_data(m);
1781 if (msghdr->cfm_version != CFM_VERSION_CURRENT) {
1782 CFIL_LOG(LOG_ERR, "bad version %u", msghdr->cfm_version);
1783 error = EINVAL;
1784 goto done;
1785 }
1786 if (msghdr->cfm_type != CFM_TYPE_ACTION) {
1787 CFIL_LOG(LOG_ERR, "bad type %u", msghdr->cfm_type);
1788 error = EINVAL;
1789 goto done;
1790 }
1791 if (msghdr->cfm_len > data_len) {
1792 CFIL_LOG(LOG_ERR, "bad length %u", msghdr->cfm_len);
1793 error = EINVAL;
1794 goto done;
1795 }
1796
1797 /* Validate action operation */
1798 switch (msghdr->cfm_op) {
1799 case CFM_OP_DATA_UPDATE:
1800 OSIncrementAtomic(
1801 &cfil_stats.cfs_ctl_action_data_update);
1802 break;
1803 case CFM_OP_DROP:
1804 OSIncrementAtomic(&cfil_stats.cfs_ctl_action_drop);
1805 break;
1806 case CFM_OP_BLESS_CLIENT:
1807 if (msghdr->cfm_len != sizeof(struct cfil_msg_bless_client)) {
1808 OSIncrementAtomic(&cfil_stats.cfs_ctl_action_bad_len);
1809 error = EINVAL;
1810 CFIL_LOG(LOG_ERR, "bad len: %u for op %u",
1811 msghdr->cfm_len,
1812 msghdr->cfm_op);
1813 goto done;
1814 }
1815 error = cfil_action_bless_client(kcunit, msghdr);
1816 goto done;
1817 case CFM_OP_SET_CRYPTO_KEY:
1818 if (msghdr->cfm_len != sizeof(struct cfil_msg_set_crypto_key)) {
1819 OSIncrementAtomic(&cfil_stats.cfs_ctl_action_bad_len);
1820 error = EINVAL;
1821 CFIL_LOG(LOG_ERR, "bad len: %u for op %u",
1822 msghdr->cfm_len,
1823 msghdr->cfm_op);
1824 goto done;
1825 }
1826 error = cfil_action_set_crypto_key(kcunit, msghdr);
1827 goto done;
1828 default:
1829 OSIncrementAtomic(&cfil_stats.cfs_ctl_action_bad_op);
1830 CFIL_LOG(LOG_ERR, "bad op %u", msghdr->cfm_op);
1831 error = EINVAL;
1832 goto done;
1833 }
1834 if (msghdr->cfm_len != sizeof(struct cfil_msg_action)) {
1835 OSIncrementAtomic(&cfil_stats.cfs_ctl_action_bad_len);
1836 error = EINVAL;
1837 CFIL_LOG(LOG_ERR, "bad len: %u for op %u",
1838 msghdr->cfm_len,
1839 msghdr->cfm_op);
1840 goto done;
1841 }
1842 cfil_rw_lock_shared(&cfil_lck_rw);
1843 if (cfc != (void *)content_filters[kcunit - 1]) {
1844 CFIL_LOG(LOG_ERR, "unitinfo does not match for kcunit %u",
1845 kcunit);
1846 error = EINVAL;
1847 cfil_rw_unlock_shared(&cfil_lck_rw);
1848 goto done;
1849 }
1850 cfil_rw_unlock_shared(&cfil_lck_rw);
1851
1852 // Search for the socket (TCP+UDP) and lock it
1853 so = cfil_socket_from_sock_id(msghdr->cfm_sock_id, false);
1854 if (so == NULL) {
1855 CFIL_LOG(LOG_NOTICE, "bad sock_id %llx",
1856 msghdr->cfm_sock_id);
1857 error = EINVAL;
1858 goto done;
1859 }
1860
1861 cfil_info = so->so_cfil_db != NULL ?
1862 cfil_db_get_cfil_info(so->so_cfil_db, msghdr->cfm_sock_id) : so->so_cfil;
1863
1864 if (cfil_info == NULL) {
1865 CFIL_LOG(LOG_NOTICE, "so %llx <id %llu> not attached",
1866 (uint64_t)VM_KERNEL_ADDRPERM(so), msghdr->cfm_sock_id);
1867 error = EINVAL;
1868 goto unlock;
1869 } else if (cfil_info->cfi_flags & CFIF_DROP) {
1870 CFIL_LOG(LOG_NOTICE, "so %llx drop set",
1871 (uint64_t)VM_KERNEL_ADDRPERM(so));
1872 error = EINVAL;
1873 goto unlock;
1874 }
1875 entry = &cfil_info->cfi_entries[kcunit - 1];
1876 if (entry->cfe_filter == NULL) {
1877 CFIL_LOG(LOG_NOTICE, "so %llx no filter",
1878 (uint64_t)VM_KERNEL_ADDRPERM(so));
1879 error = EINVAL;
1880 goto unlock;
1881 }
1882
1883 if (entry->cfe_flags & CFEF_SENT_SOCK_ATTACHED) {
1884 entry->cfe_flags |= CFEF_DATA_START;
1885 } else {
1886 CFIL_LOG(LOG_ERR,
1887 "so %llx attached not sent for %u",
1888 (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit);
1889 error = EINVAL;
1890 goto unlock;
1891 }
1892
1893 microuptime(&entry->cfe_last_action);
1894 CFI_ADD_TIME_LOG(cfil_info, &entry->cfe_last_action, &cfil_info->cfi_first_event, msghdr->cfm_op);
1895
1896 action_msg = (struct cfil_msg_action *)msghdr;
1897
1898 switch (msghdr->cfm_op) {
1899 case CFM_OP_DATA_UPDATE:
1900 #if VERDICT_DEBUG
1901 CFIL_LOG(LOG_ERR, "CFIL: VERDICT RECEIVED: <so %llx sockID %llu> <IN peek:%llu pass:%llu, OUT peek:%llu pass:%llu>",
1902 (uint64_t)VM_KERNEL_ADDRPERM(so),
1903 cfil_info->cfi_sock_id,
1904 action_msg->cfa_in_peek_offset, action_msg->cfa_in_pass_offset,
1905 action_msg->cfa_out_peek_offset, action_msg->cfa_out_pass_offset);
1906 #endif
1907 /*
1908 * Received a verdict; at this point we know this
1909 * socket connection is allowed. Unblock the waiting thread
1910 * immediately before proceeding to process the verdict.
1911 */
1912 cfil_sock_received_verdict(so);
1913
1914 if (action_msg->cfa_out_peek_offset != 0 ||
1915 action_msg->cfa_out_pass_offset != 0) {
1916 error = cfil_action_data_pass(so, cfil_info, kcunit, 1,
1917 action_msg->cfa_out_pass_offset,
1918 action_msg->cfa_out_peek_offset);
1919 }
1920 if (error == EJUSTRETURN) {
1921 error = 0;
1922 }
1923 if (error != 0) {
1924 break;
1925 }
1926 if (action_msg->cfa_in_peek_offset != 0 ||
1927 action_msg->cfa_in_pass_offset != 0) {
1928 error = cfil_action_data_pass(so, cfil_info, kcunit, 0,
1929 action_msg->cfa_in_pass_offset,
1930 action_msg->cfa_in_peek_offset);
1931 }
1932 if (error == EJUSTRETURN) {
1933 error = 0;
1934 }
1935
1936 // Toggle stats reporting according to received verdict.
1937 cfil_rw_lock_exclusive(&cfil_lck_rw);
1938 cfil_info_stats_toggle(cfil_info, entry, action_msg->cfa_stats_frequency);
1939 cfil_rw_unlock_exclusive(&cfil_lck_rw);
1940
1941 break;
1942
1943 case CFM_OP_DROP:
1944 #if VERDICT_DEBUG
1945 CFIL_LOG(LOG_ERR, "CFIL: VERDICT DROP RECEIVED: <so %llx sockID %llu> <IN peek:%llu pass:%llu, OUT peek:%llu pass:%llu>",
1946 (uint64_t)VM_KERNEL_ADDRPERM(so),
1947 cfil_info->cfi_sock_id,
1948 action_msg->cfa_in_peek_offset, action_msg->cfa_in_pass_offset,
1949 action_msg->cfa_out_peek_offset, action_msg->cfa_out_pass_offset);
1950 #endif
1951 error = cfil_action_drop(so, cfil_info, kcunit);
1952 cfil_sock_received_verdict(so);
1953 break;
1954
1955 default:
1956 error = EINVAL;
1957 break;
1958 }
1959 unlock:
1960 socket_unlock(so, 1);
1961 done:
1962 mbuf_freem(m);
1963
1964 if (error == 0) {
1965 OSIncrementAtomic(&cfil_stats.cfs_ctl_send_ok);
1966 } else {
1967 OSIncrementAtomic(&cfil_stats.cfs_ctl_send_bad);
1968 }
1969
1970 return error;
1971 }
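/*
 * For illustration only: the user space side is not part of this file, but a
 * filter agent would typically answer with a CFM_OP_DATA_UPDATE action along
 * the lines of the sketch below, sent over its kernel control socket.
 * "event_sock_id" and "kctl_fd" are placeholder names; CFM_MAX_OFFSET
 * (from the content filter headers) passes all current and future data.
 *
 *	struct cfil_msg_action action = {
 *		.cfa_msghdr = {
 *			.cfm_len = sizeof(struct cfil_msg_action),
 *			.cfm_version = CFM_VERSION_CURRENT,
 *			.cfm_type = CFM_TYPE_ACTION,
 *			.cfm_op = CFM_OP_DATA_UPDATE,
 *			.cfm_sock_id = event_sock_id,
 *		},
 *		.cfa_in_pass_offset = CFM_MAX_OFFSET,
 *		.cfa_in_peek_offset = CFM_MAX_OFFSET,
 *		.cfa_out_pass_offset = CFM_MAX_OFFSET,
 *		.cfa_out_peek_offset = CFM_MAX_OFFSET,
 *	};
 *	send(kctl_fd, &action, sizeof(action), 0);
 */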
1972
1973 static errno_t
1974 cfil_ctl_getopt(kern_ctl_ref kctlref, u_int32_t kcunit, void *unitinfo,
1975 int opt, void *data, size_t *len)
1976 {
1977 #pragma unused(kctlref, opt)
1978 struct cfil_info *cfil_info = NULL;
1979 errno_t error = 0;
1980 struct content_filter *cfc = (struct content_filter *)unitinfo;
1981
1982 CFIL_LOG(LOG_NOTICE, "");
1983
1984 cfil_rw_lock_shared(&cfil_lck_rw);
1985
1986 if (content_filters == NULL) {
1987 CFIL_LOG(LOG_ERR, "no content filter");
1988 error = EINVAL;
1989 goto done;
1990 }
1991 if (kcunit > MAX_CONTENT_FILTER) {
1992 CFIL_LOG(LOG_ERR, "kcunit %u > MAX_CONTENT_FILTER (%d)",
1993 kcunit, MAX_CONTENT_FILTER);
1994 error = EINVAL;
1995 goto done;
1996 }
1997 if (cfc != (void *)content_filters[kcunit - 1]) {
1998 CFIL_LOG(LOG_ERR, "unitinfo does not match for kcunit %u",
1999 kcunit);
2000 error = EINVAL;
2001 goto done;
2002 }
2003 switch (opt) {
2004 case CFIL_OPT_NECP_CONTROL_UNIT:
2005 if (*len < sizeof(uint32_t)) {
2006 CFIL_LOG(LOG_ERR, "len too small %lu", *len);
2007 error = EINVAL;
2008 goto done;
2009 }
2010 if (data != NULL) {
2011 *(uint32_t *)data = cfc->cf_necp_control_unit;
2012 }
2013 break;
2014 case CFIL_OPT_GET_SOCKET_INFO:
2015 if (*len != sizeof(struct cfil_opt_sock_info)) {
2016 CFIL_LOG(LOG_ERR, "len does not match %lu", *len);
2017 error = EINVAL;
2018 goto done;
2019 }
2020 if (data == NULL) {
2021 CFIL_LOG(LOG_ERR, "data not passed");
2022 error = EINVAL;
2023 goto done;
2024 }
2025
2026 struct cfil_opt_sock_info *sock_info =
2027 (struct cfil_opt_sock_info *) data;
2028
2029 // Unlock here so that we never hold both cfil_lck_rw and the
2030 // socket_lock at the same time. Otherwise, this can deadlock
2031 // because soclose() takes the socket_lock and then exclusive
2032 // cfil_lck_rw and we require the opposite order.
2033
2034 // WARNING: Be sure to never use anything protected
2035 // by cfil_lck_rw beyond this point.
2036 // WARNING: Be sure to avoid fallthrough and
2037 // goto return_already_unlocked from this branch.
2038 cfil_rw_unlock_shared(&cfil_lck_rw);
2039
2040 // Search (TCP+UDP) and lock socket
2041 struct socket *sock =
2042 cfil_socket_from_sock_id(sock_info->cfs_sock_id, false);
2043 if (sock == NULL) {
2044 #if LIFECYCLE_DEBUG
2045 CFIL_LOG(LOG_ERR, "CFIL: GET_SOCKET_INFO failed: bad sock_id %llu",
2046 sock_info->cfs_sock_id);
2047 #endif
2048 error = ENOENT;
2049 goto return_already_unlocked;
2050 }
2051
2052 cfil_info = (sock->so_cfil_db != NULL) ?
2053 cfil_db_get_cfil_info(sock->so_cfil_db, sock_info->cfs_sock_id) : sock->so_cfil;
2054
2055 if (cfil_info == NULL) {
2056 #if LIFECYCLE_DEBUG
2057 CFIL_LOG(LOG_ERR, "CFIL: GET_SOCKET_INFO failed: so %llx not attached, cannot fetch info",
2058 (uint64_t)VM_KERNEL_ADDRPERM(sock));
2059 #endif
2060 error = EINVAL;
2061 socket_unlock(sock, 1);
2062 goto return_already_unlocked;
2063 }
2064
2065 // Fill out family, type, and protocol
2066 sock_info->cfs_sock_family = sock->so_proto->pr_domain->dom_family;
2067 sock_info->cfs_sock_type = sock->so_proto->pr_type;
2068 sock_info->cfs_sock_protocol = sock->so_proto->pr_protocol;
2069
2070 // Source and destination addresses
2071 struct inpcb *inp = sotoinpcb(sock);
2072 if (inp->inp_vflag & INP_IPV6) {
2073 struct in6_addr *laddr = NULL, *faddr = NULL;
2074 u_int16_t lport = 0, fport = 0;
2075
2076 cfil_get_flow_address_v6(cfil_info->cfi_hash_entry, inp,
2077 &laddr, &faddr, &lport, &fport);
2078 fill_ip6_sockaddr_4_6(&sock_info->cfs_local, laddr, lport);
2079 fill_ip6_sockaddr_4_6(&sock_info->cfs_remote, faddr, fport);
2080 } else if (inp->inp_vflag & INP_IPV4) {
2081 struct in_addr laddr = {.s_addr = 0}, faddr = {.s_addr = 0};
2082 u_int16_t lport = 0, fport = 0;
2083
2084 cfil_get_flow_address(cfil_info->cfi_hash_entry, inp,
2085 &laddr, &faddr, &lport, &fport);
2086 fill_ip_sockaddr_4_6(&sock_info->cfs_local, laddr, lport);
2087 fill_ip_sockaddr_4_6(&sock_info->cfs_remote, faddr, fport);
2088 }
2089
2090 // Set the pid info
2091 sock_info->cfs_pid = sock->last_pid;
2092 memcpy(sock_info->cfs_uuid, sock->last_uuid, sizeof(uuid_t));
2093
2094 if (sock->so_flags & SOF_DELEGATED) {
2095 sock_info->cfs_e_pid = sock->e_pid;
2096 memcpy(sock_info->cfs_e_uuid, sock->e_uuid, sizeof(uuid_t));
2097 } else {
2098 sock_info->cfs_e_pid = sock->last_pid;
2099 memcpy(sock_info->cfs_e_uuid, sock->last_uuid, sizeof(uuid_t));
2100 }
2101
2102 socket_unlock(sock, 1);
2103
2104 goto return_already_unlocked;
2105 default:
2106 error = ENOPROTOOPT;
2107 break;
2108 }
2109 done:
2110 cfil_rw_unlock_shared(&cfil_lck_rw);
2111
2112 return error;
2113
2114 return_already_unlocked:
2115
2116 return error;
2117 }
2118
2119 static errno_t
2120 cfil_ctl_setopt(kern_ctl_ref kctlref, u_int32_t kcunit, void *unitinfo,
2121 int opt, void *data, size_t len)
2122 {
2123 #pragma unused(kctlref, opt)
2124 errno_t error = 0;
2125 struct content_filter *cfc = (struct content_filter *)unitinfo;
2126
2127 CFIL_LOG(LOG_NOTICE, "");
2128
2129 cfil_rw_lock_exclusive(&cfil_lck_rw);
2130
2131 if (content_filters == NULL) {
2132 CFIL_LOG(LOG_ERR, "no content filter");
2133 error = EINVAL;
2134 goto done;
2135 }
2136 if (kcunit > MAX_CONTENT_FILTER) {
2137 CFIL_LOG(LOG_ERR, "kcunit %u > MAX_CONTENT_FILTER (%d)",
2138 kcunit, MAX_CONTENT_FILTER);
2139 error = EINVAL;
2140 goto done;
2141 }
2142 if (cfc != (void *)content_filters[kcunit - 1]) {
2143 CFIL_LOG(LOG_ERR, "unitinfo does not match for kcunit %u",
2144 kcunit);
2145 error = EINVAL;
2146 goto done;
2147 }
2148 switch (opt) {
2149 case CFIL_OPT_NECP_CONTROL_UNIT:
2150 if (len < sizeof(uint32_t)) {
2151 CFIL_LOG(LOG_ERR, "CFIL_OPT_NECP_CONTROL_UNIT "
2152 "len too small %lu", len);
2153 error = EINVAL;
2154 goto done;
2155 }
2156 if (cfc->cf_necp_control_unit != 0) {
2157 CFIL_LOG(LOG_ERR, "CFIL_OPT_NECP_CONTROL_UNIT "
2158 "already set %u",
2159 cfc->cf_necp_control_unit);
2160 error = EINVAL;
2161 goto done;
2162 }
2163 cfc->cf_necp_control_unit = *(uint32_t *)data;
2164 break;
2165 default:
2166 error = ENOPROTOOPT;
2167 break;
2168 }
2169 done:
2170 cfil_rw_unlock_exclusive(&cfil_lck_rw);
2171
2172 return error;
2173 }
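/*
 * For illustration only (user space side, not part of this file): a filter
 * agent typically attaches to this subsystem by opening a kernel control
 * socket named CONTENT_FILTER_CONTROL_NAME and then setting its NECP filter
 * control unit via the setopt handled above. A minimal sketch, assuming the
 * standard kernel control socket APIs and a hypothetical unit value of 1:
 *
 *	struct ctl_info info = { 0 };
 *	struct sockaddr_ctl addr = { 0 };
 *	int fd = socket(PF_SYSTEM, SOCK_DGRAM, SYSPROTO_CONTROL);
 *	strlcpy(info.ctl_name, CONTENT_FILTER_CONTROL_NAME, sizeof(info.ctl_name));
 *	ioctl(fd, CTLIOCGINFO, &info);
 *	addr.sc_len = sizeof(addr);
 *	addr.sc_family = AF_SYSTEM;
 *	addr.ss_sysaddr = AF_SYS_CONTROL;
 *	addr.sc_id = info.ctl_id;
 *	addr.sc_unit = 0;
 *	connect(fd, (struct sockaddr *)&addr, sizeof(addr));
 *	uint32_t unit = 1;	// hypothetical NECP filter control unit
 *	setsockopt(fd, SYSPROTO_CONTROL, CFIL_OPT_NECP_CONTROL_UNIT,
 *	    &unit, sizeof(unit));
 */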
2174
2175
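/*
 * cfil_ctl_rcvd()
 *
 * Called when the user space agent has read data off its kernel control
 * socket, freeing up buffer space. Clears the filter's flow-controlled state
 * and services the control queues of any entry that was flow controlled,
 * until flow control is raised again.
 */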
2176 static void
2177 cfil_ctl_rcvd(kern_ctl_ref kctlref, u_int32_t kcunit, void *unitinfo, int flags)
2178 {
2179 #pragma unused(kctlref, flags)
2180 struct content_filter *cfc = (struct content_filter *)unitinfo;
2181 struct socket *so = NULL;
2182 int error;
2183 struct cfil_entry *entry;
2184 struct cfil_info *cfil_info = NULL;
2185
2186 CFIL_LOG(LOG_INFO, "");
2187
2188 if (content_filters == NULL) {
2189 CFIL_LOG(LOG_ERR, "no content filter");
2190 OSIncrementAtomic(&cfil_stats.cfs_ctl_rcvd_bad);
2191 return;
2192 }
2193 if (kcunit > MAX_CONTENT_FILTER) {
2194 CFIL_LOG(LOG_ERR, "kcunit %u > MAX_CONTENT_FILTER (%d)",
2195 kcunit, MAX_CONTENT_FILTER);
2196 OSIncrementAtomic(&cfil_stats.cfs_ctl_rcvd_bad);
2197 return;
2198 }
2199 cfil_rw_lock_shared(&cfil_lck_rw);
2200 if (cfc != (void *)content_filters[kcunit - 1]) {
2201 CFIL_LOG(LOG_ERR, "unitinfo does not match for kcunit %u",
2202 kcunit);
2203 OSIncrementAtomic(&cfil_stats.cfs_ctl_rcvd_bad);
2204 goto done;
2205 }
2206 /* Let's assume the flow control is lifted */
2207 if (cfc->cf_flags & CFF_FLOW_CONTROLLED) {
2208 if (!cfil_rw_lock_shared_to_exclusive(&cfil_lck_rw)) {
2209 cfil_rw_lock_exclusive(&cfil_lck_rw);
2210 }
2211
2212 cfc->cf_flags &= ~CFF_FLOW_CONTROLLED;
2213
2214 cfil_rw_lock_exclusive_to_shared(&cfil_lck_rw);
2215 LCK_RW_ASSERT(&cfil_lck_rw, LCK_RW_ASSERT_SHARED);
2216 }
2217 /*
2218 * Flow control will be raised again as soon as an entry cannot enqueue
2219 * to the kernel control socket
2220 */
2221 while ((cfc->cf_flags & CFF_FLOW_CONTROLLED) == 0) {
2222 verify_content_filter(cfc);
2223
2224 cfil_rw_lock_assert_held(&cfil_lck_rw, 0);
2225
2226 /* Find an entry that is flow controlled */
2227 TAILQ_FOREACH(entry, &cfc->cf_sock_entries, cfe_link) {
2228 if (entry->cfe_cfil_info == NULL ||
2229 entry->cfe_cfil_info->cfi_so == NULL) {
2230 continue;
2231 }
2232 if ((entry->cfe_flags & CFEF_FLOW_CONTROLLED) == 0) {
2233 continue;
2234 }
break; /* found a flow controlled entry */
2235 }
2236 if (entry == NULL) {
2237 break;
2238 }
2239
2240 OSIncrementAtomic(&cfil_stats.cfs_ctl_rcvd_flow_lift);
2241
2242 cfil_info = entry->cfe_cfil_info;
2243 so = cfil_info->cfi_so;
2244
2245 cfil_rw_unlock_shared(&cfil_lck_rw);
2246 socket_lock(so, 1);
2247
2248 do {
2249 error = cfil_acquire_sockbuf(so, cfil_info, 1);
2250 if (error == 0) {
2251 error = cfil_data_service_ctl_q(so, cfil_info, kcunit, 1);
2252 }
2253 cfil_release_sockbuf(so, 1);
2254 if (error != 0) {
2255 break;
2256 }
2257
2258 error = cfil_acquire_sockbuf(so, cfil_info, 0);
2259 if (error == 0) {
2260 error = cfil_data_service_ctl_q(so, cfil_info, kcunit, 0);
2261 }
2262 cfil_release_sockbuf(so, 0);
2263 } while (0);
2264
2265 socket_lock_assert_owned(so);
2266 socket_unlock(so, 1);
2267
2268 cfil_rw_lock_shared(&cfil_lck_rw);
2269 }
2270 done:
2271 cfil_rw_unlock_shared(&cfil_lck_rw);
2272 }
2273
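/*
 * cfil_init()
 *
 * One-time subsystem initialization: compile and run time sanity checks,
 * zone and lock setup, registration of the content filter kernel control,
 * and creation of the UDP garbage collection and statistics reporting
 * kernel threads.
 */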
2274 void
2275 cfil_init(void)
2276 {
2277 struct kern_ctl_reg kern_ctl;
2278 errno_t error = 0;
2279 vm_size_t content_filter_size = 0; /* size of content_filter */
2280 vm_size_t cfil_info_size = 0; /* size of cfil_info */
2281 vm_size_t cfil_hash_entry_size = 0; /* size of cfil_hash_entry */
2282 vm_size_t cfil_db_size = 0; /* size of cfil_db */
2283 unsigned int mbuf_limit = 0;
2284
2285 CFIL_LOG(LOG_NOTICE, "");
2286
2287 /*
2288 * Compile time verifications
2289 */
2290 _CASSERT(CFIL_MAX_FILTER_COUNT == MAX_CONTENT_FILTER);
2291 _CASSERT(sizeof(struct cfil_filter_stat) % sizeof(uint32_t) == 0);
2292 _CASSERT(sizeof(struct cfil_entry_stat) % sizeof(uint32_t) == 0);
2293 _CASSERT(sizeof(struct cfil_sock_stat) % sizeof(uint32_t) == 0);
2294
2295 /*
2296 * Run time verifications
2297 */
2298 VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_ctl_q_in_enqueued,
2299 sizeof(uint32_t)));
2300 VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_ctl_q_out_enqueued,
2301 sizeof(uint32_t)));
2302 VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_ctl_q_in_peeked,
2303 sizeof(uint32_t)));
2304 VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_ctl_q_out_peeked,
2305 sizeof(uint32_t)));
2306
2307 VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_pending_q_in_enqueued,
2308 sizeof(uint32_t)));
2309 VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_pending_q_out_enqueued,
2310 sizeof(uint32_t)));
2311
2312 VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_inject_q_in_enqueued,
2313 sizeof(uint32_t)));
2314 VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_inject_q_out_enqueued,
2315 sizeof(uint32_t)));
2316 VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_inject_q_in_passed,
2317 sizeof(uint32_t)));
2318 VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_inject_q_out_passed,
2319 sizeof(uint32_t)));
2320
2321 /*
2322 * Zone for content filters kernel control sockets
2323 */
2324 content_filter_size = sizeof(struct content_filter);
2325 content_filter_zone = zinit(content_filter_size,
2326 CONTENT_FILTER_ZONE_MAX * content_filter_size,
2327 0,
2328 CONTENT_FILTER_ZONE_NAME);
2329 if (content_filter_zone == NULL) {
2330 panic("%s: zinit(%s) failed", __func__,
2331 CONTENT_FILTER_ZONE_NAME);
2332 /* NOTREACHED */
2333 }
2334 zone_change(content_filter_zone, Z_CALLERACCT, FALSE);
2335 zone_change(content_filter_zone, Z_EXPAND, TRUE);
2336
2337 /*
2338 * Zone for per socket content filters
2339 */
2340 cfil_info_size = sizeof(struct cfil_info);
2341 cfil_info_zone = zinit(cfil_info_size,
2342 CFIL_INFO_ZONE_MAX * cfil_info_size,
2343 0,
2344 CFIL_INFO_ZONE_NAME);
2345 if (cfil_info_zone == NULL) {
2346 panic("%s: zinit(%s) failed", __func__, CFIL_INFO_ZONE_NAME);
2347 /* NOTREACHED */
2348 }
2349 zone_change(cfil_info_zone, Z_CALLERACCT, FALSE);
2350 zone_change(cfil_info_zone, Z_EXPAND, TRUE);
2351
2352 /*
2353 * Zone for content filters cfil hash entries and db
2354 */
2355 cfil_hash_entry_size = sizeof(struct cfil_hash_entry);
2356 cfil_hash_entry_zone = zinit(cfil_hash_entry_size,
2357 CFIL_HASH_ENTRY_ZONE_MAX * cfil_hash_entry_size,
2358 0,
2359 CFIL_HASH_ENTRY_ZONE_NAME);
2360 if (cfil_hash_entry_zone == NULL) {
2361 panic("%s: zinit(%s) failed", __func__, CFIL_HASH_ENTRY_ZONE_NAME);
2362 /* NOTREACHED */
2363 }
2364 zone_change(cfil_hash_entry_zone, Z_CALLERACCT, FALSE);
2365 zone_change(cfil_hash_entry_zone, Z_EXPAND, TRUE);
2366
2367 cfil_db_size = sizeof(struct cfil_db);
2368 cfil_db_zone = zinit(cfil_db_size,
2369 CFIL_DB_ZONE_MAX * cfil_db_size,
2370 0,
2371 CFIL_DB_ZONE_NAME);
2372 if (cfil_db_zone == NULL) {
2373 panic("%s: zinit(%s) failed", __func__, CFIL_DB_ZONE_NAME);
2374 /* NOTREACHED */
2375 }
2376 zone_change(cfil_db_zone, Z_CALLERACCT, FALSE);
2377 zone_change(cfil_db_zone, Z_EXPAND, TRUE);
2378
2379 /*
2380 * Allocate locks
2381 */
2382 cfil_lck_grp_attr = lck_grp_attr_alloc_init();
2383 if (cfil_lck_grp_attr == NULL) {
2384 panic("%s: lck_grp_attr_alloc_init failed", __func__);
2385 /* NOTREACHED */
2386 }
2387 cfil_lck_grp = lck_grp_alloc_init("content filter",
2388 cfil_lck_grp_attr);
2389 if (cfil_lck_grp == NULL) {
2390 panic("%s: lck_grp_alloc_init failed", __func__);
2391 /* NOTREACHED */
2392 }
2393 cfil_lck_attr = lck_attr_alloc_init();
2394 if (cfil_lck_attr == NULL) {
2395 panic("%s: lck_attr_alloc_init failed", __func__);
2396 /* NOTREACHED */
2397 }
2398 lck_rw_init(&cfil_lck_rw, cfil_lck_grp, cfil_lck_attr);
2399
2400 TAILQ_INIT(&cfil_sock_head);
2401 TAILQ_INIT(&cfil_sock_head_stats);
2402
2403 /*
2404 * Register kernel control
2405 */
2406 bzero(&kern_ctl, sizeof(kern_ctl));
2407 strlcpy(kern_ctl.ctl_name, CONTENT_FILTER_CONTROL_NAME,
2408 sizeof(kern_ctl.ctl_name));
2409 kern_ctl.ctl_flags = CTL_FLAG_PRIVILEGED | CTL_FLAG_REG_EXTENDED;
2410 kern_ctl.ctl_sendsize = 512 * 1024; /* enough? */
2411 kern_ctl.ctl_recvsize = 512 * 1024; /* enough? */
2412 kern_ctl.ctl_connect = cfil_ctl_connect;
2413 kern_ctl.ctl_disconnect = cfil_ctl_disconnect;
2414 kern_ctl.ctl_send = cfil_ctl_send;
2415 kern_ctl.ctl_getopt = cfil_ctl_getopt;
2416 kern_ctl.ctl_setopt = cfil_ctl_setopt;
2417 kern_ctl.ctl_rcvd = cfil_ctl_rcvd;
2418 error = ctl_register(&kern_ctl, &cfil_kctlref);
2419 if (error != 0) {
2420 CFIL_LOG(LOG_ERR, "ctl_register failed: %d", error);
2421 return;
2422 }
2423
2424 // Spawn thread for garbage collection
2425 if (kernel_thread_start(cfil_udp_gc_thread_func, NULL,
2426 &cfil_udp_gc_thread) != KERN_SUCCESS) {
2427 panic_plain("%s: Can't create UDP GC thread", __func__);
2428 /* NOTREACHED */
2429 }
2430 /* this must not fail */
2431 VERIFY(cfil_udp_gc_thread != NULL);
2432
2433 // Spawn thread for statistics reporting
2434 if (kernel_thread_start(cfil_stats_report_thread_func, NULL,
2435 &cfil_stats_report_thread) != KERN_SUCCESS) {
2436 panic_plain("%s: Can't create statistics report thread", __func__);
2437 /* NOTREACHED */
2438 }
2439 /* this must not fail */
2440 VERIFY(cfil_stats_report_thread != NULL);
2441
2442 // Set UDP per-flow mbuf thresholds to 1/32 of platform max
2443 mbuf_limit = MAX(UDP_FLOW_GC_MBUF_CNT_MAX, (nmbclusters << MCLSHIFT) >> UDP_FLOW_GC_MBUF_SHIFT);
2444 cfil_udp_gc_mbuf_num_max = (mbuf_limit >> MCLSHIFT);
2445 cfil_udp_gc_mbuf_cnt_max = mbuf_limit;
2446
2447 memset(&global_cfil_stats_report_buffers, 0, sizeof(global_cfil_stats_report_buffers));
2448 }
2449
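/*
 * cfil_info_alloc()
 *
 * Allocate and initialize the per-flow cfil_info. For TCP (hash_entry ==
 * NULL) it is hung off so->so_cfil; for UDP it is attached to the per-socket
 * hash entry. In both cases the cfi_sock_id is built from so_gencnt and the
 * flow hash so the flow can later be found by cfil_socket_from_sock_id().
 */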
2450 struct cfil_info *
2451 cfil_info_alloc(struct socket *so, struct cfil_hash_entry *hash_entry)
2452 {
2453 int kcunit;
2454 struct cfil_info *cfil_info = NULL;
2455 struct inpcb *inp = sotoinpcb(so);
2456
2457 CFIL_LOG(LOG_INFO, "");
2458
2459 socket_lock_assert_owned(so);
2460
2461 cfil_info = zalloc(cfil_info_zone);
2462 if (cfil_info == NULL) {
2463 goto done;
2464 }
2465 bzero(cfil_info, sizeof(struct cfil_info));
2466
2467 cfil_queue_init(&cfil_info->cfi_snd.cfi_inject_q);
2468 cfil_queue_init(&cfil_info->cfi_rcv.cfi_inject_q);
2469
2470 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
2471 struct cfil_entry *entry;
2472
2473 entry = &cfil_info->cfi_entries[kcunit - 1];
2474 entry->cfe_cfil_info = cfil_info;
2475
2476 /* Initialize the filter entry */
2477 entry->cfe_filter = NULL;
2478 entry->cfe_flags = 0;
2479 entry->cfe_necp_control_unit = 0;
2480 entry->cfe_snd.cfe_pass_offset = 0;
2481 entry->cfe_snd.cfe_peek_offset = 0;
2482 entry->cfe_snd.cfe_peeked = 0;
2483 entry->cfe_rcv.cfe_pass_offset = 0;
2484 entry->cfe_rcv.cfe_peek_offset = 0;
2485 entry->cfe_rcv.cfe_peeked = 0;
2486 /*
2487 * Timestamp the last action to avoid prematurely
2488 * triggering garbage collection
2489 */
2490 microuptime(&entry->cfe_last_action);
2491
2492 cfil_queue_init(&entry->cfe_snd.cfe_pending_q);
2493 cfil_queue_init(&entry->cfe_rcv.cfe_pending_q);
2494 cfil_queue_init(&entry->cfe_snd.cfe_ctl_q);
2495 cfil_queue_init(&entry->cfe_rcv.cfe_ctl_q);
2496 }
2497
2498 cfil_rw_lock_exclusive(&cfil_lck_rw);
2499
2500 /*
2501 * Create a cfi_sock_id that's not the socket pointer!
2502 */
2503
2504 if (hash_entry == NULL) {
2505 // This is the TCP case, cfil_info is tracked per socket
2506 if (inp->inp_flowhash == 0) {
2507 inp->inp_flowhash = inp_calc_flowhash(inp);
2508 }
2509
2510 so->so_cfil = cfil_info;
2511 cfil_info->cfi_so = so;
2512 cfil_info->cfi_sock_id =
2513 ((so->so_gencnt << 32) | inp->inp_flowhash);
2514 } else {
2515 // This is the UDP case, cfil_info is tracked in per-socket hash
2516 cfil_info->cfi_so = so;
2517 hash_entry->cfentry_cfil = cfil_info;
2518 cfil_info->cfi_hash_entry = hash_entry;
2519 cfil_info->cfi_sock_id = ((so->so_gencnt << 32) | (hash_entry->cfentry_flowhash & 0xffffffff));
2520 CFIL_LOG(LOG_DEBUG, "CFIL: UDP inp_flowhash %x so_gencnt %llx entry flowhash %x sockID %llx",
2521 inp->inp_flowhash, so->so_gencnt, hash_entry->cfentry_flowhash, cfil_info->cfi_sock_id);
2522
2523 // Wake up the gc thread if this is the first flow added
2524 if (cfil_sock_udp_attached_count == 0) {
2525 thread_wakeup((caddr_t)&cfil_sock_udp_attached_count);
2526 }
2527
2528 cfil_sock_udp_attached_count++;
2529 }
2530
2531 TAILQ_INSERT_TAIL(&cfil_sock_head, cfil_info, cfi_link);
2532 SLIST_INIT(&cfil_info->cfi_ordered_entries);
2533
2534 cfil_sock_attached_count++;
2535
2536 cfil_rw_unlock_exclusive(&cfil_lck_rw);
2537
2538 done:
2539 if (cfil_info != NULL) {
2540 OSIncrementAtomic(&cfil_stats.cfs_cfi_alloc_ok);
2541 } else {
2542 OSIncrementAtomic(&cfil_stats.cfs_cfi_alloc_fail);
2543 }
2544
2545 return cfil_info;
2546 }
2547
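/*
 * cfil_info_attach_unit()
 *
 * Attach the flow to every registered content filter whose NECP control unit
 * bits match filter_control_unit, and keep the attached entries in a list
 * ordered by control unit so filters are consulted in a stable order.
 * Returns non-zero if at least one filter was attached.
 */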
2548 int
2549 cfil_info_attach_unit(struct socket *so, uint32_t filter_control_unit, struct cfil_info *cfil_info)
2550 {
2551 int kcunit;
2552 int attached = 0;
2553
2554 CFIL_LOG(LOG_INFO, "");
2555
2556 socket_lock_assert_owned(so);
2557
2558 cfil_rw_lock_exclusive(&cfil_lck_rw);
2559
2560 for (kcunit = 1;
2561 content_filters != NULL && kcunit <= MAX_CONTENT_FILTER;
2562 kcunit++) {
2563 struct content_filter *cfc = content_filters[kcunit - 1];
2564 struct cfil_entry *entry;
2565 struct cfil_entry *iter_entry;
2566 struct cfil_entry *iter_prev;
2567
2568 if (cfc == NULL) {
2569 continue;
2570 }
2571 if (!(cfc->cf_necp_control_unit & filter_control_unit)) {
2572 continue;
2573 }
2574
2575 entry = &cfil_info->cfi_entries[kcunit - 1];
2576
2577 entry->cfe_filter = cfc;
2578 entry->cfe_necp_control_unit = cfc->cf_necp_control_unit;
2579 TAILQ_INSERT_TAIL(&cfc->cf_sock_entries, entry, cfe_link);
2580 cfc->cf_sock_count++;
2581
2582 /* Insert the entry into the list ordered by control unit */
2583 iter_prev = NULL;
2584 SLIST_FOREACH(iter_entry, &cfil_info->cfi_ordered_entries, cfe_order_link) {
2585 if (entry->cfe_necp_control_unit < iter_entry->cfe_necp_control_unit) {
2586 break;
2587 }
2588 iter_prev = iter_entry;
2589 }
2590
2591 if (iter_prev == NULL) {
2592 SLIST_INSERT_HEAD(&cfil_info->cfi_ordered_entries, entry, cfe_order_link);
2593 } else {
2594 SLIST_INSERT_AFTER(iter_prev, entry, cfe_order_link);
2595 }
2596
2597 verify_content_filter(cfc);
2598 attached = 1;
2599 entry->cfe_flags |= CFEF_CFIL_ATTACHED;
2600 }
2601
2602 cfil_rw_unlock_exclusive(&cfil_lck_rw);
2603
2604 return attached;
2605 }
2606
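/*
 * cfil_info_free()
 *
 * Detach the flow from every filter it is attached to, remove it from the
 * global (and stats) lists, drain all pending, control and inject queues,
 * and return the cfil_info to its zone.
 */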
2607 static void
2608 cfil_info_free(struct cfil_info *cfil_info)
2609 {
2610 int kcunit;
2611 uint64_t in_drain = 0;
2612 uint64_t out_drained = 0;
2613
2614 if (cfil_info == NULL) {
2615 return;
2616 }
2617
2618 CFIL_LOG(LOG_INFO, "");
2619
2620 cfil_rw_lock_exclusive(&cfil_lck_rw);
2621
2622 for (kcunit = 1;
2623 content_filters != NULL && kcunit <= MAX_CONTENT_FILTER;
2624 kcunit++) {
2625 struct cfil_entry *entry;
2626 struct content_filter *cfc;
2627
2628 entry = &cfil_info->cfi_entries[kcunit - 1];
2629
2630 /* Don't be silly and try to detach twice */
2631 if (entry->cfe_filter == NULL) {
2632 continue;
2633 }
2634
2635 cfc = content_filters[kcunit - 1];
2636
2637 VERIFY(cfc == entry->cfe_filter);
2638
2639 entry->cfe_filter = NULL;
2640 entry->cfe_necp_control_unit = 0;
2641 TAILQ_REMOVE(&cfc->cf_sock_entries, entry, cfe_link);
2642 cfc->cf_sock_count--;
2643
2644 verify_content_filter(cfc);
2645 }
2646 if (cfil_info->cfi_hash_entry != NULL) {
2647 cfil_sock_udp_attached_count--;
2648 }
2649 cfil_sock_attached_count--;
2650 TAILQ_REMOVE(&cfil_sock_head, cfil_info, cfi_link);
2651
2652 // Turn off stats reporting for cfil_info.
2653 cfil_info_stats_toggle(cfil_info, NULL, 0);
2654
2655 out_drained += cfil_queue_drain(&cfil_info->cfi_snd.cfi_inject_q);
2656 in_drain += cfil_queue_drain(&cfil_info->cfi_rcv.cfi_inject_q);
2657
2658 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
2659 struct cfil_entry *entry;
2660
2661 entry = &cfil_info->cfi_entries[kcunit - 1];
2662 out_drained += cfil_queue_drain(&entry->cfe_snd.cfe_pending_q);
2663 in_drain += cfil_queue_drain(&entry->cfe_rcv.cfe_pending_q);
2664 out_drained += cfil_queue_drain(&entry->cfe_snd.cfe_ctl_q);
2665 in_drain += cfil_queue_drain(&entry->cfe_rcv.cfe_ctl_q);
2666 }
2667 cfil_rw_unlock_exclusive(&cfil_lck_rw);
2668
2669 if (out_drained) {
2670 OSIncrementAtomic(&cfil_stats.cfs_flush_out_free);
2671 }
2672 if (in_drain) {
2673 OSIncrementAtomic(&cfil_stats.cfs_flush_in_free);
2674 }
2675
2676 zfree(cfil_info_zone, cfil_info);
2677 }
2678
2679 /*
2680 * Received a verdict from userspace for a socket.
2681 * Perform any delayed operation if needed.
2682 */
2683 static void
2684 cfil_sock_received_verdict(struct socket *so)
2685 {
2686 if (so == NULL || so->so_cfil == NULL) {
2687 return;
2688 }
2689
2690 so->so_cfil->cfi_flags |= CFIF_INITIAL_VERDICT;
2691
2692 /*
2693 * If socket has already been connected, trigger
2694 * soisconnected now.
2695 */
2696 if (so->so_cfil->cfi_flags & CFIF_SOCKET_CONNECTED) {
2697 so->so_cfil->cfi_flags &= ~CFIF_SOCKET_CONNECTED;
2698 soisconnected(so);
2699 return;
2700 }
2701 }
2702
2703 /*
2704 * Entry point from Sockets layer
2705 * The socket is locked.
2706 *
2707 * Checks whether a connected socket is subject to filtering and
2708 * still pending the initial verdict.
2709 */
2710 boolean_t
2711 cfil_sock_connected_pending_verdict(struct socket *so)
2712 {
2713 if (so == NULL || so->so_cfil == NULL) {
2714 return false;
2715 }
2716
2717 if (so->so_cfil->cfi_flags & CFIF_INITIAL_VERDICT) {
2718 return false;
2719 } else {
2720 /*
2721 * Remember that this socket is already connected, so
2722 * we will trigger soisconnected() upon receipt of
2723 * initial verdict later.
2724 */
2725 so->so_cfil->cfi_flags |= CFIF_SOCKET_CONNECTED;
2726 return true;
2727 }
2728 }
2729
2730 boolean_t
2731 cfil_filter_present(void)
2732 {
2733 return cfil_active_count > 0;
2734 }
2735
2736 /*
2737 * Entry point from Sockets layer
2738 * The socket is locked.
2739 */
2740 errno_t
2741 cfil_sock_attach(struct socket *so, struct sockaddr *local, struct sockaddr *remote, int dir)
2742 {
2743 errno_t error = 0;
2744 uint32_t filter_control_unit;
2745
2746 socket_lock_assert_owned(so);
2747
2748 /* Limit ourselves to TCP sockets that are not MPTCP subflows */
2749 if ((so->so_proto->pr_domain->dom_family != PF_INET &&
2750 so->so_proto->pr_domain->dom_family != PF_INET6) ||
2751 so->so_proto->pr_type != SOCK_STREAM ||
2752 so->so_proto->pr_protocol != IPPROTO_TCP ||
2753 (so->so_flags & SOF_MP_SUBFLOW) != 0 ||
2754 (so->so_flags1 & SOF1_CONTENT_FILTER_SKIP) != 0) {
2755 goto done;
2756 }
2757
2758 filter_control_unit = necp_socket_get_content_filter_control_unit(so);
2759 if (filter_control_unit == 0) {
2760 goto done;
2761 }
2762
2763 if (filter_control_unit == NECP_FILTER_UNIT_NO_FILTER) {
2764 goto done;
2765 }
2766 if ((filter_control_unit & NECP_MASK_USERSPACE_ONLY) != 0) {
2767 OSIncrementAtomic(&cfil_stats.cfs_sock_userspace_only);
2768 goto done;
2769 }
2770 if (cfil_active_count == 0) {
2771 OSIncrementAtomic(&cfil_stats.cfs_sock_attach_in_vain);
2772 goto done;
2773 }
2774 if (so->so_cfil != NULL) {
2775 OSIncrementAtomic(&cfil_stats.cfs_sock_attach_already);
2776 CFIL_LOG(LOG_ERR, "already attached");
2777 } else {
2778 cfil_info_alloc(so, NULL);
2779 if (so->so_cfil == NULL) {
2780 error = ENOMEM;
2781 OSIncrementAtomic(&cfil_stats.cfs_sock_attach_no_mem);
2782 goto done;
2783 }
2784 so->so_cfil->cfi_dir = dir;
2785 }
2786 if (cfil_info_attach_unit(so, filter_control_unit, so->so_cfil) == 0) {
2787 CFIL_LOG(LOG_ERR, "cfil_info_attach_unit(%u) failed",
2788 filter_control_unit);
2789 OSIncrementAtomic(&cfil_stats.cfs_sock_attach_failed);
2790 goto done;
2791 }
2792 CFIL_LOG(LOG_INFO, "so %llx filter_control_unit %u sockID %llx",
2793 (uint64_t)VM_KERNEL_ADDRPERM(so),
2794 filter_control_unit, so->so_cfil->cfi_sock_id);
2795
2796 so->so_flags |= SOF_CONTENT_FILTER;
2797 OSIncrementAtomic(&cfil_stats.cfs_sock_attached);
2798
2799 /* Hold a reference on the socket */
2800 so->so_usecount++;
2801
2802 /*
2803 * Save passed addresses for attach event msg (in case resend
2804 * is needed).
2805 */
2806 if (remote != NULL) {
2807 memcpy(&so->so_cfil->cfi_so_attach_faddr, remote, remote->sa_len);
2808 }
2809 if (local != NULL) {
2810 memcpy(&so->so_cfil->cfi_so_attach_laddr, local, local->sa_len);
2811 }
2812
2813 error = cfil_dispatch_attach_event(so, so->so_cfil, 0, dir);
2814 /* We can recover from flow control or out of memory errors */
2815 if (error == ENOBUFS || error == ENOMEM) {
2816 error = 0;
2817 } else if (error != 0) {
2818 goto done;
2819 }
2820
2821 CFIL_INFO_VERIFY(so->so_cfil);
2822 done:
2823 return error;
2824 }
2825
2826 /*
2827 * Entry point from Sockets layer
2828 * The socket is locked.
2829 */
2830 errno_t
2831 cfil_sock_detach(struct socket *so)
2832 {
2833 if (IS_UDP(so)) {
2834 cfil_db_free(so);
2835 return 0;
2836 }
2837
2838 if (so->so_cfil) {
2839 if (so->so_flags & SOF_CONTENT_FILTER) {
2840 so->so_flags &= ~SOF_CONTENT_FILTER;
2841 VERIFY(so->so_usecount > 0);
2842 so->so_usecount--;
2843 }
2844 cfil_info_free(so->so_cfil);
2845 so->so_cfil = NULL;
2846 OSIncrementAtomic(&cfil_stats.cfs_sock_detached);
2847 }
2848 return 0;
2849 }
2850
2851 /*
2852 * Fill in the address info of an event message from either
2853 * the socket or the passed-in address info.
2854 */
2855 static void
2856 cfil_fill_event_msg_addresses(struct cfil_hash_entry *entry, struct inpcb *inp,
2857 union sockaddr_in_4_6 *sin_src, union sockaddr_in_4_6 *sin_dst,
2858 boolean_t isIPv4, boolean_t outgoing)
2859 {
2860 if (isIPv4) {
2861 struct in_addr laddr = {0}, faddr = {0};
2862 u_int16_t lport = 0, fport = 0;
2863
2864 cfil_get_flow_address(entry, inp, &laddr, &faddr, &lport, &fport);
2865
2866 if (outgoing) {
2867 fill_ip_sockaddr_4_6(sin_src, laddr, lport);
2868 fill_ip_sockaddr_4_6(sin_dst, faddr, fport);
2869 } else {
2870 fill_ip_sockaddr_4_6(sin_src, faddr, fport);
2871 fill_ip_sockaddr_4_6(sin_dst, laddr, lport);
2872 }
2873 } else {
2874 struct in6_addr *laddr = NULL, *faddr = NULL;
2875 u_int16_t lport = 0, fport = 0;
2876
2877 cfil_get_flow_address_v6(entry, inp, &laddr, &faddr, &lport, &fport);
2878 if (outgoing) {
2879 fill_ip6_sockaddr_4_6(sin_src, laddr, lport);
2880 fill_ip6_sockaddr_4_6(sin_dst, faddr, fport);
2881 } else {
2882 fill_ip6_sockaddr_4_6(sin_src, faddr, fport);
2883 fill_ip6_sockaddr_4_6(sin_dst, laddr, lport);
2884 }
2885 }
2886 }
2887
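/*
 * cfil_dispatch_*_event_sign()
 *
 * The attach, data and closed event signing helpers below all fill a
 * cfil_crypto_data block (sock id, direction, pids, UUIDs, protocol and
 * addresses) and ask the filter's crypto state to sign it; on failure the
 * signature length is zeroed and the event is still sent unsigned.
 */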
2888 static boolean_t
2889 cfil_dispatch_attach_event_sign(cfil_crypto_state_t crypto_state,
2890 struct cfil_info *cfil_info,
2891 struct cfil_msg_sock_attached *msg)
2892 {
2893 struct cfil_crypto_data data = {};
2894
2895 if (crypto_state == NULL || msg == NULL || cfil_info == NULL) {
2896 return false;
2897 }
2898
2899 data.sock_id = msg->cfs_msghdr.cfm_sock_id;
2900 data.direction = msg->cfs_conn_dir;
2901
2902 data.pid = msg->cfs_pid;
2903 data.effective_pid = msg->cfs_e_pid;
2904 uuid_copy(data.uuid, msg->cfs_uuid);
2905 uuid_copy(data.effective_uuid, msg->cfs_e_uuid);
2906 data.socketProtocol = msg->cfs_sock_protocol;
2907 if (data.direction == CFS_CONNECTION_DIR_OUT) {
2908 data.remote.sin6 = msg->cfs_dst.sin6;
2909 data.local.sin6 = msg->cfs_src.sin6;
2910 } else {
2911 data.remote.sin6 = msg->cfs_src.sin6;
2912 data.local.sin6 = msg->cfs_dst.sin6;
2913 }
2914
2915 // At attach, if local address is already present, no need to re-sign subsequent data messages.
2916 if (!NULLADDRESS(data.local)) {
2917 cfil_info->cfi_isSignatureLatest = true;
2918 }
2919
2920 msg->cfs_signature_length = sizeof(cfil_crypto_signature);
2921 if (cfil_crypto_sign_data(crypto_state, &data, msg->cfs_signature, &msg->cfs_signature_length) != 0) {
2922 msg->cfs_signature_length = 0;
2923 CFIL_LOG(LOG_ERR, "CFIL: Failed to sign attached msg <sockID %llu>",
2924 msg->cfs_msghdr.cfm_sock_id);
2925 return false;
2926 }
2927
2928 return true;
2929 }
2930
2931 static boolean_t
2932 cfil_dispatch_data_event_sign(cfil_crypto_state_t crypto_state,
2933 struct socket *so, struct cfil_info *cfil_info,
2934 struct cfil_msg_data_event *msg)
2935 {
2936 struct cfil_crypto_data data = {};
2937
2938 if (crypto_state == NULL || msg == NULL ||
2939 so == NULL || cfil_info == NULL) {
2940 return false;
2941 }
2942
2943 data.sock_id = cfil_info->cfi_sock_id;
2944 data.direction = cfil_info->cfi_dir;
2945 data.pid = so->last_pid;
2946 memcpy(data.uuid, so->last_uuid, sizeof(uuid_t));
2947 if (so->so_flags & SOF_DELEGATED) {
2948 data.effective_pid = so->e_pid;
2949 memcpy(data.effective_uuid, so->e_uuid, sizeof(uuid_t));
2950 } else {
2951 data.effective_pid = so->last_pid;
2952 memcpy(data.effective_uuid, so->last_uuid, sizeof(uuid_t));
2953 }
2954 data.socketProtocol = so->so_proto->pr_protocol;
2955
2956 if (data.direction == CFS_CONNECTION_DIR_OUT) {
2957 data.remote.sin6 = msg->cfc_dst.sin6;
2958 data.local.sin6 = msg->cfc_src.sin6;
2959 } else {
2960 data.remote.sin6 = msg->cfc_src.sin6;
2961 data.local.sin6 = msg->cfc_dst.sin6;
2962 }
2963
2964 // At the first data event, the local address may show up for the first time; update the
2965 // address cache so that subsequent data messages no longer need to be re-signed.
2966 if (!NULLADDRESS(data.local)) {
2967 memcpy(&cfil_info->cfi_so_attach_laddr, &data.local, data.local.sa.sa_len);
2968 cfil_info->cfi_isSignatureLatest = true;
2969 }
2970
2971 msg->cfd_signature_length = sizeof(cfil_crypto_signature);
2972 if (cfil_crypto_sign_data(crypto_state, &data, msg->cfd_signature, &msg->cfd_signature_length) != 0) {
2973 msg->cfd_signature_length = 0;
2974 CFIL_LOG(LOG_ERR, "CFIL: Failed to sign data msg <sockID %llu>",
2975 msg->cfd_msghdr.cfm_sock_id);
2976 return false;
2977 }
2978
2979 return true;
2980 }
2981
2982 static boolean_t
2983 cfil_dispatch_closed_event_sign(cfil_crypto_state_t crypto_state,
2984 struct socket *so, struct cfil_info *cfil_info,
2985 struct cfil_msg_sock_closed *msg)
2986 {
2987 struct cfil_crypto_data data = {};
2988 struct cfil_hash_entry hash_entry = {};
2989 struct cfil_hash_entry *hash_entry_ptr = NULL;
2990 struct inpcb *inp = (struct inpcb *)so->so_pcb;
2991
2992 if (crypto_state == NULL || msg == NULL ||
2993 so == NULL || inp == NULL || cfil_info == NULL) {
2994 return false;
2995 }
2996
2997 data.sock_id = cfil_info->cfi_sock_id;
2998 data.direction = cfil_info->cfi_dir;
2999
3000 data.pid = so->last_pid;
3001 memcpy(data.uuid, so->last_uuid, sizeof(uuid_t));
3002 if (so->so_flags & SOF_DELEGATED) {
3003 data.effective_pid = so->e_pid;
3004 memcpy(data.effective_uuid, so->e_uuid, sizeof(uuid_t));
3005 } else {
3006 data.effective_pid = so->last_pid;
3007 memcpy(data.effective_uuid, so->last_uuid, sizeof(uuid_t));
3008 }
3009 data.socketProtocol = so->so_proto->pr_protocol;
3010
3011 /*
3012 * Fill in address info:
3013 * For UDP, use the cfil_info hash entry directly.
3014 * For TCP, compose a hash entry with the saved addresses.
3015 */
3016 if (cfil_info->cfi_hash_entry != NULL) {
3017 hash_entry_ptr = cfil_info->cfi_hash_entry;
3018 } else if (cfil_info->cfi_so_attach_faddr.sa.sa_len > 0 ||
3019 cfil_info->cfi_so_attach_laddr.sa.sa_len > 0) {
3020 fill_cfil_hash_entry_from_address(&hash_entry, TRUE, &cfil_info->cfi_so_attach_laddr.sa);
3021 fill_cfil_hash_entry_from_address(&hash_entry, FALSE, &cfil_info->cfi_so_attach_faddr.sa);
3022 hash_entry_ptr = &hash_entry;
3023 }
3024 if (hash_entry_ptr != NULL) {
3025 boolean_t outgoing = (cfil_info->cfi_dir == CFS_CONNECTION_DIR_OUT);
3026 union sockaddr_in_4_6 *src = outgoing ? &data.local : &data.remote;
3027 union sockaddr_in_4_6 *dst = outgoing ? &data.remote : &data.local;
3028 cfil_fill_event_msg_addresses(hash_entry_ptr, inp, src, dst, inp->inp_vflag & INP_IPV4, outgoing);
3029 }
3030
3031 data.byte_count_in = cfil_info->cfi_byte_inbound_count;
3032 data.byte_count_out = cfil_info->cfi_byte_outbound_count;
3033
3034 msg->cfc_signature_length = sizeof(cfil_crypto_signature);
3035 if (cfil_crypto_sign_data(crypto_state, &data, msg->cfc_signature, &msg->cfc_signature_length) != 0) {
3036 msg->cfc_signature_length = 0;
3037 CFIL_LOG(LOG_ERR, "CFIL: Failed to sign closed msg <sockID %llu>",
3038 msg->cfc_msghdr.cfm_sock_id);
3039 return false;
3040 }
3041
3042 return true;
3043 }
3044
3045 static int
3046 cfil_dispatch_attach_event(struct socket *so, struct cfil_info *cfil_info,
3047 uint32_t kcunit, int conn_dir)
3048 {
3049 errno_t error = 0;
3050 struct cfil_entry *entry = NULL;
3051 struct cfil_msg_sock_attached msg_attached;
3052 struct content_filter *cfc = NULL;
3053 struct inpcb *inp = (struct inpcb *)so->so_pcb;
3054 struct cfil_hash_entry *hash_entry_ptr = NULL;
3055 struct cfil_hash_entry hash_entry;
3056
3057 memset(&hash_entry, 0, sizeof(struct cfil_hash_entry));
3058 proc_t p = PROC_NULL;
3059 task_t t = TASK_NULL;
3060
3061 socket_lock_assert_owned(so);
3062
3063 cfil_rw_lock_shared(&cfil_lck_rw);
3064
3065 if (so->so_proto == NULL || so->so_proto->pr_domain == NULL) {
3066 error = EINVAL;
3067 goto done;
3068 }
3069
3070 if (kcunit == 0) {
3071 entry = SLIST_FIRST(&cfil_info->cfi_ordered_entries);
3072 } else {
3073 entry = &cfil_info->cfi_entries[kcunit - 1];
3074 }
3075
3076 if (entry == NULL) {
3077 goto done;
3078 }
3079
3080 cfc = entry->cfe_filter;
3081 if (cfc == NULL) {
3082 goto done;
3083 }
3084
3085 if ((entry->cfe_flags & CFEF_SENT_SOCK_ATTACHED)) {
3086 goto done;
3087 }
3088
3089 if (kcunit == 0) {
3090 kcunit = CFI_ENTRY_KCUNIT(cfil_info, entry);
3091 }
3092
3093 CFIL_LOG(LOG_INFO, "so %llx filter_control_unit %u kcunit %u",
3094 (uint64_t)VM_KERNEL_ADDRPERM(so), entry->cfe_necp_control_unit, kcunit);
3095
3096 /* Would be wasteful to try when flow controlled */
3097 if (cfc->cf_flags & CFF_FLOW_CONTROLLED) {
3098 error = ENOBUFS;
3099 goto done;
3100 }
3101
3102 bzero(&msg_attached, sizeof(struct cfil_msg_sock_attached));
3103 msg_attached.cfs_msghdr.cfm_len = sizeof(struct cfil_msg_sock_attached);
3104 msg_attached.cfs_msghdr.cfm_version = CFM_VERSION_CURRENT;
3105 msg_attached.cfs_msghdr.cfm_type = CFM_TYPE_EVENT;
3106 msg_attached.cfs_msghdr.cfm_op = CFM_OP_SOCKET_ATTACHED;
3107 msg_attached.cfs_msghdr.cfm_sock_id = entry->cfe_cfil_info->cfi_sock_id;
3108
3109 msg_attached.cfs_sock_family = so->so_proto->pr_domain->dom_family;
3110 msg_attached.cfs_sock_type = so->so_proto->pr_type;
3111 msg_attached.cfs_sock_protocol = so->so_proto->pr_protocol;
3112 msg_attached.cfs_pid = so->last_pid;
3113 memcpy(msg_attached.cfs_uuid, so->last_uuid, sizeof(uuid_t));
3114 if (so->so_flags & SOF_DELEGATED) {
3115 msg_attached.cfs_e_pid = so->e_pid;
3116 memcpy(msg_attached.cfs_e_uuid, so->e_uuid, sizeof(uuid_t));
3117 } else {
3118 msg_attached.cfs_e_pid = so->last_pid;
3119 memcpy(msg_attached.cfs_e_uuid, so->last_uuid, sizeof(uuid_t));
3120 }
3121
3122 /*
3123 * Fill in address info:
3124 * For UDP, use the cfil_info hash entry directly.
3125 * For TCP, compose a hash entry with the saved addresses.
3126 */
3127 if (cfil_info->cfi_hash_entry != NULL) {
3128 hash_entry_ptr = cfil_info->cfi_hash_entry;
3129 } else if (cfil_info->cfi_so_attach_faddr.sa.sa_len > 0 ||
3130 cfil_info->cfi_so_attach_laddr.sa.sa_len > 0) {
3131 fill_cfil_hash_entry_from_address(&hash_entry, TRUE, &cfil_info->cfi_so_attach_laddr.sa);
3132 fill_cfil_hash_entry_from_address(&hash_entry, FALSE, &cfil_info->cfi_so_attach_faddr.sa);
3133 hash_entry_ptr = &hash_entry;
3134 }
3135 if (hash_entry_ptr != NULL) {
3136 cfil_fill_event_msg_addresses(hash_entry_ptr, inp,
3137 &msg_attached.cfs_src, &msg_attached.cfs_dst,
3138 inp->inp_vflag & INP_IPV4, conn_dir == CFS_CONNECTION_DIR_OUT);
3139 }
3140 msg_attached.cfs_conn_dir = conn_dir;
3141
3142 if (msg_attached.cfs_e_pid != 0) {
3143 p = proc_find(msg_attached.cfs_e_pid);
3144 if (p != PROC_NULL) {
3145 t = proc_task(p);
3146 if (t != TASK_NULL) {
3147 audit_token_t audit_token;
3148 mach_msg_type_number_t count = TASK_AUDIT_TOKEN_COUNT;
3149 if (task_info(t, TASK_AUDIT_TOKEN, (task_info_t)&audit_token, &count) == KERN_SUCCESS) {
3150 memcpy(&msg_attached.cfs_audit_token, &audit_token, sizeof(msg_attached.cfs_audit_token));
3151 } else {
3152 CFIL_LOG(LOG_ERR, "CFIL: Failed to get process audit token <sockID %llu> ",
3153 entry->cfe_cfil_info->cfi_sock_id);
3154 }
3155 }
3156 proc_rele(p);
3157 }
3158 }
3159
3160 cfil_dispatch_attach_event_sign(entry->cfe_filter->cf_crypto_state, cfil_info, &msg_attached);
3161
3162 #if LIFECYCLE_DEBUG
3163 CFIL_LOG(LOG_DEBUG, "CFIL: LIFECYCLE: SENDING ATTACH UP <sockID %llu> ",
3164 entry->cfe_cfil_info->cfi_sock_id);
3165 #endif
3166
3167 error = ctl_enqueuedata(entry->cfe_filter->cf_kcref,
3168 entry->cfe_filter->cf_kcunit,
3169 &msg_attached,
3170 sizeof(struct cfil_msg_sock_attached),
3171 CTL_DATA_EOR);
3172 if (error != 0) {
3173 CFIL_LOG(LOG_ERR, "ctl_enqueuedata() failed: %d", error);
3174 goto done;
3175 }
3176 microuptime(&entry->cfe_last_event);
3177 cfil_info->cfi_first_event.tv_sec = entry->cfe_last_event.tv_sec;
3178 cfil_info->cfi_first_event.tv_usec = entry->cfe_last_event.tv_usec;
3179
3180 entry->cfe_flags |= CFEF_SENT_SOCK_ATTACHED;
3181 OSIncrementAtomic(&cfil_stats.cfs_attach_event_ok);
3182 done:
3183
3184 /* We can recover from flow control */
3185 if (error == ENOBUFS) {
3186 entry->cfe_flags |= CFEF_FLOW_CONTROLLED;
3187 OSIncrementAtomic(&cfil_stats.cfs_attach_event_flow_control);
3188
3189 if (!cfil_rw_lock_shared_to_exclusive(&cfil_lck_rw)) {
3190 cfil_rw_lock_exclusive(&cfil_lck_rw);
3191 }
3192
3193 cfc->cf_flags |= CFF_FLOW_CONTROLLED;
3194
3195 cfil_rw_unlock_exclusive(&cfil_lck_rw);
3196 } else {
3197 if (error != 0) {
3198 OSIncrementAtomic(&cfil_stats.cfs_attach_event_fail);
3199 }
3200
3201 cfil_rw_unlock_shared(&cfil_lck_rw);
3202 }
3203 return error;
3204 }
3205
3206 static int
3207 cfil_dispatch_disconnect_event(struct socket *so, struct cfil_info *cfil_info, uint32_t kcunit, int outgoing)
3208 {
3209 errno_t error = 0;
3210 struct mbuf *msg = NULL;
3211 struct cfil_entry *entry;
3212 struct cfe_buf *entrybuf;
3213 struct cfil_msg_hdr msg_disconnected;
3214 struct content_filter *cfc;
3215
3216 socket_lock_assert_owned(so);
3217
3218 cfil_rw_lock_shared(&cfil_lck_rw);
3219
3220 entry = &cfil_info->cfi_entries[kcunit - 1];
3221 if (outgoing) {
3222 entrybuf = &entry->cfe_snd;
3223 } else {
3224 entrybuf = &entry->cfe_rcv;
3225 }
3226
3227 cfc = entry->cfe_filter;
3228 if (cfc == NULL) {
3229 goto done;
3230 }
3231
3232 CFIL_LOG(LOG_INFO, "so %llx kcunit %u outgoing %d",
3233 (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit, outgoing);
3234
3235 /*
3236 * Send the disconnection event once
3237 */
3238 if ((outgoing && (entry->cfe_flags & CFEF_SENT_DISCONNECT_OUT)) ||
3239 (!outgoing && (entry->cfe_flags & CFEF_SENT_DISCONNECT_IN))) {
3240 CFIL_LOG(LOG_INFO, "so %llx disconnect already sent",
3241 (uint64_t)VM_KERNEL_ADDRPERM(so));
3242 goto done;
3243 }
3244
3245 /*
3246 * We're not disconnected as long as some data is waiting
3247 * to be delivered to the filter
3248 */
3249 if (outgoing && cfil_queue_empty(&entrybuf->cfe_ctl_q) == 0) {
3250 CFIL_LOG(LOG_INFO, "so %llx control queue not empty",
3251 (uint64_t)VM_KERNEL_ADDRPERM(so));
3252 error = EBUSY;
3253 goto done;
3254 }
3255 /* Would be wasteful to try when flow controlled */
3256 if (cfc->cf_flags & CFF_FLOW_CONTROLLED) {
3257 error = ENOBUFS;
3258 goto done;
3259 }
3260
3261 #if LIFECYCLE_DEBUG
3262 cfil_info_log(LOG_ERR, cfil_info, outgoing ?
3263 "CFIL: LIFECYCLE: OUT - SENDING DISCONNECT UP":
3264 "CFIL: LIFECYCLE: IN - SENDING DISCONNECT UP");
3265 #endif
3266
3267 bzero(&msg_disconnected, sizeof(struct cfil_msg_hdr));
3268 msg_disconnected.cfm_len = sizeof(struct cfil_msg_hdr);
3269 msg_disconnected.cfm_version = CFM_VERSION_CURRENT;
3270 msg_disconnected.cfm_type = CFM_TYPE_EVENT;
3271 msg_disconnected.cfm_op = outgoing ? CFM_OP_DISCONNECT_OUT :
3272 CFM_OP_DISCONNECT_IN;
3273 msg_disconnected.cfm_sock_id = entry->cfe_cfil_info->cfi_sock_id;
3274 error = ctl_enqueuedata(entry->cfe_filter->cf_kcref,
3275 entry->cfe_filter->cf_kcunit,
3276 &msg_disconnected,
3277 sizeof(struct cfil_msg_hdr),
3278 CTL_DATA_EOR);
3279 if (error != 0) {
3280 CFIL_LOG(LOG_ERR, "ctl_enqueuedata() failed: %d", error);
3281 mbuf_freem(msg);
3282 goto done;
3283 }
3284 microuptime(&entry->cfe_last_event);
3285 CFI_ADD_TIME_LOG(cfil_info, &entry->cfe_last_event, &cfil_info->cfi_first_event, msg_disconnected.cfm_op);
3286
3287 /* Remember we have sent the disconnection message */
3288 if (outgoing) {
3289 entry->cfe_flags |= CFEF_SENT_DISCONNECT_OUT;
3290 OSIncrementAtomic(&cfil_stats.cfs_disconnect_out_event_ok);
3291 } else {
3292 entry->cfe_flags |= CFEF_SENT_DISCONNECT_IN;
3293 OSIncrementAtomic(&cfil_stats.cfs_disconnect_in_event_ok);
3294 }
3295 done:
3296 if (error == ENOBUFS) {
3297 entry->cfe_flags |= CFEF_FLOW_CONTROLLED;
3298 OSIncrementAtomic(
3299 &cfil_stats.cfs_disconnect_event_flow_control);
3300
3301 if (!cfil_rw_lock_shared_to_exclusive(&cfil_lck_rw)) {
3302 cfil_rw_lock_exclusive(&cfil_lck_rw);
3303 }
3304
3305 cfc->cf_flags |= CFF_FLOW_CONTROLLED;
3306
3307 cfil_rw_unlock_exclusive(&cfil_lck_rw);
3308 } else {
3309 if (error != 0) {
3310 OSIncrementAtomic(
3311 &cfil_stats.cfs_disconnect_event_fail);
3312 }
3313
3314 cfil_rw_unlock_shared(&cfil_lck_rw);
3315 }
3316 return error;
3317 }
3318
3319 int
3320 cfil_dispatch_closed_event(struct socket *so, struct cfil_info *cfil_info, int kcunit)
3321 {
3322 struct cfil_entry *entry;
3323 struct cfil_msg_sock_closed msg_closed;
3324 errno_t error = 0;
3325 struct content_filter *cfc;
3326
3327 socket_lock_assert_owned(so);
3328
3329 cfil_rw_lock_shared(&cfil_lck_rw);
3330
3331 entry = &cfil_info->cfi_entries[kcunit - 1];
3332 cfc = entry->cfe_filter;
3333 if (cfc == NULL) {
3334 goto done;
3335 }
3336
3337 CFIL_LOG(LOG_INFO, "so %llx kcunit %d",
3338 (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit);
3339
3340 /* Would be wasteful to try when flow controlled */
3341 if (cfc->cf_flags & CFF_FLOW_CONTROLLED) {
3342 error = ENOBUFS;
3343 goto done;
3344 }
3345 /*
3346 * Send a single closed message per filter
3347 */
3348 if ((entry->cfe_flags & CFEF_SENT_SOCK_CLOSED) != 0) {
3349 goto done;
3350 }
3351 if ((entry->cfe_flags & CFEF_SENT_SOCK_ATTACHED) == 0) {
3352 goto done;
3353 }
3354
3355 microuptime(&entry->cfe_last_event);
3356 CFI_ADD_TIME_LOG(cfil_info, &entry->cfe_last_event, &cfil_info->cfi_first_event, CFM_OP_SOCKET_CLOSED);
3357
3358 bzero(&msg_closed, sizeof(struct cfil_msg_sock_closed));
3359 msg_closed.cfc_msghdr.cfm_len = sizeof(struct cfil_msg_sock_closed);
3360 msg_closed.cfc_msghdr.cfm_version = CFM_VERSION_CURRENT;
3361 msg_closed.cfc_msghdr.cfm_type = CFM_TYPE_EVENT;
3362 msg_closed.cfc_msghdr.cfm_op = CFM_OP_SOCKET_CLOSED;
3363 msg_closed.cfc_msghdr.cfm_sock_id = entry->cfe_cfil_info->cfi_sock_id;
3364 msg_closed.cfc_first_event.tv_sec = cfil_info->cfi_first_event.tv_sec;
3365 msg_closed.cfc_first_event.tv_usec = cfil_info->cfi_first_event.tv_usec;
3366 memcpy(msg_closed.cfc_op_time, cfil_info->cfi_op_time, sizeof(uint32_t) * CFI_MAX_TIME_LOG_ENTRY);
3367 memcpy(msg_closed.cfc_op_list, cfil_info->cfi_op_list, sizeof(unsigned char) * CFI_MAX_TIME_LOG_ENTRY);
3368 msg_closed.cfc_op_list_ctr = cfil_info->cfi_op_list_ctr;
3369 msg_closed.cfc_byte_inbound_count = cfil_info->cfi_byte_inbound_count;
3370 msg_closed.cfc_byte_outbound_count = cfil_info->cfi_byte_outbound_count;
3371
3372 cfil_dispatch_closed_event_sign(entry->cfe_filter->cf_crypto_state, so, cfil_info, &msg_closed);
3373
3374 #if LIFECYCLE_DEBUG
3375 CFIL_LOG(LOG_ERR, "CFIL: LIFECYCLE: SENDING CLOSED UP: <sock id %llu> op ctr %d, start time %llu.%llu", msg_closed.cfc_msghdr.cfm_sock_id, cfil_info->cfi_op_list_ctr, cfil_info->cfi_first_event.tv_sec, cfil_info->cfi_first_event.tv_usec);
3376 #endif
3377 /* for debugging
3378 * if (msg_closed.cfc_op_list_ctr > CFI_MAX_TIME_LOG_ENTRY) {
3379 * msg_closed.cfc_op_list_ctr = CFI_MAX_TIME_LOG_ENTRY; // just in case
3380 * }
3381 * for (unsigned int i = 0; i < msg_closed.cfc_op_list_ctr ; i++) {
3382 * CFIL_LOG(LOG_ERR, "MD: socket %llu event %2u, time + %u msec", msg_closed.cfc_msghdr.cfm_sock_id, (unsigned short)msg_closed.cfc_op_list[i], msg_closed.cfc_op_time[i]);
3383 * }
3384 */
3385
3386 error = ctl_enqueuedata(entry->cfe_filter->cf_kcref,
3387 entry->cfe_filter->cf_kcunit,
3388 &msg_closed,
3389 sizeof(struct cfil_msg_sock_closed),
3390 CTL_DATA_EOR);
3391 if (error != 0) {
3392 CFIL_LOG(LOG_ERR, "ctl_enqueuedata() failed: %d",
3393 error);
3394 goto done;
3395 }
3396
3397 entry->cfe_flags |= CFEF_SENT_SOCK_CLOSED;
3398 OSIncrementAtomic(&cfil_stats.cfs_closed_event_ok);
3399 done:
3400 /* We can recover from flow control */
3401 if (error == ENOBUFS) {
3402 entry->cfe_flags |= CFEF_FLOW_CONTROLLED;
3403 OSIncrementAtomic(&cfil_stats.cfs_closed_event_flow_control);
3404
3405 if (!cfil_rw_lock_shared_to_exclusive(&cfil_lck_rw)) {
3406 cfil_rw_lock_exclusive(&cfil_lck_rw);
3407 }
3408
3409 cfc->cf_flags |= CFF_FLOW_CONTROLLED;
3410
3411 cfil_rw_unlock_exclusive(&cfil_lck_rw);
3412 } else {
3413 if (error != 0) {
3414 OSIncrementAtomic(&cfil_stats.cfs_closed_event_fail);
3415 }
3416
3417 cfil_rw_unlock_shared(&cfil_lck_rw);
3418 }
3419
3420 return error;
3421 }
3422
3423 static void
3424 fill_ip6_sockaddr_4_6(union sockaddr_in_4_6 *sin46,
3425 struct in6_addr *ip6, u_int16_t port)
3426 {
3427 if (sin46 == NULL) {
3428 return;
3429 }
3430
3431 struct sockaddr_in6 *sin6 = &sin46->sin6;
3432
3433 sin6->sin6_family = AF_INET6;
3434 sin6->sin6_len = sizeof(*sin6);
3435 sin6->sin6_port = port;
3436 sin6->sin6_addr = *ip6;
3437 if (IN6_IS_SCOPE_EMBED(&sin6->sin6_addr)) {
3438 sin6->sin6_scope_id = ntohs(sin6->sin6_addr.s6_addr16[1]);
3439 sin6->sin6_addr.s6_addr16[1] = 0;
3440 }
3441 }
3442
3443 static void
3444 fill_ip_sockaddr_4_6(union sockaddr_in_4_6 *sin46,
3445 struct in_addr ip, u_int16_t port)
3446 {
3447 if (sin46 == NULL) {
3448 return;
3449 }
3450
3451 struct sockaddr_in *sin = &sin46->sin;
3452
3453 sin->sin_family = AF_INET;
3454 sin->sin_len = sizeof(*sin);
3455 sin->sin_port = port;
3456 sin->sin_addr.s_addr = ip.s_addr;
3457 }
3458
3459 static void
3460 cfil_get_flow_address_v6(struct cfil_hash_entry *entry, struct inpcb *inp,
3461 struct in6_addr **laddr, struct in6_addr **faddr,
3462 u_int16_t *lport, u_int16_t *fport)
3463 {
3464 if (entry != NULL) {
3465 *laddr = &entry->cfentry_laddr.addr6;
3466 *faddr = &entry->cfentry_faddr.addr6;
3467 *lport = entry->cfentry_lport;
3468 *fport = entry->cfentry_fport;
3469 } else {
3470 *laddr = &inp->in6p_laddr;
3471 *faddr = &inp->in6p_faddr;
3472 *lport = inp->inp_lport;
3473 *fport = inp->inp_fport;
3474 }
3475 }
3476
3477 static void
3478 cfil_get_flow_address(struct cfil_hash_entry *entry, struct inpcb *inp,
3479 struct in_addr *laddr, struct in_addr *faddr,
3480 u_int16_t *lport, u_int16_t *fport)
3481 {
3482 if (entry != NULL) {
3483 *laddr = entry->cfentry_laddr.addr46.ia46_addr4;
3484 *faddr = entry->cfentry_faddr.addr46.ia46_addr4;
3485 *lport = entry->cfentry_lport;
3486 *fport = entry->cfentry_fport;
3487 } else {
3488 *laddr = inp->inp_laddr;
3489 *faddr = inp->inp_faddr;
3490 *lport = inp->inp_lport;
3491 *fport = inp->inp_fport;
3492 }
3493 }
3494
3495 static int
3496 cfil_dispatch_data_event(struct socket *so, struct cfil_info *cfil_info, uint32_t kcunit, int outgoing,
3497 struct mbuf *data, unsigned int copyoffset, unsigned int copylen)
3498 {
3499 errno_t error = 0;
3500 struct mbuf *copy = NULL;
3501 struct mbuf *msg = NULL;
3502 unsigned int one = 1;
3503 struct cfil_msg_data_event *data_req;
3504 size_t hdrsize;
3505 struct inpcb *inp = (struct inpcb *)so->so_pcb;
3506 struct cfil_entry *entry;
3507 struct cfe_buf *entrybuf;
3508 struct content_filter *cfc;
3509 struct timeval tv;
3510
3511 cfil_rw_lock_shared(&cfil_lck_rw);
3512
3513 entry = &cfil_info->cfi_entries[kcunit - 1];
3514 if (outgoing) {
3515 entrybuf = &entry->cfe_snd;
3516 } else {
3517 entrybuf = &entry->cfe_rcv;
3518 }
3519
3520 cfc = entry->cfe_filter;
3521 if (cfc == NULL) {
3522 goto done;
3523 }
3524
3525 data = cfil_data_start(data);
3526 if (data == NULL || (data->m_flags & M_PKTHDR) == 0) {
3527 CFIL_LOG(LOG_ERR, "NOT PKTHDR");
3528 goto done;
3529 }
3530
3531 CFIL_LOG(LOG_INFO, "so %llx kcunit %u outgoing %d",
3532 (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit, outgoing);
3533
3534 socket_lock_assert_owned(so);
3535
3536 /* Would be wasteful to try */
3537 if (cfc->cf_flags & CFF_FLOW_CONTROLLED) {
3538 error = ENOBUFS;
3539 goto done;
3540 }
3541
3542 /* Make a copy of the data to pass to kernel control socket */
3543 copy = m_copym_mode(data, copyoffset, copylen, M_DONTWAIT,
3544 M_COPYM_NOOP_HDR);
3545 if (copy == NULL) {
3546 CFIL_LOG(LOG_ERR, "m_copym_mode() failed");
3547 error = ENOMEM;
3548 goto done;
3549 }
3550
3551 /* We need an mbuf packet for the message header */
3552 hdrsize = sizeof(struct cfil_msg_data_event);
3553 error = mbuf_allocpacket(MBUF_DONTWAIT, hdrsize, &one, &msg);
3554 if (error != 0) {
3555 CFIL_LOG(LOG_ERR, "mbuf_allocpacket() failed");
3556 m_freem(copy);
3557 /*
3558 * ENOBUFS is used to signal flow control, so report ENOMEM here instead
3559 */
3560 error = ENOMEM;
3561 goto done;
3562 }
3563 mbuf_setlen(msg, hdrsize);
3564 mbuf_pkthdr_setlen(msg, hdrsize + copylen);
3565 msg->m_next = copy;
3566 data_req = (struct cfil_msg_data_event *)mbuf_data(msg);
3567 bzero(data_req, hdrsize);
3568 data_req->cfd_msghdr.cfm_len = hdrsize + copylen;
3569 data_req->cfd_msghdr.cfm_version = 1;
3570 data_req->cfd_msghdr.cfm_type = CFM_TYPE_EVENT;
3571 data_req->cfd_msghdr.cfm_op =
3572 outgoing ? CFM_OP_DATA_OUT : CFM_OP_DATA_IN;
3573 data_req->cfd_msghdr.cfm_sock_id =
3574 entry->cfe_cfil_info->cfi_sock_id;
3575 data_req->cfd_start_offset = entrybuf->cfe_peeked;
3576 data_req->cfd_end_offset = entrybuf->cfe_peeked + copylen;
3577
3578 /*
3579 * Copy address/port into event msg.
3580 * For non-connected sockets we need to copy the addresses from the
3581 * passed parameters
3582 */
3583 cfil_fill_event_msg_addresses(cfil_info->cfi_hash_entry, inp,
3584 &data_req->cfc_src, &data_req->cfc_dst,
3585 inp->inp_vflag & INP_IPV4, outgoing);
3586
3587 if (cfil_info->cfi_isSignatureLatest == false) {
3588 cfil_dispatch_data_event_sign(entry->cfe_filter->cf_crypto_state, so, cfil_info, data_req);
3589 }
3590
3591 microuptime(&tv);
3592 CFI_ADD_TIME_LOG(cfil_info, &tv, &cfil_info->cfi_first_event, data_req->cfd_msghdr.cfm_op);
3593
3594 /* Pass the message to the content filter */
3595 error = ctl_enqueuembuf(entry->cfe_filter->cf_kcref,
3596 entry->cfe_filter->cf_kcunit,
3597 msg, CTL_DATA_EOR);
3598 if (error != 0) {
3599 CFIL_LOG(LOG_ERR, "ctl_enqueuembuf() failed: %d", error);
3600 mbuf_freem(msg);
3601 goto done;
3602 }
3603 entry->cfe_flags &= ~CFEF_FLOW_CONTROLLED;
3604 OSIncrementAtomic(&cfil_stats.cfs_data_event_ok);
3605
3606 #if VERDICT_DEBUG
3607 CFIL_LOG(LOG_ERR, "CFIL: VERDICT ACTION: so %llx sockID %llu outgoing %d: mbuf %llx copyoffset %u copylen %u",
3608 (uint64_t)VM_KERNEL_ADDRPERM(so), cfil_info->cfi_sock_id, outgoing, (uint64_t)VM_KERNEL_ADDRPERM(data), copyoffset, copylen);
3609 #endif
3610
3611 done:
3612 if (error == ENOBUFS) {
3613 entry->cfe_flags |= CFEF_FLOW_CONTROLLED;
3614 OSIncrementAtomic(
3615 &cfil_stats.cfs_data_event_flow_control);
3616
3617 if (!cfil_rw_lock_shared_to_exclusive(&cfil_lck_rw)) {
3618 cfil_rw_lock_exclusive(&cfil_lck_rw);
3619 }
3620
3621 cfc->cf_flags |= CFF_FLOW_CONTROLLED;
3622
3623 cfil_rw_unlock_exclusive(&cfil_lck_rw);
3624 } else {
3625 if (error != 0) {
3626 OSIncrementAtomic(&cfil_stats.cfs_data_event_fail);
3627 }
3628
3629 cfil_rw_unlock_shared(&cfil_lck_rw);
3630 }
3631 return error;
3632 }
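/*
 * Shape of the event dispatched above, as seen by the user space agent on
 * its kernel control socket: a struct cfil_msg_data_event header followed
 * immediately by the copied payload, with cfd_start_offset/cfd_end_offset
 * describing where that payload sits in the stream.  For example (made-up
 * numbers), if the filter has already been shown 1000 bytes in this
 * direction (cfe_peeked == 1000) and copylen is 500, the agent receives:
 *
 *	cfm_op           == CFM_OP_DATA_OUT or CFM_OP_DATA_IN
 *	cfd_start_offset == 1000
 *	cfd_end_offset   == 1500
 *	followed by 500 bytes of payload
 *
 * and the agent is expected to answer with pass/peek offsets expressed in
 * the same absolute stream offsets.
 */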
3633
3634 /*
3635 * Process the queue of data waiting to be delivered to content filter
3636 */
3637 static int
3638 cfil_data_service_ctl_q(struct socket *so, struct cfil_info *cfil_info, uint32_t kcunit, int outgoing)
3639 {
3640 errno_t error = 0;
3641 struct mbuf *data, *tmp = NULL;
3642 unsigned int datalen = 0, copylen = 0, copyoffset = 0;
3643 struct cfil_entry *entry;
3644 struct cfe_buf *entrybuf;
3645 uint64_t currentoffset = 0;
3646
3647 if (cfil_info == NULL) {
3648 return 0;
3649 }
3650
3651 CFIL_LOG(LOG_INFO, "so %llx kcunit %u outgoing %d",
3652 (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit, outgoing);
3653
3654 socket_lock_assert_owned(so);
3655
3656 entry = &cfil_info->cfi_entries[kcunit - 1];
3657 if (outgoing) {
3658 entrybuf = &entry->cfe_snd;
3659 } else {
3660 entrybuf = &entry->cfe_rcv;
3661 }
3662
3663 /* Send attached message if not yet done */
3664 if ((entry->cfe_flags & CFEF_SENT_SOCK_ATTACHED) == 0) {
3665 error = cfil_dispatch_attach_event(so, cfil_info, CFI_ENTRY_KCUNIT(cfil_info, entry),
3666 outgoing ? CFS_CONNECTION_DIR_OUT : CFS_CONNECTION_DIR_IN);
3667 if (error != 0) {
3668 /* We can recover from flow control */
3669 if (error == ENOBUFS || error == ENOMEM) {
3670 error = 0;
3671 }
3672 goto done;
3673 }
3674 } else if ((entry->cfe_flags & CFEF_DATA_START) == 0) {
3675 OSIncrementAtomic(&cfil_stats.cfs_ctl_q_not_started);
3676 goto done;
3677 }
3678
3679 #if DATA_DEBUG
3680 CFIL_LOG(LOG_DEBUG, "CFIL: SERVICE CTL-Q: pass_offset %llu peeked %llu peek_offset %llu",
3681 entrybuf->cfe_pass_offset,
3682 entrybuf->cfe_peeked,
3683 entrybuf->cfe_peek_offset);
3684 #endif
3685
3686 /* Move all data that can pass */
3687 while ((data = cfil_queue_first(&entrybuf->cfe_ctl_q)) != NULL &&
3688 entrybuf->cfe_ctl_q.q_start < entrybuf->cfe_pass_offset) {
3689 datalen = cfil_data_length(data, NULL, NULL);
3690 tmp = data;
3691
3692 if (entrybuf->cfe_ctl_q.q_start + datalen <=
3693 entrybuf->cfe_pass_offset) {
3694 /*
3695 * The first mbuf can fully pass
3696 */
3697 copylen = datalen;
3698 } else {
3699 /*
3700 * The first mbuf can partially pass
3701 */
3702 copylen = entrybuf->cfe_pass_offset -
3703 entrybuf->cfe_ctl_q.q_start;
3704 }
3705 VERIFY(copylen <= datalen);
3706
3707 #if DATA_DEBUG
3708 CFIL_LOG(LOG_DEBUG,
3709 "CFIL: SERVICE CTL-Q PASSING: %llx first %llu peeked %llu pass %llu peek %llu"
3710 "datalen %u copylen %u",
3711 (uint64_t)VM_KERNEL_ADDRPERM(tmp),
3712 entrybuf->cfe_ctl_q.q_start,
3713 entrybuf->cfe_peeked,
3714 entrybuf->cfe_pass_offset,
3715 entrybuf->cfe_peek_offset,
3716 datalen, copylen);
3717 #endif
3718
3719 /*
3720 * Data that passes has been peeked at explicitly or
3721 * implicitly
3722 */
3723 if (entrybuf->cfe_ctl_q.q_start + copylen >
3724 entrybuf->cfe_peeked) {
3725 entrybuf->cfe_peeked =
3726 entrybuf->cfe_ctl_q.q_start + copylen;
3727 }
3728 /*
3729 * Stop on partial pass
3730 */
3731 if (copylen < datalen) {
3732 break;
3733 }
3734
3735 /* All good, move full data from ctl queue to pending queue */
3736 cfil_queue_remove(&entrybuf->cfe_ctl_q, data, datalen);
3737
3738 cfil_queue_enqueue(&entrybuf->cfe_pending_q, data, datalen);
3739 if (outgoing) {
3740 OSAddAtomic64(datalen,
3741 &cfil_stats.cfs_pending_q_out_enqueued);
3742 } else {
3743 OSAddAtomic64(datalen,
3744 &cfil_stats.cfs_pending_q_in_enqueued);
3745 }
3746 }
3747 CFIL_INFO_VERIFY(cfil_info);
3748 if (tmp != NULL) {
3749 CFIL_LOG(LOG_DEBUG,
3750 "%llx first %llu peeked %llu pass %llu peek %llu"
3751 "datalen %u copylen %u",
3752 (uint64_t)VM_KERNEL_ADDRPERM(tmp),
3753 entrybuf->cfe_ctl_q.q_start,
3754 entrybuf->cfe_peeked,
3755 entrybuf->cfe_pass_offset,
3756 entrybuf->cfe_peek_offset,
3757 datalen, copylen);
3758 }
3759 tmp = NULL;
3760
3761 /* Now deal with remaining data the filter wants to peek at */
3762 for (data = cfil_queue_first(&entrybuf->cfe_ctl_q),
3763 currentoffset = entrybuf->cfe_ctl_q.q_start;
3764 data != NULL && currentoffset < entrybuf->cfe_peek_offset;
3765 data = cfil_queue_next(&entrybuf->cfe_ctl_q, data),
3766 currentoffset += datalen) {
3767 datalen = cfil_data_length(data, NULL, NULL);
3768 tmp = data;
3769
3770 /* We've already peeked at this mbuf */
3771 if (currentoffset + datalen <= entrybuf->cfe_peeked) {
3772 continue;
3773 }
3774 /*
3775 * The data in the first mbuf may have been
3776 * partially peeked at
3777 */
3778 copyoffset = entrybuf->cfe_peeked - currentoffset;
3779 VERIFY(copyoffset < datalen);
3780 copylen = datalen - copyoffset;
3781 VERIFY(copylen <= datalen);
3782 /*
3783 * Do not copy more than needed
3784 */
3785 if (currentoffset + copyoffset + copylen >
3786 entrybuf->cfe_peek_offset) {
3787 copylen = entrybuf->cfe_peek_offset -
3788 (currentoffset + copyoffset);
3789 }
3790
3791 #if DATA_DEBUG
3792 CFIL_LOG(LOG_DEBUG,
3793 "CFIL: SERVICE CTL-Q PEEKING: %llx current %llu peeked %llu pass %llu peek %llu "
3794 "datalen %u copylen %u copyoffset %u",
3795 (uint64_t)VM_KERNEL_ADDRPERM(tmp),
3796 currentoffset,
3797 entrybuf->cfe_peeked,
3798 entrybuf->cfe_pass_offset,
3799 entrybuf->cfe_peek_offset,
3800 datalen, copylen, copyoffset);
3801 #endif
3802
3803 /*
3804 * Stop if there is nothing more to peek at
3805 */
3806 if (copylen == 0) {
3807 break;
3808 }
3809 /*
3810 * Let the filter get a peek at this span of data
3811 */
3812 error = cfil_dispatch_data_event(so, cfil_info, kcunit,
3813 outgoing, data, copyoffset, copylen);
3814 if (error != 0) {
3815 /* On error, leave data in ctl_q */
3816 break;
3817 }
3818 entrybuf->cfe_peeked += copylen;
3819 if (outgoing) {
3820 OSAddAtomic64(copylen,
3821 &cfil_stats.cfs_ctl_q_out_peeked);
3822 } else {
3823 OSAddAtomic64(copylen,
3824 &cfil_stats.cfs_ctl_q_in_peeked);
3825 }
3826
3827 /* Stop when data could not be fully peeked at */
3828 if (copylen + copyoffset < datalen) {
3829 break;
3830 }
3831 }
3832 CFIL_INFO_VERIFY(cfil_info);
3833 if (tmp != NULL) {
3834 CFIL_LOG(LOG_DEBUG,
3835 "%llx first %llu peeked %llu pass %llu peek %llu"
3836 "datalen %u copylen %u copyoffset %u",
3837 (uint64_t)VM_KERNEL_ADDRPERM(tmp),
3838 currentoffset,
3839 entrybuf->cfe_peeked,
3840 entrybuf->cfe_pass_offset,
3841 entrybuf->cfe_peek_offset,
3842 datalen, copylen, copyoffset);
3843 }
3844
3845 /*
3846 * Process data that has passed the filter
3847 */
3848 error = cfil_service_pending_queue(so, cfil_info, kcunit, outgoing);
3849 if (error != 0) {
3850 CFIL_LOG(LOG_ERR, "cfil_service_pending_queue() error %d",
3851 error);
3852 goto done;
3853 }
3854
3855 /*
3856 * Dispatch disconnect events that could not be sent
3857 */
3858 if (cfil_info == NULL) {
3859 goto done;
3860 } else if (outgoing) {
3861 if ((cfil_info->cfi_flags & CFIF_SHUT_WR) &&
3862 !(entry->cfe_flags & CFEF_SENT_DISCONNECT_OUT)) {
3863 cfil_dispatch_disconnect_event(so, cfil_info, kcunit, 1);
3864 }
3865 } else {
3866 if ((cfil_info->cfi_flags & CFIF_SHUT_RD) &&
3867 !(entry->cfe_flags & CFEF_SENT_DISCONNECT_IN)) {
3868 cfil_dispatch_disconnect_event(so, cfil_info, kcunit, 0);
3869 }
3870 }
3871
3872 done:
3873 CFIL_LOG(LOG_DEBUG,
3874 "first %llu peeked %llu pass %llu peek %llu",
3875 entrybuf->cfe_ctl_q.q_start,
3876 entrybuf->cfe_peeked,
3877 entrybuf->cfe_pass_offset,
3878 entrybuf->cfe_peek_offset);
3879
3880 CFIL_INFO_VERIFY(cfil_info);
3881 return error;
3882 }
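/*
 * Worked example of the control queue bookkeeping above (made-up numbers):
 * suppose cfe_ctl_q.q_start == 80, the first mbuf holds 50 bytes and the
 * filter has granted cfe_pass_offset == 100.  Only copylen == 20 of that
 * mbuf can pass, so cfe_peeked advances to 100 and the pass loop stops on
 * the partial pass; the mbuf stays on the control queue.  If the filter has
 * also set cfe_peek_offset == 120, the peek loop then dispatches the next
 * 20 bytes (copyoffset == 20, copylen == 20) to the filter without passing
 * them, and everything beyond offset 120 waits for a further update from
 * the filter.
 */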
3883
3884 /*
3885 * cfil_data_filter()
3886 *
3887 * Process data for a content filter installed on a socket
3888 */
3889 int
3890 cfil_data_filter(struct socket *so, struct cfil_info *cfil_info, uint32_t kcunit, int outgoing,
3891 struct mbuf *data, uint64_t datalen)
3892 {
3893 errno_t error = 0;
3894 struct cfil_entry *entry;
3895 struct cfe_buf *entrybuf;
3896
3897 CFIL_LOG(LOG_INFO, "so %llx kcunit %u outgoing %d",
3898 (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit, outgoing);
3899
3900 socket_lock_assert_owned(so);
3901
3902 entry = &cfil_info->cfi_entries[kcunit - 1];
3903 if (outgoing) {
3904 entrybuf = &entry->cfe_snd;
3905 } else {
3906 entrybuf = &entry->cfe_rcv;
3907 }
3908
3909 /* Are we attached to the filter? */
3910 if (entry->cfe_filter == NULL) {
3911 error = 0;
3912 goto done;
3913 }
3914
3915 /* Dispatch to filters */
3916 cfil_queue_enqueue(&entrybuf->cfe_ctl_q, data, datalen);
3917 if (outgoing) {
3918 OSAddAtomic64(datalen,
3919 &cfil_stats.cfs_ctl_q_out_enqueued);
3920 } else {
3921 OSAddAtomic64(datalen,
3922 &cfil_stats.cfs_ctl_q_in_enqueued);
3923 }
3924
3925 error = cfil_data_service_ctl_q(so, cfil_info, kcunit, outgoing);
3926 if (error != 0) {
3927 CFIL_LOG(LOG_ERR, "cfil_data_service_ctl_q() error %d",
3928 error);
3929 }
3930 /*
3931 * We have to return EJUSTRETURN in all cases to avoid double free
3932 * by socket layer
3933 */
3934 error = EJUSTRETURN;
3935 done:
3936 CFIL_INFO_VERIFY(cfil_info);
3937
3938 CFIL_LOG(LOG_INFO, "return %d", error);
3939 return error;
3940 }
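/*
 * Note on the EJUSTRETURN convention above: once the data has been enqueued
 * on the control queue, the mbuf chain is owned by the content filter
 * subsystem, so callers in the socket layer are meant to treat EJUSTRETURN
 * as "consumed here, do not free"; the data reappears later through the
 * inject queues once a filter verdict lets it pass.
 */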
3941
3942 /*
3943 * cfil_service_inject_queue() re-inject data that passed the
3944 * content filters
3945 */
3946 static int
3947 cfil_service_inject_queue(struct socket *so, struct cfil_info *cfil_info, int outgoing)
3948 {
3949 mbuf_t data;
3950 unsigned int datalen;
3951 int mbcnt = 0;
3952 int mbnum = 0;
3953 errno_t error = 0;
3954 struct cfi_buf *cfi_buf;
3955 struct cfil_queue *inject_q;
3956 int need_rwakeup = 0;
3957 int count = 0;
3958
3959 if (cfil_info == NULL) {
3960 return 0;
3961 }
3962
3963 socket_lock_assert_owned(so);
3964
3965 if (outgoing) {
3966 cfi_buf = &cfil_info->cfi_snd;
3967 cfil_info->cfi_flags &= ~CFIF_RETRY_INJECT_OUT;
3968 } else {
3969 cfi_buf = &cfil_info->cfi_rcv;
3970 cfil_info->cfi_flags &= ~CFIF_RETRY_INJECT_IN;
3971 }
3972 inject_q = &cfi_buf->cfi_inject_q;
3973
3974 if (cfil_queue_empty(inject_q)) {
3975 return 0;
3976 }
3977
3978 #if DATA_DEBUG | VERDICT_DEBUG
3979 CFIL_LOG(LOG_ERR, "CFIL: SERVICE INJECT-Q: <so %llx> outgoing %d queue len %llu",
3980 (uint64_t)VM_KERNEL_ADDRPERM(so), outgoing, cfil_queue_len(inject_q));
3981 #endif
3982
3983 while ((data = cfil_queue_first(inject_q)) != NULL) {
3984 datalen = cfil_data_length(data, &mbcnt, &mbnum);
3985
3986 #if DATA_DEBUG
3987 CFIL_LOG(LOG_DEBUG, "CFIL: SERVICE INJECT-Q: <%s>: <so %llx> data %llx datalen %u (mbcnt %u)",
3988 cfil_info->cfi_hash_entry ? "UNCONNECTED" : "CONNECTED",
3989 (uint64_t)VM_KERNEL_ADDRPERM(so), (uint64_t)VM_KERNEL_ADDRPERM(data), datalen, mbcnt);
3990 #endif
3991
3992 /* Remove data from queue and adjust stats */
3993 cfil_queue_remove(inject_q, data, datalen);
3994 cfi_buf->cfi_pending_first += datalen;
3995 cfi_buf->cfi_pending_mbcnt -= mbcnt;
3996 cfi_buf->cfi_pending_mbnum -= mbnum;
3997 cfil_info_buf_verify(cfi_buf);
3998
3999 if (outgoing) {
4000 error = sosend_reinject(so, NULL, data, NULL, 0);
4001 if (error != 0) {
4002 #if DATA_DEBUG
4003 cfil_info_log(LOG_ERR, cfil_info, "CFIL: Error: sosend_reinject() failed");
4004 CFIL_LOG(LOG_ERR, "### sosend() failed %d", error);
4005 #endif
4006 break;
4007 }
4008 // At least one injection succeeded, need to wake up pending threads.
4009 need_rwakeup = 1;
4010 } else {
4011 data->m_flags |= M_SKIPCFIL;
4012
4013 /*
4014 * NOTE: We currently only support TCP and UDP.
4015 * For RAWIP, MPTCP and message TCP we'll
4016 * need to call the appropriate sbappendxxx()
4017 * or fix sock_inject_data_in()
4018 */
4019 if (IS_UDP(so) == TRUE) {
4020 if (sbappendchain(&so->so_rcv, data, 0)) {
4021 need_rwakeup = 1;
4022 }
4023 } else {
4024 if (sbappendstream(&so->so_rcv, data)) {
4025 need_rwakeup = 1;
4026 }
4027 }
4028 }
4029
4030 if (outgoing) {
4031 OSAddAtomic64(datalen,
4032 &cfil_stats.cfs_inject_q_out_passed);
4033 } else {
4034 OSAddAtomic64(datalen,
4035 &cfil_stats.cfs_inject_q_in_passed);
4036 }
4037
4038 count++;
4039 }
4040
4041 #if DATA_DEBUG | VERDICT_DEBUG
4042 CFIL_LOG(LOG_ERR, "CFIL: SERVICE INJECT-Q: <so %llx> injected %d",
4043 (uint64_t)VM_KERNEL_ADDRPERM(so), count);
4044 #endif
4045
4046 /* A single wakeup for several packets is more efficient */
4047 if (need_rwakeup) {
4048 if (outgoing == TRUE) {
4049 sowwakeup(so);
4050 } else {
4051 sorwakeup(so);
4052 }
4053 }
4054
4055 if (error != 0 && cfil_info) {
4056 if (error == ENOBUFS) {
4057 OSIncrementAtomic(&cfil_stats.cfs_inject_q_nobufs);
4058 }
4059 if (error == ENOMEM) {
4060 OSIncrementAtomic(&cfil_stats.cfs_inject_q_nomem);
4061 }
4062
4063 if (outgoing) {
4064 cfil_info->cfi_flags |= CFIF_RETRY_INJECT_OUT;
4065 OSIncrementAtomic(&cfil_stats.cfs_inject_q_out_fail);
4066 } else {
4067 cfil_info->cfi_flags |= CFIF_RETRY_INJECT_IN;
4068 OSIncrementAtomic(&cfil_stats.cfs_inject_q_in_fail);
4069 }
4070 }
4071
4072 /*
4073 * Notify
4074 */
4075 if (cfil_info && (cfil_info->cfi_flags & CFIF_SHUT_WR)) {
4076 cfil_sock_notify_shutdown(so, SHUT_WR);
4077 if (cfil_sock_data_pending(&so->so_snd) == 0) {
4078 soshutdownlock_final(so, SHUT_WR);
4079 }
4080 }
4081 if (cfil_info && (cfil_info->cfi_flags & CFIF_CLOSE_WAIT)) {
4082 if (cfil_filters_attached(so) == 0) {
4083 CFIL_LOG(LOG_INFO, "so %llx waking",
4084 (uint64_t)VM_KERNEL_ADDRPERM(so));
4085 wakeup((caddr_t)cfil_info);
4086 }
4087 }
4088
4089 CFIL_INFO_VERIFY(cfil_info);
4090
4091 return error;
4092 }
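/*
 * Re-injection above is asymmetric by design: outgoing data goes back down
 * through sosend_reinject(), while incoming data is appended directly to
 * so_rcv (sbappendstream() for TCP, sbappendchain() for UDP) with
 * M_SKIPCFIL set so the re-injected mbufs are not run through the content
 * filters a second time.  A single sowwakeup()/sorwakeup() at the end then
 * covers all packets injected in this pass.
 */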
4093
4094 static int
4095 cfil_service_pending_queue(struct socket *so, struct cfil_info *cfil_info, uint32_t kcunit, int outgoing)
4096 {
4097 uint64_t passlen, curlen;
4098 mbuf_t data;
4099 unsigned int datalen;
4100 errno_t error = 0;
4101 struct cfil_entry *entry;
4102 struct cfe_buf *entrybuf;
4103 struct cfil_queue *pending_q;
4104
4105 CFIL_LOG(LOG_INFO, "so %llx kcunit %u outgoing %d",
4106 (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit, outgoing);
4107
4108 socket_lock_assert_owned(so);
4109
4110 entry = &cfil_info->cfi_entries[kcunit - 1];
4111 if (outgoing) {
4112 entrybuf = &entry->cfe_snd;
4113 } else {
4114 entrybuf = &entry->cfe_rcv;
4115 }
4116
4117 pending_q = &entrybuf->cfe_pending_q;
4118
4119 passlen = entrybuf->cfe_pass_offset - pending_q->q_start;
4120
4121 /*
4122 * Locate the chunks of data that we can pass to the next filter
4123 * A data chunk must be on mbuf boundaries
4124 */
4125 curlen = 0;
4126 while ((data = cfil_queue_first(pending_q)) != NULL) {
4127 struct cfil_entry *iter_entry;
4128 datalen = cfil_data_length(data, NULL, NULL);
4129
4130 #if DATA_DEBUG
4131 CFIL_LOG(LOG_DEBUG,
4132 "CFIL: SERVICE PENDING-Q: data %llx datalen %u passlen %llu curlen %llu",
4133 (uint64_t)VM_KERNEL_ADDRPERM(data), datalen,
4134 passlen, curlen);
4135 #endif
4136
4137 if (curlen + datalen > passlen) {
4138 break;
4139 }
4140
4141 cfil_queue_remove(pending_q, data, datalen);
4142
4143 curlen += datalen;
4144
4145 for (iter_entry = SLIST_NEXT(entry, cfe_order_link);
4146 iter_entry != NULL;
4147 iter_entry = SLIST_NEXT(iter_entry, cfe_order_link)) {
4148 error = cfil_data_filter(so, cfil_info, CFI_ENTRY_KCUNIT(cfil_info, iter_entry), outgoing,
4149 data, datalen);
4150 /* 0 means passed so we can continue */
4151 if (error != 0) {
4152 break;
4153 }
4154 }
4155 /* When data has passed all filters, re-inject */
4156 if (error == 0) {
4157 if (outgoing) {
4158 cfil_queue_enqueue(
4159 &cfil_info->cfi_snd.cfi_inject_q,
4160 data, datalen);
4161 OSAddAtomic64(datalen,
4162 &cfil_stats.cfs_inject_q_out_enqueued);
4163 } else {
4164 cfil_queue_enqueue(
4165 &cfil_info->cfi_rcv.cfi_inject_q,
4166 data, datalen);
4167 OSAddAtomic64(datalen,
4168 &cfil_stats.cfs_inject_q_in_enqueued);
4169 }
4170 }
4171 }
4172
4173 CFIL_INFO_VERIFY(cfil_info);
4174
4175 return error;
4176 }
4177
4178 int
4179 cfil_update_data_offsets(struct socket *so, struct cfil_info *cfil_info, uint32_t kcunit, int outgoing,
4180 uint64_t pass_offset, uint64_t peek_offset)
4181 {
4182 errno_t error = 0;
4183 struct cfil_entry *entry = NULL;
4184 struct cfe_buf *entrybuf;
4185 int updated = 0;
4186
4187 CFIL_LOG(LOG_INFO, "pass %llu peek %llu", pass_offset, peek_offset);
4188
4189 socket_lock_assert_owned(so);
4190
4191 if (cfil_info == NULL) {
4192 CFIL_LOG(LOG_ERR, "so %llx cfil detached",
4193 (uint64_t)VM_KERNEL_ADDRPERM(so));
4194 error = 0;
4195 goto done;
4196 } else if (cfil_info->cfi_flags & CFIF_DROP) {
4197 CFIL_LOG(LOG_ERR, "so %llx drop set",
4198 (uint64_t)VM_KERNEL_ADDRPERM(so));
4199 error = EPIPE;
4200 goto done;
4201 }
4202
4203 entry = &cfil_info->cfi_entries[kcunit - 1];
4204 if (outgoing) {
4205 entrybuf = &entry->cfe_snd;
4206 } else {
4207 entrybuf = &entry->cfe_rcv;
4208 }
4209
4210 /* Record updated offsets for this content filter */
4211 if (pass_offset > entrybuf->cfe_pass_offset) {
4212 entrybuf->cfe_pass_offset = pass_offset;
4213
4214 if (entrybuf->cfe_peek_offset < entrybuf->cfe_pass_offset) {
4215 entrybuf->cfe_peek_offset = entrybuf->cfe_pass_offset;
4216 }
4217 updated = 1;
4218 } else {
4219 CFIL_LOG(LOG_INFO, "pass_offset %llu <= cfe_pass_offset %llu",
4220 pass_offset, entrybuf->cfe_pass_offset);
4221 }
4222 /* Filter does not want or need to see data that's allowed to pass */
4223 if (peek_offset > entrybuf->cfe_pass_offset &&
4224 peek_offset > entrybuf->cfe_peek_offset) {
4225 entrybuf->cfe_peek_offset = peek_offset;
4226 updated = 1;
4227 }
4228 /* Nothing to do */
4229 if (updated == 0) {
4230 goto done;
4231 }
4232
4233 /* Move data held in control queue to pending queue if needed */
4234 error = cfil_data_service_ctl_q(so, cfil_info, kcunit, outgoing);
4235 if (error != 0) {
4236 CFIL_LOG(LOG_ERR, "cfil_data_service_ctl_q() error %d",
4237 error);
4238 goto done;
4239 }
4240 error = EJUSTRETURN;
4241
4242 done:
4243 /*
4244 * The filter is effectively detached when all data has passed from both sides
4245 * or when the socket is closed and no more data is waiting
4246 * to be delivered to the filter
4247 */
4248 if (entry != NULL &&
4249 ((entry->cfe_snd.cfe_pass_offset == CFM_MAX_OFFSET &&
4250 entry->cfe_rcv.cfe_pass_offset == CFM_MAX_OFFSET) ||
4251 ((cfil_info->cfi_flags & CFIF_CLOSE_WAIT) &&
4252 cfil_queue_empty(&entry->cfe_snd.cfe_ctl_q) &&
4253 cfil_queue_empty(&entry->cfe_rcv.cfe_ctl_q)))) {
4254 entry->cfe_flags |= CFEF_CFIL_DETACHED;
4255 #if LIFECYCLE_DEBUG
4256 cfil_info_log(LOG_ERR, cfil_info, outgoing ?
4257 "CFIL: LIFECYCLE: OUT - PASSED ALL - DETACH":
4258 "CFIL: LIFECYCLE: IN - PASSED ALL - DETACH");
4259 #endif
4260 CFIL_LOG(LOG_INFO, "so %llx detached %u",
4261 (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit);
4262 if ((cfil_info->cfi_flags & CFIF_CLOSE_WAIT) &&
4263 cfil_filters_attached(so) == 0) {
4264 #if LIFECYCLE_DEBUG
4265 cfil_info_log(LOG_ERR, cfil_info, "CFIL: LIFECYCLE: WAKING");
4266 #endif
4267 CFIL_LOG(LOG_INFO, "so %llx waking",
4268 (uint64_t)VM_KERNEL_ADDRPERM(so));
4269 wakeup((caddr_t)cfil_info);
4270 }
4271 }
4272 CFIL_INFO_VERIFY(cfil_info);
4273 CFIL_LOG(LOG_INFO, "return %d", error);
4274 return error;
4275 }
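/*
 * Example of how a filter's offset update flows through the function above
 * (made-up numbers): if a filter that had previously allowed 1000 bytes now
 * reports pass_offset == 2000 and peek_offset == 3000, cfe_pass_offset and
 * cfe_peek_offset are raised accordingly, the control queue is serviced so
 * anything below 2000 moves toward the inject queue, and bytes in the
 * [2000, 3000) window are dispatched to the filter as peek-only data.
 * Reporting CFM_MAX_OFFSET for both directions is the "pass everything"
 * case that marks the entry CFEF_CFIL_DETACHED.
 */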
4276
4277 /*
4278 * Update pass offset for socket when no data is pending
4279 */
4280 static int
4281 cfil_set_socket_pass_offset(struct socket *so, struct cfil_info *cfil_info, int outgoing)
4282 {
4283 struct cfi_buf *cfi_buf;
4284 struct cfil_entry *entry;
4285 struct cfe_buf *entrybuf;
4286 uint32_t kcunit;
4287 uint64_t pass_offset = 0;
4288
4289 if (cfil_info == NULL) {
4290 return 0;
4291 }
4292
4293 CFIL_LOG(LOG_INFO, "so %llx outgoing %d",
4294 (uint64_t)VM_KERNEL_ADDRPERM(so), outgoing);
4295
4296 socket_lock_assert_owned(so);
4297
4298 if (outgoing) {
4299 cfi_buf = &cfil_info->cfi_snd;
4300 } else {
4301 cfi_buf = &cfil_info->cfi_rcv;
4302 }
4303
4304 CFIL_LOG(LOG_DEBUG, "CFIL: <so %llx, sockID %llu> outgoing %d cfi_pending_first %llu cfi_pending_last %llu",
4305 (uint64_t)VM_KERNEL_ADDRPERM(so), cfil_info->cfi_sock_id, outgoing,
4306 cfi_buf->cfi_pending_first, cfi_buf->cfi_pending_last);
4307
4308 if (cfi_buf->cfi_pending_last - cfi_buf->cfi_pending_first == 0) {
4309 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
4310 entry = &cfil_info->cfi_entries[kcunit - 1];
4311
4312 /* Are we attached to a filter? */
4313 if (entry->cfe_filter == NULL) {
4314 continue;
4315 }
4316
4317 if (outgoing) {
4318 entrybuf = &entry->cfe_snd;
4319 } else {
4320 entrybuf = &entry->cfe_rcv;
4321 }
4322
4323 if (pass_offset == 0 ||
4324 entrybuf->cfe_pass_offset < pass_offset) {
4325 pass_offset = entrybuf->cfe_pass_offset;
4326 }
4327 }
4328 cfi_buf->cfi_pass_offset = pass_offset;
4329 }
4330
4331 CFIL_LOG(LOG_DEBUG, "CFIL: <so %llx, sockID %llu>, cfi_pass_offset %llu",
4332 (uint64_t)VM_KERNEL_ADDRPERM(so), cfil_info->cfi_sock_id, cfi_buf->cfi_pass_offset);
4333
4334 return 0;
4335 }
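/*
 * Example for cfil_set_socket_pass_offset() (made-up numbers): with two
 * attached filters whose cfe_pass_offset values are 500 and 300, the
 * socket-level cfi_pass_offset becomes 300, i.e. the most conservative of
 * the attached filters, and it is only recomputed when nothing is pending
 * (cfi_pending_last == cfi_pending_first).
 */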
4336
4337 int
4338 cfil_action_data_pass(struct socket *so, struct cfil_info *cfil_info, uint32_t kcunit, int outgoing,
4339 uint64_t pass_offset, uint64_t peek_offset)
4340 {
4341 errno_t error = 0;
4342
4343 CFIL_LOG(LOG_INFO, "");
4344
4345 socket_lock_assert_owned(so);
4346
4347 error = cfil_acquire_sockbuf(so, cfil_info, outgoing);
4348 if (error != 0) {
4349 CFIL_LOG(LOG_INFO, "so %llx %s dropped",
4350 (uint64_t)VM_KERNEL_ADDRPERM(so),
4351 outgoing ? "out" : "in");
4352 goto release;
4353 }
4354
4355 error = cfil_update_data_offsets(so, cfil_info, kcunit, outgoing,
4356 pass_offset, peek_offset);
4357
4358 cfil_service_inject_queue(so, cfil_info, outgoing);
4359
4360 cfil_set_socket_pass_offset(so, cfil_info, outgoing);
4361 release:
4362 CFIL_INFO_VERIFY(cfil_info);
4363 cfil_release_sockbuf(so, outgoing);
4364
4365 return error;
4366 }
4367
4368
4369 static void
4370 cfil_flush_queues(struct socket *so, struct cfil_info *cfil_info)
4371 {
4372 struct cfil_entry *entry;
4373 int kcunit;
4374 uint64_t drained;
4375
4376 if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || cfil_info == NULL) {
4377 goto done;
4378 }
4379
4380 socket_lock_assert_owned(so);
4381
4382 /*
4383 * Flush the output queues and ignore errors as long as
4384 * we are attached
4385 */
4386 (void) cfil_acquire_sockbuf(so, cfil_info, 1);
4387 if (cfil_info != NULL) {
4388 drained = 0;
4389 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
4390 entry = &cfil_info->cfi_entries[kcunit - 1];
4391
4392 drained += cfil_queue_drain(&entry->cfe_snd.cfe_ctl_q);
4393 drained += cfil_queue_drain(&entry->cfe_snd.cfe_pending_q);
4394 }
4395 drained += cfil_queue_drain(&cfil_info->cfi_snd.cfi_inject_q);
4396
4397 if (drained) {
4398 if (cfil_info->cfi_flags & CFIF_DROP) {
4399 OSIncrementAtomic(
4400 &cfil_stats.cfs_flush_out_drop);
4401 } else {
4402 OSIncrementAtomic(
4403 &cfil_stats.cfs_flush_out_close);
4404 }
4405 }
4406 }
4407 cfil_release_sockbuf(so, 1);
4408
4409 /*
4410 * Flush the input queues
4411 */
4412 (void) cfil_acquire_sockbuf(so, cfil_info, 0);
4413 if (cfil_info != NULL) {
4414 drained = 0;
4415 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
4416 entry = &cfil_info->cfi_entries[kcunit - 1];
4417
4418 drained += cfil_queue_drain(
4419 &entry->cfe_rcv.cfe_ctl_q);
4420 drained += cfil_queue_drain(
4421 &entry->cfe_rcv.cfe_pending_q);
4422 }
4423 drained += cfil_queue_drain(&cfil_info->cfi_rcv.cfi_inject_q);
4424
4425 if (drained) {
4426 if (cfil_info->cfi_flags & CFIF_DROP) {
4427 OSIncrementAtomic(
4428 &cfil_stats.cfs_flush_in_drop);
4429 } else {
4430 OSIncrementAtomic(
4431 &cfil_stats.cfs_flush_in_close);
4432 }
4433 }
4434 }
4435 cfil_release_sockbuf(so, 0);
4436 done:
4437 CFIL_INFO_VERIFY(cfil_info);
4438 }
4439
4440 int
4441 cfil_action_drop(struct socket *so, struct cfil_info *cfil_info, uint32_t kcunit)
4442 {
4443 errno_t error = 0;
4444 struct cfil_entry *entry;
4445 struct proc *p;
4446
4447 if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || cfil_info == NULL) {
4448 goto done;
4449 }
4450
4451 socket_lock_assert_owned(so);
4452
4453 entry = &cfil_info->cfi_entries[kcunit - 1];
4454
4455 /* Are we attached to the filter? */
4456 if (entry->cfe_filter == NULL) {
4457 goto done;
4458 }
4459
4460 cfil_info->cfi_flags |= CFIF_DROP;
4461
4462 p = current_proc();
4463
4464 /*
4465 * Force the socket to be marked defunct
4466 * (forcing fixed along with rdar://19391339)
4467 */
4468 if (so->so_cfil_db == NULL) {
4469 error = sosetdefunct(p, so,
4470 SHUTDOWN_SOCKET_LEVEL_CONTENT_FILTER | SHUTDOWN_SOCKET_LEVEL_DISCONNECT_ALL,
4471 FALSE);
4472
4473 /* Flush the socket buffer and disconnect */
4474 if (error == 0) {
4475 error = sodefunct(p, so,
4476 SHUTDOWN_SOCKET_LEVEL_CONTENT_FILTER | SHUTDOWN_SOCKET_LEVEL_DISCONNECT_ALL);
4477 }
4478 }
4479
4480 /* The filter is done, mark as detached */
4481 entry->cfe_flags |= CFEF_CFIL_DETACHED;
4482 #if LIFECYCLE_DEBUG
4483 cfil_info_log(LOG_ERR, cfil_info, "CFIL: LIFECYCLE: DROP - DETACH");
4484 #endif
4485 CFIL_LOG(LOG_INFO, "so %llx detached %u",
4486 (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit);
4487
4488 /* Pending data needs to go */
4489 cfil_flush_queues(so, cfil_info);
4490
4491 if (cfil_info && (cfil_info->cfi_flags & CFIF_CLOSE_WAIT)) {
4492 if (cfil_filters_attached(so) == 0) {
4493 CFIL_LOG(LOG_INFO, "so %llx waking",
4494 (uint64_t)VM_KERNEL_ADDRPERM(so));
4495 wakeup((caddr_t)cfil_info);
4496 }
4497 }
4498 done:
4499 return error;
4500 }
4501
4502 int
4503 cfil_action_bless_client(uint32_t kcunit, struct cfil_msg_hdr *msghdr)
4504 {
4505 errno_t error = 0;
4506 struct cfil_info *cfil_info = NULL;
4507
4508 bool cfil_attached = false;
4509 struct cfil_msg_bless_client *blessmsg = (struct cfil_msg_bless_client *)msghdr;
4510
4511 // Search and lock socket
4512 struct socket *so = cfil_socket_from_client_uuid(blessmsg->cfb_client_uuid, &cfil_attached);
4513 if (so == NULL) {
4514 error = ENOENT;
4515 } else {
4516 // The client gets a pass automatically
4517 cfil_info = (so->so_cfil_db != NULL) ?
4518 cfil_db_get_cfil_info(so->so_cfil_db, msghdr->cfm_sock_id) : so->so_cfil;
4519
4520 if (cfil_attached) {
4521 #if VERDICT_DEBUG
4522 if (cfil_info != NULL) {
4523 CFIL_LOG(LOG_ERR, "CFIL: VERDICT RECEIVED: BLESS %s <so %llx sockID %llu>",
4524 cfil_info->cfi_hash_entry ? "UDP" : "TCP",
4525 (uint64_t)VM_KERNEL_ADDRPERM(so),
4526 cfil_info->cfi_sock_id);
4527 }
4528 #endif
4529 cfil_sock_received_verdict(so);
4530 (void)cfil_action_data_pass(so, cfil_info, kcunit, 1, CFM_MAX_OFFSET, CFM_MAX_OFFSET);
4531 (void)cfil_action_data_pass(so, cfil_info, kcunit, 0, CFM_MAX_OFFSET, CFM_MAX_OFFSET);
4532 } else {
4533 so->so_flags1 |= SOF1_CONTENT_FILTER_SKIP;
4534 }
4535 socket_unlock(so, 1);
4536 }
4537
4538 return error;
4539 }
4540
4541 int
4542 cfil_action_set_crypto_key(uint32_t kcunit, struct cfil_msg_hdr *msghdr)
4543 {
4544 struct content_filter *cfc = NULL;
4545 cfil_crypto_state_t crypto_state = NULL;
4546 struct cfil_msg_set_crypto_key *keymsg = (struct cfil_msg_set_crypto_key *)msghdr;
4547
4548 CFIL_LOG(LOG_NOTICE, "");
4549
4550 if (content_filters == NULL) {
4551 CFIL_LOG(LOG_ERR, "no content filter");
4552 return EINVAL;
4553 }
4554 if (kcunit > MAX_CONTENT_FILTER) {
4555 CFIL_LOG(LOG_ERR, "kcunit %u > MAX_CONTENT_FILTER (%d)",
4556 kcunit, MAX_CONTENT_FILTER);
4557 return EINVAL;
4558 }
4559 crypto_state = cfil_crypto_init_client((uint8_t *)keymsg->crypto_key);
4560 if (crypto_state == NULL) {
4561 CFIL_LOG(LOG_ERR, "failed to initialize crypto state for unit %u)",
4562 kcunit);
4563 return EINVAL;
4564 }
4565
4566 cfil_rw_lock_exclusive(&cfil_lck_rw);
4567
4568 cfc = content_filters[kcunit - 1];
4569 if (cfc->cf_kcunit != kcunit) {
4570 CFIL_LOG(LOG_ERR, "bad unit info %u)",
4571 kcunit);
4572 cfil_rw_unlock_exclusive(&cfil_lck_rw);
4573 cfil_crypto_cleanup_state(crypto_state);
4574 return EINVAL;
4575 }
4576 if (cfc->cf_crypto_state != NULL) {
4577 cfil_crypto_cleanup_state(cfc->cf_crypto_state);
4578 cfc->cf_crypto_state = NULL;
4579 }
4580 cfc->cf_crypto_state = crypto_state;
4581
4582 cfil_rw_unlock_exclusive(&cfil_lck_rw);
4583 return 0;
4584 }
4585
4586 static int
4587 cfil_update_entry_offsets(struct socket *so, struct cfil_info *cfil_info, int outgoing, unsigned int datalen)
4588 {
4589 struct cfil_entry *entry;
4590 struct cfe_buf *entrybuf;
4591 uint32_t kcunit;
4592
4593 CFIL_LOG(LOG_INFO, "so %llx outgoing %d datalen %u",
4594 (uint64_t)VM_KERNEL_ADDRPERM(so), outgoing, datalen);
4595
4596 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
4597 entry = &cfil_info->cfi_entries[kcunit - 1];
4598
4599 /* Are we attached to the filter? */
4600 if (entry->cfe_filter == NULL) {
4601 continue;
4602 }
4603
4604 if (outgoing) {
4605 entrybuf = &entry->cfe_snd;
4606 } else {
4607 entrybuf = &entry->cfe_rcv;
4608 }
4609
4610 entrybuf->cfe_ctl_q.q_start += datalen;
4611 entrybuf->cfe_pass_offset = entrybuf->cfe_ctl_q.q_start;
4612 entrybuf->cfe_peeked = entrybuf->cfe_ctl_q.q_start;
4613 if (entrybuf->cfe_peek_offset < entrybuf->cfe_pass_offset) {
4614 entrybuf->cfe_peek_offset = entrybuf->cfe_pass_offset;
4615 }
4616
4617 entrybuf->cfe_ctl_q.q_end += datalen;
4618
4619 entrybuf->cfe_pending_q.q_start += datalen;
4620 entrybuf->cfe_pending_q.q_end += datalen;
4621 }
4622 CFIL_INFO_VERIFY(cfil_info);
4623 return 0;
4624 }
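/*
 * cfil_update_entry_offsets() backs the fast path in cfil_data_common():
 * when the new data still fits entirely under the socket's cfi_pass_offset,
 * there is nothing for the filters to decide, so every attached entry's
 * queue offsets (q_start, q_end, cfe_pass_offset, cfe_peeked, and the
 * pending queue bounds) are simply advanced by datalen, as if the data had
 * been enqueued, peeked and passed immediately.
 */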
4625
4626 int
4627 cfil_data_common(struct socket *so, struct cfil_info *cfil_info, int outgoing, struct sockaddr *to,
4628 struct mbuf *data, struct mbuf *control, uint32_t flags)
4629 {
4630 #pragma unused(to, control, flags)
4631 errno_t error = 0;
4632 unsigned int datalen;
4633 int mbcnt = 0;
4634 int mbnum = 0;
4635 int kcunit;
4636 struct cfi_buf *cfi_buf;
4637 struct mbuf *chain = NULL;
4638
4639 if (cfil_info == NULL) {
4640 CFIL_LOG(LOG_ERR, "so %llx cfil detached",
4641 (uint64_t)VM_KERNEL_ADDRPERM(so));
4642 error = 0;
4643 goto done;
4644 } else if (cfil_info->cfi_flags & CFIF_DROP) {
4645 CFIL_LOG(LOG_ERR, "so %llx drop set",
4646 (uint64_t)VM_KERNEL_ADDRPERM(so));
4647 error = EPIPE;
4648 goto done;
4649 }
4650
4651 datalen = cfil_data_length(data, &mbcnt, &mbnum);
4652
4653 if (outgoing) {
4654 cfi_buf = &cfil_info->cfi_snd;
4655 cfil_info->cfi_byte_outbound_count += datalen;
4656 } else {
4657 cfi_buf = &cfil_info->cfi_rcv;
4658 cfil_info->cfi_byte_inbound_count += datalen;
4659 }
4660
4661 cfi_buf->cfi_pending_last += datalen;
4662 cfi_buf->cfi_pending_mbcnt += mbcnt;
4663 cfi_buf->cfi_pending_mbnum += mbnum;
4664
4665 if (IS_UDP(so)) {
4666 if (cfi_buf->cfi_pending_mbnum > cfil_udp_gc_mbuf_num_max ||
4667 cfi_buf->cfi_pending_mbcnt > cfil_udp_gc_mbuf_cnt_max) {
4668 cfi_buf->cfi_tail_drop_cnt++;
4669 cfi_buf->cfi_pending_mbcnt -= mbcnt;
4670 cfi_buf->cfi_pending_mbnum -= mbnum;
4671 return EPIPE;
4672 }
4673 }
4674
4675 cfil_info_buf_verify(cfi_buf);
4676
4677 #if DATA_DEBUG
4678 CFIL_LOG(LOG_DEBUG, "CFIL: QUEUEING DATA: <so %llx> %s: data %llx len %u flags 0x%x nextpkt %llx - cfi_pending_last %llu cfi_pending_mbcnt %u cfi_pass_offset %llu",
4679 (uint64_t)VM_KERNEL_ADDRPERM(so),
4680 outgoing ? "OUT" : "IN",
4681 (uint64_t)VM_KERNEL_ADDRPERM(data), datalen, data->m_flags,
4682 (uint64_t)VM_KERNEL_ADDRPERM(data->m_nextpkt),
4683 cfi_buf->cfi_pending_last,
4684 cfi_buf->cfi_pending_mbcnt,
4685 cfi_buf->cfi_pass_offset);
4686 #endif
4687
4688 /* Fast path when below pass offset */
4689 if (cfi_buf->cfi_pending_last <= cfi_buf->cfi_pass_offset) {
4690 cfil_update_entry_offsets(so, cfil_info, outgoing, datalen);
4691 #if DATA_DEBUG
4692 CFIL_LOG(LOG_DEBUG, "CFIL: QUEUEING DATA: FAST PATH");
4693 #endif
4694 } else {
4695 struct cfil_entry *iter_entry;
4696 SLIST_FOREACH(iter_entry, &cfil_info->cfi_ordered_entries, cfe_order_link) {
4697 // Is cfil attached to this filter?
4698 kcunit = CFI_ENTRY_KCUNIT(cfil_info, iter_entry);
4699 if (IS_ENTRY_ATTACHED(cfil_info, kcunit)) {
4700 if (IS_UDP(so) && chain == NULL) {
4701 /* UDP only:
4702 * Chain addr (incoming only, TBD), control (optional) and data into one chain.
4703 * This full chain will be reinjected into the socket after receiving the verdict.
4704 */
4705 (void) cfil_udp_save_socket_state(cfil_info, data);
4706 chain = sbconcat_mbufs(NULL, outgoing ? NULL : to, data, control);
4707 if (chain == NULL) {
4708 return ENOBUFS;
4709 }
4710 data = chain;
4711 }
4712 error = cfil_data_filter(so, cfil_info, kcunit, outgoing, data,
4713 datalen);
4714 }
4715 /* 0 means passed so continue with next filter */
4716 if (error != 0) {
4717 break;
4718 }
4719 }
4720 }
4721
4722 /* Move cursor if no filter claimed the data */
4723 if (error == 0) {
4724 cfi_buf->cfi_pending_first += datalen;
4725 cfi_buf->cfi_pending_mbcnt -= mbcnt;
4726 cfi_buf->cfi_pending_mbnum -= mbnum;
4727 cfil_info_buf_verify(cfi_buf);
4728 }
4729 done:
4730 CFIL_INFO_VERIFY(cfil_info);
4731
4732 return error;
4733 }
4734
4735 /*
4736 * Callback from socket layer sosendxxx()
4737 */
4738 int
4739 cfil_sock_data_out(struct socket *so, struct sockaddr *to,
4740 struct mbuf *data, struct mbuf *control, uint32_t flags)
4741 {
4742 int error = 0;
4743
4744 if (IS_UDP(so)) {
4745 return cfil_sock_udp_handle_data(TRUE, so, NULL, to, data, control, flags);
4746 }
4747
4748 if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL) {
4749 return 0;
4750 }
4751
4752 /*
4753 * Pass initial data for TFO.
4754 */
4755 if (IS_INITIAL_TFO_DATA(so)) {
4756 return 0;
4757 }
4758
4759 socket_lock_assert_owned(so);
4760
4761 if (so->so_cfil->cfi_flags & CFIF_DROP) {
4762 CFIL_LOG(LOG_ERR, "so %llx drop set",
4763 (uint64_t)VM_KERNEL_ADDRPERM(so));
4764 return EPIPE;
4765 }
4766 if (control != NULL) {
4767 CFIL_LOG(LOG_ERR, "so %llx control",
4768 (uint64_t)VM_KERNEL_ADDRPERM(so));
4769 OSIncrementAtomic(&cfil_stats.cfs_data_out_control);
4770 }
4771 if ((flags & MSG_OOB)) {
4772 CFIL_LOG(LOG_ERR, "so %llx MSG_OOB",
4773 (uint64_t)VM_KERNEL_ADDRPERM(so));
4774 OSIncrementAtomic(&cfil_stats.cfs_data_out_oob);
4775 }
4776 if ((so->so_snd.sb_flags & SB_LOCK) == 0) {
4777 panic("so %p SB_LOCK not set", so);
4778 }
4779
4780 if (so->so_snd.sb_cfil_thread != NULL) {
4781 panic("%s sb_cfil_thread %p not NULL", __func__,
4782 so->so_snd.sb_cfil_thread);
4783 }
4784
4785 error = cfil_data_common(so, so->so_cfil, 1, to, data, control, flags);
4786
4787 return error;
4788 }
4789
4790 /*
4791 * Callback from socket layer sbappendxxx()
4792 */
4793 int
4794 cfil_sock_data_in(struct socket *so, struct sockaddr *from,
4795 struct mbuf *data, struct mbuf *control, uint32_t flags)
4796 {
4797 int error = 0;
4798
4799 if (IS_UDP(so)) {
4800 return cfil_sock_udp_handle_data(FALSE, so, NULL, from, data, control, flags);
4801 }
4802
4803 if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL) {
4804 return 0;
4805 }
4806
4807 /*
4808 * Pass initial data for TFO.
4809 */
4810 if (IS_INITIAL_TFO_DATA(so)) {
4811 return 0;
4812 }
4813
4814 socket_lock_assert_owned(so);
4815
4816 if (so->so_cfil->cfi_flags & CFIF_DROP) {
4817 CFIL_LOG(LOG_ERR, "so %llx drop set",
4818 (uint64_t)VM_KERNEL_ADDRPERM(so));
4819 return EPIPE;
4820 }
4821 if (control != NULL) {
4822 CFIL_LOG(LOG_ERR, "so %llx control",
4823 (uint64_t)VM_KERNEL_ADDRPERM(so));
4824 OSIncrementAtomic(&cfil_stats.cfs_data_in_control);
4825 }
4826 if (data->m_type == MT_OOBDATA) {
4827 CFIL_LOG(LOG_ERR, "so %llx MSG_OOB",
4828 (uint64_t)VM_KERNEL_ADDRPERM(so));
4829 OSIncrementAtomic(&cfil_stats.cfs_data_in_oob);
4830 }
4831 error = cfil_data_common(so, so->so_cfil, 0, from, data, control, flags);
4832
4833 return error;
4834 }
4835
4836 /*
4837 * Callback from socket layer soshutdownxxx()
4838 *
4839 * We may delay the write-side shutdown if there is outgoing data still being processed.
4840 *
4841 * There is no point in delaying the read-side shutdown because the process
4842 * has indicated that it does not want to read any more data.
4843 */
4844 int
4845 cfil_sock_shutdown(struct socket *so, int *how)
4846 {
4847 int error = 0;
4848
4849 if (IS_UDP(so)) {
4850 return cfil_sock_udp_shutdown(so, how);
4851 }
4852
4853 if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL) {
4854 goto done;
4855 }
4856
4857 socket_lock_assert_owned(so);
4858
4859 CFIL_LOG(LOG_INFO, "so %llx how %d",
4860 (uint64_t)VM_KERNEL_ADDRPERM(so), *how);
4861
4862 /*
4863 * Check the state of the socket before the content filter
4864 */
4865 if (*how != SHUT_WR && (so->so_state & SS_CANTRCVMORE) != 0) {
4866 /* read already shut down */
4867 error = ENOTCONN;
4868 goto done;
4869 }
4870 if (*how != SHUT_RD && (so->so_state & SS_CANTSENDMORE) != 0) {
4871 /* write already shut down */
4872 error = ENOTCONN;
4873 goto done;
4874 }
4875
4876 if ((so->so_cfil->cfi_flags & CFIF_DROP) != 0) {
4877 CFIL_LOG(LOG_ERR, "so %llx drop set",
4878 (uint64_t)VM_KERNEL_ADDRPERM(so));
4879 goto done;
4880 }
4881
4882 /*
4883 * shutdown read: SHUT_RD or SHUT_RDWR
4884 */
4885 if (*how != SHUT_WR) {
4886 if (so->so_cfil->cfi_flags & CFIF_SHUT_RD) {
4887 error = ENOTCONN;
4888 goto done;
4889 }
4890 so->so_cfil->cfi_flags |= CFIF_SHUT_RD;
4891 cfil_sock_notify_shutdown(so, SHUT_RD);
4892 }
4893 /*
4894 * shutdown write: SHUT_WR or SHUT_RDWR
4895 */
4896 if (*how != SHUT_RD) {
4897 if (so->so_cfil->cfi_flags & CFIF_SHUT_WR) {
4898 error = ENOTCONN;
4899 goto done;
4900 }
4901 so->so_cfil->cfi_flags |= CFIF_SHUT_WR;
4902 cfil_sock_notify_shutdown(so, SHUT_WR);
4903 /*
4904 * When outgoing data is pending, we delay the shutdown at the
4905 * protocol level until the content filters give the final
4906 * verdict on the pending data.
4907 */
4908 if (cfil_sock_data_pending(&so->so_snd) != 0) {
4909 /*
4910 * When shutting down the read and write sides at once
4911 * we can proceed to the final shutdown of the read
4912 * side. Otherwise, we just return.
4913 */
4914 if (*how == SHUT_WR) {
4915 error = EJUSTRETURN;
4916 } else if (*how == SHUT_RDWR) {
4917 *how = SHUT_RD;
4918 }
4919 }
4920 }
4921 done:
4922 return error;
4923 }
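/*
 * Example of the delayed write shutdown above: if an application calls
 * shutdown(SHUT_RDWR) while outgoing data is still awaiting a verdict,
 * *how is rewritten to SHUT_RD so the protocol shuts the read side down
 * now, and the write side is completed later (see soshutdownlock_final()
 * in cfil_service_inject_queue()) once cfil_sock_data_pending() drops to
 * zero.  A plain shutdown(SHUT_WR) in the same situation returns
 * EJUSTRETURN so the protocol-level shutdown is skipped for now.
 */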
4924
4925 /*
4926 * This is called when the socket is closed and there is no more
4927 * opportunity for filtering
4928 */
4929 void
4930 cfil_sock_is_closed(struct socket *so)
4931 {
4932 errno_t error = 0;
4933 int kcunit;
4934
4935 if (IS_UDP(so)) {
4936 cfil_sock_udp_is_closed(so);
4937 return;
4938 }
4939
4940 if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL) {
4941 return;
4942 }
4943
4944 CFIL_LOG(LOG_INFO, "so %llx", (uint64_t)VM_KERNEL_ADDRPERM(so));
4945
4946 socket_lock_assert_owned(so);
4947
4948 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
4949 /* Let the filters know of the closing */
4950 error = cfil_dispatch_closed_event(so, so->so_cfil, kcunit);
4951 }
4952
4953 /* Last chance to push passed data out */
4954 error = cfil_acquire_sockbuf(so, so->so_cfil, 1);
4955 if (error == 0) {
4956 cfil_service_inject_queue(so, so->so_cfil, 1);
4957 }
4958 cfil_release_sockbuf(so, 1);
4959
4960 so->so_cfil->cfi_flags |= CFIF_SOCK_CLOSED;
4961
4962 /* Pending data needs to go */
4963 cfil_flush_queues(so, so->so_cfil);
4964
4965 CFIL_INFO_VERIFY(so->so_cfil);
4966 }
4967
4968 /*
4969 * This is called when the socket is disconnected so let the filters
4970 * know about the disconnection and that no more data will come
4971 *
4972 * The how parameter has the same values as soshutdown()
4973 */
4974 void
4975 cfil_sock_notify_shutdown(struct socket *so, int how)
4976 {
4977 errno_t error = 0;
4978 int kcunit;
4979
4980 if (IS_UDP(so)) {
4981 cfil_sock_udp_notify_shutdown(so, how, 0, 0);
4982 return;
4983 }
4984
4985 if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL) {
4986 return;
4987 }
4988
4989 CFIL_LOG(LOG_INFO, "so %llx how %d",
4990 (uint64_t)VM_KERNEL_ADDRPERM(so), how);
4991
4992 socket_lock_assert_owned(so);
4993
4994 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
4995 /* Disconnect incoming side */
4996 if (how != SHUT_WR) {
4997 error = cfil_dispatch_disconnect_event(so, so->so_cfil, kcunit, 0);
4998 }
4999 /* Disconnect outgoing side */
5000 if (how != SHUT_RD) {
5001 error = cfil_dispatch_disconnect_event(so, so->so_cfil, kcunit, 1);
5002 }
5003 }
5004 }
5005
5006 static int
5007 cfil_filters_attached(struct socket *so)
5008 {
5009 struct cfil_entry *entry;
5010 uint32_t kcunit;
5011 int attached = 0;
5012
5013 if (IS_UDP(so)) {
5014 return cfil_filters_udp_attached(so, FALSE);
5015 }
5016
5017 if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL) {
5018 return 0;
5019 }
5020
5021 socket_lock_assert_owned(so);
5022
5023 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
5024 entry = &so->so_cfil->cfi_entries[kcunit - 1];
5025
5026 /* Are we attached to the filter? */
5027 if (entry->cfe_filter == NULL) {
5028 continue;
5029 }
5030 if ((entry->cfe_flags & CFEF_SENT_SOCK_ATTACHED) == 0) {
5031 continue;
5032 }
5033 if ((entry->cfe_flags & CFEF_CFIL_DETACHED) != 0) {
5034 continue;
5035 }
5036 attached = 1;
5037 break;
5038 }
5039
5040 return attached;
5041 }
5042
5043 /*
5044 * This is called when the socket is closed and we are waiting for
5045 * the filters to give the final pass or drop
5046 */
5047 void
5048 cfil_sock_close_wait(struct socket *so)
5049 {
5050 lck_mtx_t *mutex_held;
5051 struct timespec ts;
5052 int error;
5053
5054 if (IS_UDP(so)) {
5055 cfil_sock_udp_close_wait(so);
5056 return;
5057 }
5058
5059 if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL) {
5060 return;
5061 }
5062
5063 CFIL_LOG(LOG_INFO, "so %llx", (uint64_t)VM_KERNEL_ADDRPERM(so));
5064
5065 if (so->so_proto->pr_getlock != NULL) {
5066 mutex_held = (*so->so_proto->pr_getlock)(so, PR_F_WILLUNLOCK);
5067 } else {
5068 mutex_held = so->so_proto->pr_domain->dom_mtx;
5069 }
5070 LCK_MTX_ASSERT(mutex_held, LCK_MTX_ASSERT_OWNED);
5071
5072 while (cfil_filters_attached(so)) {
5073 /*
5074 * Notify the filters we are going away so they can detach
5075 */
5076 cfil_sock_notify_shutdown(so, SHUT_RDWR);
5077
5078 /*
5079 * Make sure we need to wait after the filter are notified
5080 * of the disconnection
5081 */
5082 if (cfil_filters_attached(so) == 0) {
5083 break;
5084 }
5085
5086 CFIL_LOG(LOG_INFO, "so %llx waiting",
5087 (uint64_t)VM_KERNEL_ADDRPERM(so));
5088
5089 ts.tv_sec = cfil_close_wait_timeout / 1000;
5090 ts.tv_nsec = (cfil_close_wait_timeout % 1000) *
5091 NSEC_PER_USEC * 1000;
5092
5093 OSIncrementAtomic(&cfil_stats.cfs_close_wait);
5094 so->so_cfil->cfi_flags |= CFIF_CLOSE_WAIT;
5095 error = msleep((caddr_t)so->so_cfil, mutex_held,
5096 PSOCK | PCATCH, "cfil_sock_close_wait", &ts);
5097 so->so_cfil->cfi_flags &= ~CFIF_CLOSE_WAIT;
5098
5099 CFIL_LOG(LOG_NOTICE, "so %llx timed out %d",
5100 (uint64_t)VM_KERNEL_ADDRPERM(so), (error != 0));
5101
5102 /*
5103 * Force close in case of timeout
5104 */
5105 if (error != 0) {
5106 OSIncrementAtomic(&cfil_stats.cfs_close_wait_timeout);
5107 break;
5108 }
5109 }
5110 }
5111
5112 /*
5113 * Returns the size of the data held by the content filter
5114 */
5115 int32_t
5116 cfil_sock_data_pending(struct sockbuf *sb)
5117 {
5118 struct socket *so = sb->sb_so;
5119 uint64_t pending = 0;
5120
5121 if (IS_UDP(so)) {
5122 return cfil_sock_udp_data_pending(sb, FALSE);
5123 }
5124
5125 if ((so->so_flags & SOF_CONTENT_FILTER) != 0 && so->so_cfil != NULL) {
5126 struct cfi_buf *cfi_buf;
5127
5128 socket_lock_assert_owned(so);
5129
5130 if ((sb->sb_flags & SB_RECV) == 0) {
5131 cfi_buf = &so->so_cfil->cfi_snd;
5132 } else {
5133 cfi_buf = &so->so_cfil->cfi_rcv;
5134 }
5135
5136 pending = cfi_buf->cfi_pending_last -
5137 cfi_buf->cfi_pending_first;
5138
5139 /*
5140 * If we are limited by the "chars of mbufs used", adjust roughly
5141 * so that we won't overcommit
5142 */
5143 if (pending > (uint64_t)cfi_buf->cfi_pending_mbcnt) {
5144 pending = cfi_buf->cfi_pending_mbcnt;
5145 }
5146 }
5147
5148 VERIFY(pending < INT32_MAX);
5149
5150 return (int32_t)(pending);
5151 }
5152
5153 /*
5154 * Return the socket buffer space used by data being held by content filters
5155 * so processes won't clog the socket buffer
5156 */
5157 int32_t
5158 cfil_sock_data_space(struct sockbuf *sb)
5159 {
5160 struct socket *so = sb->sb_so;
5161 uint64_t pending = 0;
5162
5163 if (IS_UDP(so)) {
5164 return cfil_sock_udp_data_pending(sb, TRUE);
5165 }
5166
5167 if ((so->so_flags & SOF_CONTENT_FILTER) != 0 && so->so_cfil != NULL &&
5168 so->so_snd.sb_cfil_thread != current_thread()) {
5169 struct cfi_buf *cfi_buf;
5170
5171 socket_lock_assert_owned(so);
5172
5173 if ((sb->sb_flags & SB_RECV) == 0) {
5174 cfi_buf = &so->so_cfil->cfi_snd;
5175 } else {
5176 cfi_buf = &so->so_cfil->cfi_rcv;
5177 }
5178
5179 pending = cfi_buf->cfi_pending_last -
5180 cfi_buf->cfi_pending_first;
5181
5182 /*
5183 * If we are limited by the "chars of mbufs used", adjust roughly
5184 * so that we won't overcommit
5185 */
5186 if ((uint64_t)cfi_buf->cfi_pending_mbcnt > pending) {
5187 pending = cfi_buf->cfi_pending_mbcnt;
5188 }
5189 }
5190
5191 VERIFY(pending < INT32_MAX);
5192
5193 return (int32_t)(pending);
5194 }
5195
5196 /*
5197 * A callback from the socket and protocol layer when data becomes
5198 * available in the socket buffer to give a chance for the content filter
5199 * to re-inject data that was held back
5200 */
5201 void
5202 cfil_sock_buf_update(struct sockbuf *sb)
5203 {
5204 int outgoing;
5205 int error;
5206 struct socket *so = sb->sb_so;
5207
5208 if (IS_UDP(so)) {
5209 cfil_sock_udp_buf_update(sb);
5210 return;
5211 }
5212
5213 if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL) {
5214 return;
5215 }
5216
5217 if (!cfil_sbtrim) {
5218 return;
5219 }
5220
5221 socket_lock_assert_owned(so);
5222
5223 if ((sb->sb_flags & SB_RECV) == 0) {
5224 if ((so->so_cfil->cfi_flags & CFIF_RETRY_INJECT_OUT) == 0) {
5225 return;
5226 }
5227 outgoing = 1;
5228 OSIncrementAtomic(&cfil_stats.cfs_inject_q_out_retry);
5229 } else {
5230 if ((so->so_cfil->cfi_flags & CFIF_RETRY_INJECT_IN) == 0) {
5231 return;
5232 }
5233 outgoing = 0;
5234 OSIncrementAtomic(&cfil_stats.cfs_inject_q_in_retry);
5235 }
5236
5237 CFIL_LOG(LOG_NOTICE, "so %llx outgoing %d",
5238 (uint64_t)VM_KERNEL_ADDRPERM(so), outgoing);
5239
5240 error = cfil_acquire_sockbuf(so, so->so_cfil, outgoing);
5241 if (error == 0) {
5242 cfil_service_inject_queue(so, so->so_cfil, outgoing);
5243 }
5244 cfil_release_sockbuf(so, outgoing);
5245 }
5246
5247 int
5248 sysctl_cfil_filter_list(struct sysctl_oid *oidp, void *arg1, int arg2,
5249 struct sysctl_req *req)
5250 {
5251 #pragma unused(oidp, arg1, arg2)
5252 int error = 0;
5253 size_t len = 0;
5254 u_int32_t i;
5255
5256 /* Read only */
5257 if (req->newptr != USER_ADDR_NULL) {
5258 return EPERM;
5259 }
5260
5261 cfil_rw_lock_shared(&cfil_lck_rw);
5262
5263 for (i = 0; content_filters != NULL && i < MAX_CONTENT_FILTER; i++) {
5264 struct cfil_filter_stat filter_stat;
5265 struct content_filter *cfc = content_filters[i];
5266
5267 if (cfc == NULL) {
5268 continue;
5269 }
5270
5271 /* If just asking for the size */
5272 if (req->oldptr == USER_ADDR_NULL) {
5273 len += sizeof(struct cfil_filter_stat);
5274 continue;
5275 }
5276
5277 bzero(&filter_stat, sizeof(struct cfil_filter_stat));
5278 filter_stat.cfs_len = sizeof(struct cfil_filter_stat);
5279 filter_stat.cfs_filter_id = cfc->cf_kcunit;
5280 filter_stat.cfs_flags = cfc->cf_flags;
5281 filter_stat.cfs_sock_count = cfc->cf_sock_count;
5282 filter_stat.cfs_necp_control_unit = cfc->cf_necp_control_unit;
5283
5284 error = SYSCTL_OUT(req, &filter_stat,
5285 sizeof(struct cfil_filter_stat));
5286 if (error != 0) {
5287 break;
5288 }
5289 }
5290 /* If just asking for the size */
5291 if (req->oldptr == USER_ADDR_NULL) {
5292 req->oldidx = len;
5293 }
5294
5295 cfil_rw_unlock_shared(&cfil_lck_rw);
5296
5297 #if SHOW_DEBUG
5298 if (req->oldptr != USER_ADDR_NULL) {
5299 for (i = 1; content_filters != NULL && i <= MAX_CONTENT_FILTER; i++) {
5300 cfil_filter_show(i);
5301 }
5302 }
5303 #endif
5304
5305 return error;
5306 }
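/*
 * Sketch of the standard two-call pattern a user space tool could use to
 * read this list.  The handler is read-only and reports the required size
 * when called with a NULL output buffer.  The OID name below is assumed
 * for illustration only; the actual name is set where this handler is
 * registered elsewhere in this file.
 *
 *	size_t len = 0;
 *
 *	// First call: NULL buffer, handler reports the size via oldidx
 *	if (sysctlbyname("net.cfil.filter_list", NULL, &len, NULL, 0) == 0 &&
 *	    len > 0) {
 *		void *buf = malloc(len);
 *
 *		// Second call: fetch the array of struct cfil_filter_stat,
 *		// each record cfs_len bytes long
 *		if (buf != NULL &&
 *		    sysctlbyname("net.cfil.filter_list", buf, &len, NULL, 0) == 0) {
 *			// parse buf[0 .. len)
 *		}
 *		free(buf);
 *	}
 */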
5307
5308 static int
5309 sysctl_cfil_sock_list(struct sysctl_oid *oidp, void *arg1, int arg2,
5310 struct sysctl_req *req)
5311 {
5312 #pragma unused(oidp, arg1, arg2)
5313 int error = 0;
5314 u_int32_t i;
5315 struct cfil_info *cfi;
5316
5317 /* Read only */
5318 if (req->newptr != USER_ADDR_NULL) {
5319 return EPERM;
5320 }
5321
5322 cfil_rw_lock_shared(&cfil_lck_rw);
5323
5324 /*
5325 * If just asking for the size
5326 */
5327 if (req->oldptr == USER_ADDR_NULL) {
5328 req->oldidx = cfil_sock_attached_count *
5329 sizeof(struct cfil_sock_stat);
5330 /* Bump the length in case new sockets get attached */
5331 req->oldidx += req->oldidx >> 3;
5332 goto done;
5333 }
5334
5335 TAILQ_FOREACH(cfi, &cfil_sock_head, cfi_link) {
5336 struct cfil_entry *entry;
5337 struct cfil_sock_stat stat;
5338 struct socket *so = cfi->cfi_so;
5339
5340 bzero(&stat, sizeof(struct cfil_sock_stat));
5341 stat.cfs_len = sizeof(struct cfil_sock_stat);
5342 stat.cfs_sock_id = cfi->cfi_sock_id;
5343 stat.cfs_flags = cfi->cfi_flags;
5344
5345 if (so != NULL) {
5346 stat.cfs_pid = so->last_pid;
5347 memcpy(stat.cfs_uuid, so->last_uuid,
5348 sizeof(uuid_t));
5349 if (so->so_flags & SOF_DELEGATED) {
5350 stat.cfs_e_pid = so->e_pid;
5351 memcpy(stat.cfs_e_uuid, so->e_uuid,
5352 sizeof(uuid_t));
5353 } else {
5354 stat.cfs_e_pid = so->last_pid;
5355 memcpy(stat.cfs_e_uuid, so->last_uuid,
5356 sizeof(uuid_t));
5357 }
5358
5359 stat.cfs_sock_family = so->so_proto->pr_domain->dom_family;
5360 stat.cfs_sock_type = so->so_proto->pr_type;
5361 stat.cfs_sock_protocol = so->so_proto->pr_protocol;
5362 }
5363
5364 stat.cfs_snd.cbs_pending_first =
5365 cfi->cfi_snd.cfi_pending_first;
5366 stat.cfs_snd.cbs_pending_last =
5367 cfi->cfi_snd.cfi_pending_last;
5368 stat.cfs_snd.cbs_inject_q_len =
5369 cfil_queue_len(&cfi->cfi_snd.cfi_inject_q);
5370 stat.cfs_snd.cbs_pass_offset =
5371 cfi->cfi_snd.cfi_pass_offset;
5372
5373 stat.cfs_rcv.cbs_pending_first =
5374 cfi->cfi_rcv.cfi_pending_first;
5375 stat.cfs_rcv.cbs_pending_last =
5376 cfi->cfi_rcv.cfi_pending_last;
5377 stat.cfs_rcv.cbs_inject_q_len =
5378 cfil_queue_len(&cfi->cfi_rcv.cfi_inject_q);
5379 stat.cfs_rcv.cbs_pass_offset =
5380 cfi->cfi_rcv.cfi_pass_offset;
5381
5382 for (i = 0; i < MAX_CONTENT_FILTER; i++) {
5383 struct cfil_entry_stat *estat;
5384 struct cfe_buf *ebuf;
5385 struct cfe_buf_stat *sbuf;
5386
5387 entry = &cfi->cfi_entries[i];
5388
5389 estat = &stat.ces_entries[i];
5390
5391 estat->ces_len = sizeof(struct cfil_entry_stat);
5392 estat->ces_filter_id = entry->cfe_filter ?
5393 entry->cfe_filter->cf_kcunit : 0;
5394 estat->ces_flags = entry->cfe_flags;
5395 estat->ces_necp_control_unit =
5396 entry->cfe_necp_control_unit;
5397
5398 estat->ces_last_event.tv_sec =
5399 (int64_t)entry->cfe_last_event.tv_sec;
5400 estat->ces_last_event.tv_usec =
5401 (int64_t)entry->cfe_last_event.tv_usec;
5402
5403 estat->ces_last_action.tv_sec =
5404 (int64_t)entry->cfe_last_action.tv_sec;
5405 estat->ces_last_action.tv_usec =
5406 (int64_t)entry->cfe_last_action.tv_usec;
5407
5408 ebuf = &entry->cfe_snd;
5409 sbuf = &estat->ces_snd;
5410 sbuf->cbs_pending_first =
5411 cfil_queue_offset_first(&ebuf->cfe_pending_q);
5412 sbuf->cbs_pending_last =
5413 cfil_queue_offset_last(&ebuf->cfe_pending_q);
5414 sbuf->cbs_ctl_first =
5415 cfil_queue_offset_first(&ebuf->cfe_ctl_q);
5416 sbuf->cbs_ctl_last =
5417 cfil_queue_offset_last(&ebuf->cfe_ctl_q);
5418 sbuf->cbs_pass_offset = ebuf->cfe_pass_offset;
5419 sbuf->cbs_peek_offset = ebuf->cfe_peek_offset;
5420 sbuf->cbs_peeked = ebuf->cfe_peeked;
5421
5422 ebuf = &entry->cfe_rcv;
5423 sbuf = &estat->ces_rcv;
5424 sbuf->cbs_pending_first =
5425 cfil_queue_offset_first(&ebuf->cfe_pending_q);
5426 sbuf->cbs_pending_last =
5427 cfil_queue_offset_last(&ebuf->cfe_pending_q);
5428 sbuf->cbs_ctl_first =
5429 cfil_queue_offset_first(&ebuf->cfe_ctl_q);
5430 sbuf->cbs_ctl_last =
5431 cfil_queue_offset_last(&ebuf->cfe_ctl_q);
5432 sbuf->cbs_pass_offset = ebuf->cfe_pass_offset;
5433 sbuf->cbs_peek_offset = ebuf->cfe_peek_offset;
5434 sbuf->cbs_peeked = ebuf->cfe_peeked;
5435 }
5436 error = SYSCTL_OUT(req, &stat,
5437 sizeof(struct cfil_sock_stat));
5438 if (error != 0) {
5439 break;
5440 }
5441 }
5442 done:
5443 cfil_rw_unlock_shared(&cfil_lck_rw);
5444
5445 #if SHOW_DEBUG
5446 if (req->oldptr != USER_ADDR_NULL) {
5447 cfil_info_show();
5448 }
5449 #endif
5450
5451 return error;
5452 }
5453
5454 /*
5455 * UDP Socket Support
5456 */
5457 static void
5458 cfil_hash_entry_log(int level, struct socket *so, struct cfil_hash_entry *entry, uint64_t sockId, const char* msg)
5459 {
5460 char local[MAX_IPv6_STR_LEN + 6];
5461 char remote[MAX_IPv6_STR_LEN + 6];
5462 const void *addr;
5463
5464 // No socket or hash entry, no-op
5465 if (so == NULL || entry == NULL) {
5466 return;
5467 }
5468
5469 local[0] = remote[0] = 0x0;
5470
5471 switch (entry->cfentry_family) {
5472 case AF_INET6:
5473 addr = &entry->cfentry_laddr.addr6;
5474 inet_ntop(AF_INET6, addr, local, sizeof(local));
5475 addr = &entry->cfentry_faddr.addr6;
5476 inet_ntop(AF_INET6, addr, remote, sizeof(remote));
5477 break;
5478 case AF_INET:
5479 addr = &entry->cfentry_laddr.addr46.ia46_addr4.s_addr;
5480 inet_ntop(AF_INET, addr, local, sizeof(local));
5481 addr = &entry->cfentry_faddr.addr46.ia46_addr4.s_addr;
5482 inet_ntop(AF_INET, addr, remote, sizeof(remote));
5483 break;
5484 default:
5485 return;
5486 }
5487
5488 CFIL_LOG(level, "<%s>: <UDP so %llx, entry %p, sockID %llu> lport %d fport %d laddr %s faddr %s",
5489 msg,
5490 (uint64_t)VM_KERNEL_ADDRPERM(so), entry, sockId,
5491 ntohs(entry->cfentry_lport), ntohs(entry->cfentry_fport), local, remote);
5492 }
5493
5494 static void
5495 cfil_inp_log(int level, struct socket *so, const char* msg)
5496 {
5497 struct inpcb *inp = NULL;
5498 char local[MAX_IPv6_STR_LEN + 6];
5499 char remote[MAX_IPv6_STR_LEN + 6];
5500 const void *addr;
5501
5502 if (so == NULL) {
5503 return;
5504 }
5505
5506 inp = sotoinpcb(so);
5507 if (inp == NULL) {
5508 return;
5509 }
5510
5511 local[0] = remote[0] = 0x0;
5512
5513 #if INET6
5514 if (inp->inp_vflag & INP_IPV6) {
5515 addr = &inp->in6p_laddr.s6_addr32;
5516 inet_ntop(AF_INET6, addr, local, sizeof(local));
5517 addr = &inp->in6p_faddr.s6_addr32;
5518 inet_ntop(AF_INET6, addr, remote, sizeof(remote));
5519 } else
5520 #endif /* INET6 */
5521 {
5522 addr = &inp->inp_laddr.s_addr;
5523 inet_ntop(AF_INET, addr, local, sizeof(local));
5524 addr = &inp->inp_faddr.s_addr;
5525 inet_ntop(AF_INET, addr, remote, sizeof(remote));
5526 }
5527
5528 if (so->so_cfil != NULL) {
5529 CFIL_LOG(level, "<%s>: <%s so %llx - flags 0x%x 0x%x, sockID %llu> lport %d fport %d laddr %s faddr %s",
5530 msg, IS_UDP(so) ? "UDP" : "TCP",
5531 (uint64_t)VM_KERNEL_ADDRPERM(so), inp->inp_flags, inp->inp_socket->so_flags, so->so_cfil->cfi_sock_id,
5532 ntohs(inp->inp_lport), ntohs(inp->inp_fport), local, remote);
5533 } else {
5534 CFIL_LOG(level, "<%s>: <%s so %llx - flags 0x%x 0x%x> lport %d fport %d laddr %s faddr %s",
5535 msg, IS_UDP(so) ? "UDP" : "TCP",
5536 (uint64_t)VM_KERNEL_ADDRPERM(so), inp->inp_flags, inp->inp_socket->so_flags,
5537 ntohs(inp->inp_lport), ntohs(inp->inp_fport), local, remote);
5538 }
5539 }
5540
5541 static void
5542 cfil_info_log(int level, struct cfil_info *cfil_info, const char* msg)
5543 {
5544 if (cfil_info == NULL) {
5545 return;
5546 }
5547
5548 if (cfil_info->cfi_hash_entry != NULL) {
5549 cfil_hash_entry_log(level, cfil_info->cfi_so, cfil_info->cfi_hash_entry, cfil_info->cfi_sock_id, msg);
5550 } else {
5551 cfil_inp_log(level, cfil_info->cfi_so, msg);
5552 }
5553 }
5554
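/*
 * Allocate and initialize the per-socket UDP flow database (a hash table
 * of struct cfil_hash_entry) and attach it to the socket.
 */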
5555 errno_t
5556 cfil_db_init(struct socket *so)
5557 {
5558 errno_t error = 0;
5559 struct cfil_db *db = NULL;
5560
5561 CFIL_LOG(LOG_INFO, "");
5562
5563 db = zalloc(cfil_db_zone);
5564 if (db == NULL) {
5565 error = ENOMEM;
5566 goto done;
5567 }
5568 bzero(db, sizeof(struct cfil_db));
5569 db->cfdb_so = so;
5570 db->cfdb_hashbase = hashinit(CFILHASHSIZE, M_CFIL, &db->cfdb_hashmask);
5571 if (db->cfdb_hashbase == NULL) {
5572 zfree(cfil_db_zone, db);
5573 db = NULL;
5574 error = ENOMEM;
5575 goto done;
5576 }
5577
5578 so->so_cfil_db = db;
5579
5580 done:
5581 return error;
5582 }
5583
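/*
 * Tear down the per-socket UDP flow database: free every hash entry and
 * its cfil_info, drop the per-flow socket references, then release the
 * hash table and the database itself.
 */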
5584 void
5585 cfil_db_free(struct socket *so)
5586 {
5587 struct cfil_hash_entry *entry = NULL;
5588 struct cfil_hash_entry *temp_entry = NULL;
5589 struct cfilhashhead *cfilhash = NULL;
5590 struct cfil_db *db = NULL;
5591
5592 CFIL_LOG(LOG_INFO, "");
5593
5594 if (so == NULL || so->so_cfil_db == NULL) {
5595 return;
5596 }
5597 db = so->so_cfil_db;
5598
5599 #if LIFECYCLE_DEBUG
5600 CFIL_LOG(LOG_ERR, "CFIL: LIFECYCLE: <so %llx, db %p> freeing db (count == %d)",
5601 (uint64_t)VM_KERNEL_ADDRPERM(so), db, db->cfdb_count);
5602 #endif
5603
5604 for (int i = 0; i < CFILHASHSIZE; i++) {
5605 cfilhash = &db->cfdb_hashbase[i];
5606 LIST_FOREACH_SAFE(entry, cfilhash, cfentry_link, temp_entry) {
5607 if (entry->cfentry_cfil != NULL) {
5608 #if LIFECYCLE_DEBUG
5609 cfil_info_log(LOG_ERR, entry->cfentry_cfil, "CFIL: LIFECYCLE: DB FREE CLEAN UP");
5610 #endif
5611 cfil_info_free(entry->cfentry_cfil);
5612 OSIncrementAtomic(&cfil_stats.cfs_sock_detached);
5613 entry->cfentry_cfil = NULL;
5614 }
5615
5616 cfil_db_delete_entry(db, entry);
5617 if (so->so_flags & SOF_CONTENT_FILTER) {
5618 if (db->cfdb_count == 0) {
5619 so->so_flags &= ~SOF_CONTENT_FILTER;
5620 }
5621 VERIFY(so->so_usecount > 0);
5622 so->so_usecount--;
5623 }
5624 }
5625 }
5626
5627 // Make sure all entries are cleaned up!
5628 VERIFY(db->cfdb_count == 0);
5629 #if LIFECYCLE_DEBUG
5630 CFIL_LOG(LOG_ERR, "CFIL: LIFECYCLE: so usecount %d", so->so_usecount);
5631 #endif
5632
5633 FREE(db->cfdb_hashbase, M_CFIL);
5634 zfree(cfil_db_zone, db);
5635 so->so_cfil_db = NULL;
5636 }
5637
5638 static bool
5639 fill_cfil_hash_entry_from_address(struct cfil_hash_entry *entry, bool isLocal, struct sockaddr *addr)
5640 {
5641 struct sockaddr_in *sin = NULL;
5642 struct sockaddr_in6 *sin6 = NULL;
5643
5644 if (entry == NULL || addr == NULL) {
5645 return FALSE;
5646 }
5647
5648 switch (addr->sa_family) {
5649 case AF_INET:
5650 sin = satosin(addr);
5651 if (sin->sin_len != sizeof(*sin)) {
5652 return FALSE;
5653 }
5654 if (isLocal == TRUE) {
5655 entry->cfentry_lport = sin->sin_port;
5656 entry->cfentry_laddr.addr46.ia46_addr4.s_addr = sin->sin_addr.s_addr;
5657 } else {
5658 entry->cfentry_fport = sin->sin_port;
5659 entry->cfentry_faddr.addr46.ia46_addr4.s_addr = sin->sin_addr.s_addr;
5660 }
5661 entry->cfentry_family = AF_INET;
5662 return TRUE;
5663 case AF_INET6:
5664 sin6 = satosin6(addr);
5665 if (sin6->sin6_len != sizeof(*sin6)) {
5666 return FALSE;
5667 }
5668 if (isLocal == TRUE) {
5669 entry->cfentry_lport = sin6->sin6_port;
5670 entry->cfentry_laddr.addr6 = sin6->sin6_addr;
5671 } else {
5672 entry->cfentry_fport = sin6->sin6_port;
5673 entry->cfentry_faddr.addr6 = sin6->sin6_addr;
5674 }
5675 entry->cfentry_family = AF_INET6;
5676 return TRUE;
5677 default:
5678 return FALSE;
5679 }
5680 }
5681
5682 static bool
5683 fill_cfil_hash_entry_from_inp(struct cfil_hash_entry *entry, bool isLocal, struct inpcb *inp)
5684 {
5685 if (entry == NULL || inp == NULL) {
5686 return FALSE;
5687 }
5688
5689 if (inp->inp_vflag & INP_IPV4) {
5690 if (isLocal == TRUE) {
5691 entry->cfentry_lport = inp->inp_lport;
5692 entry->cfentry_laddr.addr46.ia46_addr4.s_addr = inp->inp_laddr.s_addr;
5693 } else {
5694 entry->cfentry_fport = inp->inp_fport;
5695 entry->cfentry_faddr.addr46.ia46_addr4.s_addr = inp->inp_faddr.s_addr;
5696 }
5697 entry->cfentry_family = AF_INET;
5698 return TRUE;
5699 } else if (inp->inp_vflag & INP_IPV6) {
5700 if (isLocal == TRUE) {
5701 entry->cfentry_lport = inp->inp_lport;
5702 entry->cfentry_laddr.addr6 = inp->in6p_laddr;
5703 } else {
5704 entry->cfentry_fport = inp->inp_fport;
5705 entry->cfentry_faddr.addr6 = inp->in6p_faddr;
5706 }
5707 entry->cfentry_family = AF_INET6;
5708 return TRUE;
5709 }
5710 return FALSE;
5711 }
5712
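/*
 * Return TRUE if the given sockaddr (AF_INET or AF_INET6) carries the
 * specified port (in host byte order).
 */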
5713 bool
5714 check_port(struct sockaddr *addr, u_short port)
5715 {
5716 struct sockaddr_in *sin = NULL;
5717 struct sockaddr_in6 *sin6 = NULL;
5718
5719 if (addr == NULL || port == 0) {
5720 return FALSE;
5721 }
5722
5723 switch (addr->sa_family) {
5724 case AF_INET:
5725 sin = satosin(addr);
5726 if (sin->sin_len != sizeof(*sin)) {
5727 return FALSE;
5728 }
5729 if (port == ntohs(sin->sin_port)) {
5730 return TRUE;
5731 }
5732 break;
5733 case AF_INET6:
5734 sin6 = satosin6(addr);
5735 if (sin6->sin6_len != sizeof(*sin6)) {
5736 return FALSE;
5737 }
5738 if (port == ntohs(sin6->sin6_port)) {
5739 return TRUE;
5740 }
5741 break;
5742 default:
5743 break;
5744 }
5745 return FALSE;
5746 }
5747
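/*
 * Look up a UDP flow by socket ID. The low 32 bits of the socket ID
 * encode the flow hash, which selects the hash bucket to search.
 */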
5748 struct cfil_hash_entry *
5749 cfil_db_lookup_entry_with_sockid(struct cfil_db *db, u_int64_t sock_id)
5750 {
5751 struct cfilhashhead *cfilhash = NULL;
5752 u_int32_t flowhash = (u_int32_t)(sock_id & 0x0ffffffff);
5753 struct cfil_hash_entry *nextentry;
5754
5755 if (db == NULL || db->cfdb_hashbase == NULL || sock_id == 0) {
5756 return NULL;
5757 }
5758
5759 flowhash &= db->cfdb_hashmask;
5760 cfilhash = &db->cfdb_hashbase[flowhash];
5761
5762 LIST_FOREACH(nextentry, cfilhash, cfentry_link) {
5763 if (nextentry->cfentry_cfil != NULL &&
5764 nextentry->cfentry_cfil->cfi_sock_id == sock_id) {
5765 CFIL_LOG(LOG_DEBUG, "CFIL: UDP <so %llx> matched <id %llu, hash %u>",
5766 (uint64_t)VM_KERNEL_ADDRPERM(db->cfdb_so), nextentry->cfentry_cfil->cfi_sock_id, flowhash);
5767 cfil_hash_entry_log(LOG_DEBUG, db->cfdb_so, nextentry, 0, "CFIL: UDP found entry");
5768 return nextentry;
5769 }
5770 }
5771
5772 CFIL_LOG(LOG_DEBUG, "CFIL: UDP <so %llx> NOT matched <id %llu, hash %u>",
5773 (uint64_t)VM_KERNEL_ADDRPERM(db->cfdb_so), sock_id, flowhash);
5774 return NULL;
5775 }
5776
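/*
 * Look up a UDP flow matching the given local/remote addresses and ports.
 * Missing addresses are filled in from the socket's inpcb.
 */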
5777 struct cfil_hash_entry *
5778 cfil_db_lookup_entry(struct cfil_db *db, struct sockaddr *local, struct sockaddr *remote)
5779 {
5780 struct cfil_hash_entry matchentry;
5781 struct cfil_hash_entry *nextentry = NULL;
5782 struct inpcb *inp = sotoinpcb(db->cfdb_so);
5783 u_int32_t hashkey_faddr = 0, hashkey_laddr = 0;
5784 int inp_hash_element = 0;
5785 struct cfilhashhead *cfilhash = NULL;
5786
5787 CFIL_LOG(LOG_INFO, "");
5788
5789 if (inp == NULL) {
5790 goto done;
5791 }
5792
5793 if (local != NULL) {
5794 fill_cfil_hash_entry_from_address(&matchentry, TRUE, local);
5795 } else {
5796 fill_cfil_hash_entry_from_inp(&matchentry, TRUE, inp);
5797 }
5798 if (remote != NULL) {
5799 fill_cfil_hash_entry_from_address(&matchentry, FALSE, remote);
5800 } else {
5801 fill_cfil_hash_entry_from_inp(&matchentry, FALSE, inp);
5802 }
5803
5804 #if INET6
5805 if (inp->inp_vflag & INP_IPV6) {
5806 hashkey_faddr = matchentry.cfentry_faddr.addr6.s6_addr32[3];
5807 hashkey_laddr = matchentry.cfentry_laddr.addr6.s6_addr32[3];
5808 } else
5809 #endif /* INET6 */
5810 {
5811 hashkey_faddr = matchentry.cfentry_faddr.addr46.ia46_addr4.s_addr;
5812 hashkey_laddr = matchentry.cfentry_laddr.addr46.ia46_addr4.s_addr;
5813 }
5814
5815 inp_hash_element = CFIL_HASH(hashkey_laddr, hashkey_faddr,
5816 matchentry.cfentry_lport, matchentry.cfentry_fport);
5817 inp_hash_element &= db->cfdb_hashmask;
5818
5819 cfilhash = &db->cfdb_hashbase[inp_hash_element];
5820
5821 LIST_FOREACH(nextentry, cfilhash, cfentry_link) {
5822 #if INET6
5823 if ((inp->inp_vflag & INP_IPV6) &&
5824 nextentry->cfentry_lport == matchentry.cfentry_lport &&
5825 nextentry->cfentry_fport == matchentry.cfentry_fport &&
5826 IN6_ARE_ADDR_EQUAL(&nextentry->cfentry_laddr.addr6, &matchentry.cfentry_laddr.addr6) &&
5827 IN6_ARE_ADDR_EQUAL(&nextentry->cfentry_faddr.addr6, &matchentry.cfentry_faddr.addr6)) {
5828 #if DATA_DEBUG
5829 cfil_hash_entry_log(LOG_DEBUG, db->cfdb_so, &matchentry, 0, "CFIL LOOKUP ENTRY: UDP V6 found entry");
5830 #endif
5831 return nextentry;
5832 } else
5833 #endif /* INET6 */
5834 if (nextentry->cfentry_lport == matchentry.cfentry_lport &&
5835 nextentry->cfentry_fport == matchentry.cfentry_fport &&
5836 nextentry->cfentry_laddr.addr46.ia46_addr4.s_addr == matchentry.cfentry_laddr.addr46.ia46_addr4.s_addr &&
5837 nextentry->cfentry_faddr.addr46.ia46_addr4.s_addr == matchentry.cfentry_faddr.addr46.ia46_addr4.s_addr) {
5838 #if DATA_DEBUG
5839 cfil_hash_entry_log(LOG_DEBUG, db->cfdb_so, &matchentry, 0, "CFIL LOOKUP ENTRY: UDP V4 found entry");
5840 #endif
5841 return nextentry;
5842 }
5843 }
5844
5845 done:
5846 #if DATA_DEBUG
5847 cfil_hash_entry_log(LOG_DEBUG, db->cfdb_so, &matchentry, 0, "CFIL LOOKUP ENTRY: UDP no entry found");
5848 #endif
5849 return NULL;
5850 }
5851
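/*
 * Unlink a hash entry from the UDP flow database and free it.
 */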
5852 void
5853 cfil_db_delete_entry(struct cfil_db *db, struct cfil_hash_entry *hash_entry)
5854 {
5855 if (hash_entry == NULL) {
5856 return;
5857 }
5858 if (db == NULL || db->cfdb_count == 0) {
5859 return;
5860 }
5861 db->cfdb_count--;
5862 if (db->cfdb_only_entry == hash_entry) {
5863 db->cfdb_only_entry = NULL;
5864 }
5865 LIST_REMOVE(hash_entry, cfentry_link);
5866 zfree(cfil_hash_entry_zone, hash_entry);
5867 }
5868
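/*
 * Create a new UDP flow entry keyed on the local/remote addresses and
 * ports, compute its flow hash and insert it into the database.
 */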
5869 struct cfil_hash_entry *
5870 cfil_db_add_entry(struct cfil_db *db, struct sockaddr *local, struct sockaddr *remote)
5871 {
5872 struct cfil_hash_entry *entry = NULL;
5873 struct inpcb *inp = sotoinpcb(db->cfdb_so);
5874 u_int32_t hashkey_faddr = 0, hashkey_laddr = 0;
5875 int inp_hash_element = 0;
5876 struct cfilhashhead *cfilhash = NULL;
5877
5878 CFIL_LOG(LOG_INFO, "");
5879
5880 if (inp == NULL) {
5881 goto done;
5882 }
5883
5884 entry = zalloc(cfil_hash_entry_zone);
5885 if (entry == NULL) {
5886 goto done;
5887 }
5888 bzero(entry, sizeof(struct cfil_hash_entry));
5889
5890 if (local != NULL) {
5891 fill_cfil_hash_entry_from_address(entry, TRUE, local);
5892 } else {
5893 fill_cfil_hash_entry_from_inp(entry, TRUE, inp);
5894 }
5895 if (remote != NULL) {
5896 fill_cfil_hash_entry_from_address(entry, FALSE, remote);
5897 } else {
5898 fill_cfil_hash_entry_from_inp(entry, FALSE, inp);
5899 }
5900 entry->cfentry_lastused = net_uptime();
5901
5902 #if INET6
5903 if (inp->inp_vflag & INP_IPV6) {
5904 hashkey_faddr = entry->cfentry_faddr.addr6.s6_addr32[3];
5905 hashkey_laddr = entry->cfentry_laddr.addr6.s6_addr32[3];
5906 } else
5907 #endif /* INET6 */
5908 {
5909 hashkey_faddr = entry->cfentry_faddr.addr46.ia46_addr4.s_addr;
5910 hashkey_laddr = entry->cfentry_laddr.addr46.ia46_addr4.s_addr;
5911 }
5912 entry->cfentry_flowhash = CFIL_HASH(hashkey_laddr, hashkey_faddr,
5913 entry->cfentry_lport, entry->cfentry_fport);
5914 inp_hash_element = entry->cfentry_flowhash & db->cfdb_hashmask;
5915
5916 cfilhash = &db->cfdb_hashbase[inp_hash_element];
5917
5918 LIST_INSERT_HEAD(cfilhash, entry, cfentry_link);
5919 db->cfdb_count++;
5920 db->cfdb_only_entry = entry;
5921 cfil_hash_entry_log(LOG_DEBUG, db->cfdb_so, entry, 0, "CFIL: cfil_db_add_entry: ADDED");
5922
5923 done:
5924 CFIL_LOG(LOG_DEBUG, "CFIL: UDP <so %llx> total count %d", (uint64_t)VM_KERNEL_ADDRPERM(db->cfdb_so), db->cfdb_count);
5925 return entry;
5926 }
5927
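/*
 * Return the cfil_info of the flow with the given socket ID,
 * or NULL if no such flow exists.
 */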
5928 struct cfil_info *
5929 cfil_db_get_cfil_info(struct cfil_db *db, cfil_sock_id_t id)
5930 {
5931 struct cfil_hash_entry *hash_entry = NULL;
5932
5933 CFIL_LOG(LOG_INFO, "");
5934
5935 if (db == NULL || id == 0) {
5936 CFIL_LOG(LOG_DEBUG, "CFIL: UDP <so %llx> NULL DB <id %llu>",
5937 db ? (uint64_t)VM_KERNEL_ADDRPERM(db->cfdb_so) : 0, id);
5938 return NULL;
5939 }
5940
5941 // This is an optimization for connected UDP sockets, which only have one flow.
5942 // No need to do the hash lookup.
5943 if (db->cfdb_count == 1) {
5944 if (db->cfdb_only_entry && db->cfdb_only_entry->cfentry_cfil &&
5945 db->cfdb_only_entry->cfentry_cfil->cfi_sock_id == id) {
5946 return db->cfdb_only_entry->cfentry_cfil;
5947 }
5948 }
5949
5950 hash_entry = cfil_db_lookup_entry_with_sockid(db, id);
5951 return hash_entry != NULL ? hash_entry->cfentry_cfil : NULL;
5952 }
5953
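/*
 * Find or create the UDP flow for the given address pair: allocate the
 * flow database if needed, add a hash entry and cfil_info, attach the
 * filter control unit, take a socket reference for the flow and send
 * the attach event to the user space filter agent.
 */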
5954 struct cfil_hash_entry *
5955 cfil_sock_udp_get_flow(struct socket *so, uint32_t filter_control_unit, bool outgoing, struct sockaddr *local, struct sockaddr *remote)
5956 {
5957 struct cfil_hash_entry *hash_entry = NULL;
5958
5959 errno_t error = 0;
5960 socket_lock_assert_owned(so);
5961
5962 // If new socket, allocate cfil db
5963 if (so->so_cfil_db == NULL) {
5964 if (cfil_db_init(so) != 0) {
5965 return NULL;
5966 }
5967 }
5968
5969 // See if flow already exists.
5970 hash_entry = cfil_db_lookup_entry(so->so_cfil_db, local, remote);
5971 if (hash_entry != NULL) {
5972 return hash_entry;
5973 }
5974
5975 hash_entry = cfil_db_add_entry(so->so_cfil_db, local, remote);
5976 if (hash_entry == NULL) {
5977 OSIncrementAtomic(&cfil_stats.cfs_sock_attach_no_mem);
5978 CFIL_LOG(LOG_ERR, "CFIL: UDP failed to add entry");
5979 return NULL;
5980 }
5981
5982 if (cfil_info_alloc(so, hash_entry) == NULL ||
5983 hash_entry->cfentry_cfil == NULL) {
5984 cfil_db_delete_entry(so->so_cfil_db, hash_entry);
5985 CFIL_LOG(LOG_ERR, "CFIL: UDP failed to alloc cfil_info");
5986 OSIncrementAtomic(&cfil_stats.cfs_sock_attach_no_mem);
5987 return NULL;
5988 }
5989 hash_entry->cfentry_cfil->cfi_dir = outgoing ? CFS_CONNECTION_DIR_OUT : CFS_CONNECTION_DIR_IN;
5990
5991 #if LIFECYCLE_DEBUG
5992 cfil_info_log(LOG_ERR, hash_entry->cfentry_cfil, "CFIL: LIFECYCLE: ADDED");
5993 #endif
5994
5995 if (cfil_info_attach_unit(so, filter_control_unit, hash_entry->cfentry_cfil) == 0) {
5996 cfil_info_free(hash_entry->cfentry_cfil);
5997 cfil_db_delete_entry(so->so_cfil_db, hash_entry);
5998 CFIL_LOG(LOG_ERR, "CFIL: UDP cfil_info_attach_unit(%u) failed",
5999 filter_control_unit);
6000 OSIncrementAtomic(&cfil_stats.cfs_sock_attach_failed);
6001 return NULL;
6002 }
6003 CFIL_LOG(LOG_DEBUG, "CFIL: UDP <so %llx> filter_control_unit %u sockID %llu attached",
6004 (uint64_t)VM_KERNEL_ADDRPERM(so),
6005 filter_control_unit, hash_entry->cfentry_cfil->cfi_sock_id);
6006
6007 so->so_flags |= SOF_CONTENT_FILTER;
6008 OSIncrementAtomic(&cfil_stats.cfs_sock_attached);
6009
6010 /* Hold a reference on the socket for each flow */
6011 so->so_usecount++;
6012
6013 error = cfil_dispatch_attach_event(so, hash_entry->cfentry_cfil, 0,
6014 outgoing ? CFS_CONNECTION_DIR_OUT : CFS_CONNECTION_DIR_IN);
6015 /* We can recover from flow control or out of memory errors */
6016 if (error != 0 && error != ENOBUFS && error != ENOMEM) {
6017 return NULL;
6018 }
6019
6020 CFIL_INFO_VERIFY(hash_entry->cfentry_cfil);
6021 return hash_entry;
6022 }
6023
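/*
 * Entry point for UDP data going through the content filter: consult NECP
 * to see whether the socket needs filtering, get or create the matching
 * flow, and hand the data off to cfil_data_common() unless the flow has
 * already been marked for drop.
 */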
6024 errno_t
6025 cfil_sock_udp_handle_data(bool outgoing, struct socket *so,
6026 struct sockaddr *local, struct sockaddr *remote,
6027 struct mbuf *data, struct mbuf *control, uint32_t flags)
6028 {
6029 #pragma unused(outgoing, so, local, remote, data, control, flags)
6030 errno_t error = 0;
6031 uint32_t filter_control_unit;
6032 struct cfil_hash_entry *hash_entry = NULL;
6033 struct cfil_info *cfil_info = NULL;
6034
6035 socket_lock_assert_owned(so);
6036
6037 if (cfil_active_count == 0) {
6038 CFIL_LOG(LOG_DEBUG, "CFIL: UDP no active filter");
6039 OSIncrementAtomic(&cfil_stats.cfs_sock_attach_in_vain);
6040 return error;
6041 }
6042
6043 // Socket has been blessed
6044 if ((so->so_flags1 & SOF1_CONTENT_FILTER_SKIP) != 0) {
6045 return error;
6046 }
6047
6048 filter_control_unit = necp_socket_get_content_filter_control_unit(so);
6049 if (filter_control_unit == 0) {
6050 CFIL_LOG(LOG_DEBUG, "CFIL: UDP failed to get control unit");
6051 return error;
6052 }
6053
6054 if (filter_control_unit == NECP_FILTER_UNIT_NO_FILTER) {
6055 return error;
6056 }
6057
6058 if ((filter_control_unit & NECP_MASK_USERSPACE_ONLY) != 0) {
6059 CFIL_LOG(LOG_DEBUG, "CFIL: UDP user space only");
6060 OSIncrementAtomic(&cfil_stats.cfs_sock_userspace_only);
6061 return error;
6062 }
6063
6064 hash_entry = cfil_sock_udp_get_flow(so, filter_control_unit, outgoing, local, remote);
6065 if (hash_entry == NULL || hash_entry->cfentry_cfil == NULL) {
6066 CFIL_LOG(LOG_ERR, "CFIL: Falied to create UDP flow");
6067 return EPIPE;
6068 }
6069 // Update last used timestamp; this is used for the flow idle timeout
6070 hash_entry->cfentry_lastused = net_uptime();
6071 cfil_info = hash_entry->cfentry_cfil;
6072
6073 if (cfil_info->cfi_flags & CFIF_DROP) {
6074 #if DATA_DEBUG
6075 cfil_hash_entry_log(LOG_DEBUG, so, hash_entry, 0, "CFIL: UDP DROP");
6076 #endif
6077 return EPIPE;
6078 }
6079 if (control != NULL) {
6080 OSIncrementAtomic(&cfil_stats.cfs_data_in_control);
6081 }
6082 if (data->m_type == MT_OOBDATA) {
6083 CFIL_LOG(LOG_ERR, "so %llx MSG_OOB",
6084 (uint64_t)VM_KERNEL_ADDRPERM(so));
6085 OSIncrementAtomic(&cfil_stats.cfs_data_in_oob);
6086 }
6087
6088 error = cfil_data_common(so, cfil_info, outgoing, remote, data, control, flags);
6089
6090 return error;
6091 }
6092
6093 /*
6094 * Go through all UDP flows for the specified socket and return TRUE if
6095 * any flow is still attached. If need_wait is TRUE, wait on the first
6096 * attached flow.
6097 */
6098 static int
6099 cfil_filters_udp_attached(struct socket *so, bool need_wait)
6100 {
6101 struct timespec ts;
6102 lck_mtx_t *mutex_held;
6103 struct cfilhashhead *cfilhash = NULL;
6104 struct cfil_db *db = NULL;
6105 struct cfil_hash_entry *hash_entry = NULL;
6106 struct cfil_hash_entry *temp_hash_entry = NULL;
6107 struct cfil_info *cfil_info = NULL;
6108 struct cfil_entry *entry = NULL;
6109 errno_t error = 0;
6110 int kcunit;
6111 int attached = 0;
6112 uint64_t sock_flow_id = 0;
6113
6114 socket_lock_assert_owned(so);
6115
6116 if ((so->so_flags & SOF_CONTENT_FILTER) != 0 && so->so_cfil_db != NULL) {
6117 if (so->so_proto->pr_getlock != NULL) {
6118 mutex_held = (*so->so_proto->pr_getlock)(so, PR_F_WILLUNLOCK);
6119 } else {
6120 mutex_held = so->so_proto->pr_domain->dom_mtx;
6121 }
6122 LCK_MTX_ASSERT(mutex_held, LCK_MTX_ASSERT_OWNED);
6123
6124 db = so->so_cfil_db;
6125
6126 for (int i = 0; i < CFILHASHSIZE; i++) {
6127 cfilhash = &db->cfdb_hashbase[i];
6128
6129 LIST_FOREACH_SAFE(hash_entry, cfilhash, cfentry_link, temp_hash_entry) {
6130 if (hash_entry->cfentry_cfil != NULL) {
6131 cfil_info = hash_entry->cfentry_cfil;
6132 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
6133 entry = &cfil_info->cfi_entries[kcunit - 1];
6134
6135 /* Are we attached to the filter? */
6136 if (entry->cfe_filter == NULL) {
6137 continue;
6138 }
6139
6140 if ((entry->cfe_flags & CFEF_SENT_SOCK_ATTACHED) == 0) {
6141 continue;
6142 }
6143 if ((entry->cfe_flags & CFEF_CFIL_DETACHED) != 0) {
6144 continue;
6145 }
6146
6147 attached = 1;
6148
6149 if (need_wait == TRUE) {
6150 #if LIFECYCLE_DEBUG
6151 cfil_info_log(LOG_ERR, cfil_info, "CFIL: LIFECYCLE: WAIT FOR FLOW TO FINISH");
6152 #endif
6153
6154 ts.tv_sec = cfil_close_wait_timeout / 1000;
6155 ts.tv_nsec = (cfil_close_wait_timeout % 1000) *
6156 NSEC_PER_USEC * 1000;
6157
6158 OSIncrementAtomic(&cfil_stats.cfs_close_wait);
6159 cfil_info->cfi_flags |= CFIF_CLOSE_WAIT;
6160 sock_flow_id = cfil_info->cfi_sock_id;
6161
6162 error = msleep((caddr_t)cfil_info, mutex_held,
6163 PSOCK | PCATCH, "cfil_filters_udp_attached", &ts);
6164
6165 // Woke up from sleep; check that cfil_info is still valid
6166 if (so->so_cfil_db == NULL ||
6167 (cfil_info != cfil_db_get_cfil_info(so->so_cfil_db, sock_flow_id))) {
6168 // cfil_info is not valid, do not continue
6169 goto done;
6170 }
6171
6172 cfil_info->cfi_flags &= ~CFIF_CLOSE_WAIT;
6173
6174 #if LIFECYCLE_DEBUG
6175 cfil_info_log(LOG_ERR, cfil_info, "CFIL: LIFECYCLE: WAIT FOR FLOW DONE");
6176 #endif
6177
6178 /*
6179 * Force close in case of timeout
6180 */
6181 if (error != 0) {
6182 OSIncrementAtomic(&cfil_stats.cfs_close_wait_timeout);
6183 #if LIFECYCLE_DEBUG
6184 cfil_info_log(LOG_ERR, cfil_info, "CFIL: LIFECYCLE: WAIT FOR FLOW TIMED OUT, FORCE DETACH");
6185 #endif
6186 entry->cfe_flags |= CFEF_CFIL_DETACHED;
6187 }
6188 }
6189 goto done;
6190 }
6191 }
6192 }
6193 }
6194 }
6195
6196 done:
6197 return attached;
6198 }
6199
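/*
 * Return the number of bytes (or mbuf chars, whichever is larger) still
 * held by the content filter across all UDP flows for the given sockbuf.
 */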
6200 int32_t
6201 cfil_sock_udp_data_pending(struct sockbuf *sb, bool check_thread)
6202 {
6203 struct socket *so = sb->sb_so;
6204 struct cfi_buf *cfi_buf;
6205 uint64_t pending = 0;
6206 uint64_t total_pending = 0;
6207 struct cfilhashhead *cfilhash = NULL;
6208 struct cfil_db *db = NULL;
6209 struct cfil_hash_entry *hash_entry = NULL;
6210 struct cfil_hash_entry *temp_hash_entry = NULL;
6211
6212 socket_lock_assert_owned(so);
6213
6214 if ((so->so_flags & SOF_CONTENT_FILTER) != 0 && so->so_cfil_db != NULL &&
6215 (check_thread == FALSE || so->so_snd.sb_cfil_thread != current_thread())) {
6216 db = so->so_cfil_db;
6217
6218 for (int i = 0; i < CFILHASHSIZE; i++) {
6219 cfilhash = &db->cfdb_hashbase[i];
6220
6221 LIST_FOREACH_SAFE(hash_entry, cfilhash, cfentry_link, temp_hash_entry) {
6222 if (hash_entry->cfentry_cfil != NULL) {
6223 if ((sb->sb_flags & SB_RECV) == 0) {
6224 cfi_buf = &hash_entry->cfentry_cfil->cfi_snd;
6225 } else {
6226 cfi_buf = &hash_entry->cfentry_cfil->cfi_rcv;
6227 }
6228
6229 pending = cfi_buf->cfi_pending_last - cfi_buf->cfi_pending_first;
6230 /*
6231 * If we are limited by the "chars of mbufs used", roughly
6232 * adjust so that we won't overcommit
6233 */
6234 if ((uint64_t)cfi_buf->cfi_pending_mbcnt > pending) {
6235 pending = cfi_buf->cfi_pending_mbcnt;
6236 }
6237
6238 total_pending += pending;
6239 }
6240 }
6241 }
6242
6243 VERIFY(total_pending < INT32_MAX);
6244 #if DATA_DEBUG
6245 CFIL_LOG(LOG_DEBUG, "CFIL: <so %llx> total pending %llu <check_thread %d>",
6246 (uint64_t)VM_KERNEL_ADDRPERM(so),
6247 total_pending, check_thread);
6248 #endif
6249 }
6250
6251 return (int32_t)(total_pending);
6252 }
6253
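/*
 * Send disconnect events to the attached filters for each UDP flow of the
 * socket, for the side(s) being shut down. Returns ENOTCONN if no flow
 * was processed.
 */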
6254 int
6255 cfil_sock_udp_notify_shutdown(struct socket *so, int how, int drop_flag, int shut_flag)
6256 {
6257 struct cfil_info *cfil_info = NULL;
6258 struct cfilhashhead *cfilhash = NULL;
6259 struct cfil_db *db = NULL;
6260 struct cfil_hash_entry *hash_entry = NULL;
6261 struct cfil_hash_entry *temp_hash_entry = NULL;
6262 errno_t error = 0;
6263 int done_count = 0;
6264 int kcunit;
6265
6266 socket_lock_assert_owned(so);
6267
6268 if ((so->so_flags & SOF_CONTENT_FILTER) != 0 && so->so_cfil_db != NULL) {
6269 db = so->so_cfil_db;
6270
6271 for (int i = 0; i < CFILHASHSIZE; i++) {
6272 cfilhash = &db->cfdb_hashbase[i];
6273
6274 LIST_FOREACH_SAFE(hash_entry, cfilhash, cfentry_link, temp_hash_entry) {
6275 if (hash_entry->cfentry_cfil != NULL) {
6276 cfil_info = hash_entry->cfentry_cfil;
6277
6278 // This flow is marked as DROP
6279 if (cfil_info->cfi_flags & drop_flag) {
6280 done_count++;
6281 continue;
6282 }
6283
6284 // This flow has been shut already, skip
6285 if (cfil_info->cfi_flags & shut_flag) {
6286 continue;
6287 }
6288 // Mark flow as shut
6289 cfil_info->cfi_flags |= shut_flag;
6290 done_count++;
6291
6292 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
6293 /* Disconnect incoming side */
6294 if (how != SHUT_WR) {
6295 error = cfil_dispatch_disconnect_event(so, cfil_info, kcunit, 0);
6296 }
6297 /* Disconnect outgoing side */
6298 if (how != SHUT_RD) {
6299 error = cfil_dispatch_disconnect_event(so, cfil_info, kcunit, 1);
6300 }
6301 }
6302 }
6303 }
6304 }
6305 }
6306
6307 if (done_count == 0) {
6308 error = ENOTCONN;
6309 }
6310 return error;
6311 }
6312
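/*
 * Content filter hook for shutdown(2) on UDP sockets: notify the filters
 * of the read and/or write shutdown and, when outgoing data is still
 * pending a verdict, delay the protocol-level shutdown of the write side.
 */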
6313 int
6314 cfil_sock_udp_shutdown(struct socket *so, int *how)
6315 {
6316 int error = 0;
6317
6318 if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || (so->so_cfil_db == NULL)) {
6319 goto done;
6320 }
6321
6322 socket_lock_assert_owned(so);
6323
6324 CFIL_LOG(LOG_INFO, "so %llx how %d",
6325 (uint64_t)VM_KERNEL_ADDRPERM(so), *how);
6326
6327 /*
6328 * Check the state of the socket before the content filter
6329 */
6330 if (*how != SHUT_WR && (so->so_state & SS_CANTRCVMORE) != 0) {
6331 /* read already shut down */
6332 error = ENOTCONN;
6333 goto done;
6334 }
6335 if (*how != SHUT_RD && (so->so_state & SS_CANTSENDMORE) != 0) {
6336 /* write already shut down */
6337 error = ENOTCONN;
6338 goto done;
6339 }
6340
6341 /*
6342 * shutdown read: SHUT_RD or SHUT_RDWR
6343 */
6344 if (*how != SHUT_WR) {
6345 error = cfil_sock_udp_notify_shutdown(so, SHUT_RD, CFIF_DROP, CFIF_SHUT_RD);
6346 if (error != 0) {
6347 goto done;
6348 }
6349 }
6350 /*
6351 * shutdown write: SHUT_WR or SHUT_RDWR
6352 */
6353 if (*how != SHUT_RD) {
6354 error = cfil_sock_udp_notify_shutdown(so, SHUT_WR, CFIF_DROP, CFIF_SHUT_WR);
6355 if (error != 0) {
6356 goto done;
6357 }
6358
6359 /*
6360 * When outgoing data is pending, we delay the shutdown at the
6361 * protocol level until the content filters give the final
6362 * verdict on the pending data.
6363 */
6364 if (cfil_sock_data_pending(&so->so_snd) != 0) {
6365 /*
6366 * When shutting down the read and write sides at once
6367 * we can proceed to the final shutdown of the read
6368 * side. Otherwise, we just return.
6369 */
6370 if (*how == SHUT_WR) {
6371 error = EJUSTRETURN;
6372 } else if (*how == SHUT_RDWR) {
6373 *how = SHUT_RD;
6374 }
6375 }
6376 }
6377 done:
6378 return error;
6379 }
6380
6381 void
6382 cfil_sock_udp_close_wait(struct socket *so)
6383 {
6384 socket_lock_assert_owned(so);
6385
6386 while (cfil_filters_udp_attached(so, FALSE)) {
6387 /*
6388 * Notify the filters we are going away so they can detach
6389 */
6390 cfil_sock_udp_notify_shutdown(so, SHUT_RDWR, 0, 0);
6391
6392 /*
6393 * Make sure we need to wait after the filters are notified
6394 * of the disconnection
6395 */
6396 if (cfil_filters_udp_attached(so, TRUE) == 0) {
6397 break;
6398 }
6399 }
6400 }
6401
6402 void
6403 cfil_sock_udp_is_closed(struct socket *so)
6404 {
6405 struct cfil_info *cfil_info = NULL;
6406 struct cfilhashhead *cfilhash = NULL;
6407 struct cfil_db *db = NULL;
6408 struct cfil_hash_entry *hash_entry = NULL;
6409 struct cfil_hash_entry *temp_hash_entry = NULL;
6410 errno_t error = 0;
6411 int kcunit;
6412
6413 socket_lock_assert_owned(so);
6414
6415 if ((so->so_flags & SOF_CONTENT_FILTER) != 0 && so->so_cfil_db != NULL) {
6416 db = so->so_cfil_db;
6417
6418 for (int i = 0; i < CFILHASHSIZE; i++) {
6419 cfilhash = &db->cfdb_hashbase[i];
6420
6421 LIST_FOREACH_SAFE(hash_entry, cfilhash, cfentry_link, temp_hash_entry) {
6422 if (hash_entry->cfentry_cfil != NULL) {
6423 cfil_info = hash_entry->cfentry_cfil;
6424
6425 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
6426 /* Let the filters know of the closing */
6427 error = cfil_dispatch_closed_event(so, cfil_info, kcunit);
6428 }
6429
6430 /* Last chance to push passed data out */
6431 error = cfil_acquire_sockbuf(so, cfil_info, 1);
6432 if (error == 0) {
6433 cfil_service_inject_queue(so, cfil_info, 1);
6434 }
6435 cfil_release_sockbuf(so, 1);
6436
6437 cfil_info->cfi_flags |= CFIF_SOCK_CLOSED;
6438
6439 /* Pending data needs to go */
6440 cfil_flush_queues(so, cfil_info);
6441
6442 CFIL_INFO_VERIFY(cfil_info);
6443 }
6444 }
6445 }
6446 }
6447 }
6448
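/*
 * Called when sockbuf space becomes available: for each UDP flow, retry
 * injecting data that was previously held back because the socket buffer
 * was full.
 */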
6449 void
6450 cfil_sock_udp_buf_update(struct sockbuf *sb)
6451 {
6452 struct cfil_info *cfil_info = NULL;
6453 struct cfilhashhead *cfilhash = NULL;
6454 struct cfil_db *db = NULL;
6455 struct cfil_hash_entry *hash_entry = NULL;
6456 struct cfil_hash_entry *temp_hash_entry = NULL;
6457 errno_t error = 0;
6458 int outgoing;
6459 struct socket *so = sb->sb_so;
6460
6461 socket_lock_assert_owned(so);
6462
6463 if ((so->so_flags & SOF_CONTENT_FILTER) != 0 && so->so_cfil_db != NULL) {
6464 if (!cfil_sbtrim) {
6465 return;
6466 }
6467
6468 db = so->so_cfil_db;
6469
6470 for (int i = 0; i < CFILHASHSIZE; i++) {
6471 cfilhash = &db->cfdb_hashbase[i];
6472
6473 LIST_FOREACH_SAFE(hash_entry, cfilhash, cfentry_link, temp_hash_entry) {
6474 if (hash_entry->cfentry_cfil != NULL) {
6475 cfil_info = hash_entry->cfentry_cfil;
6476
6477 if ((sb->sb_flags & SB_RECV) == 0) {
6478 if ((cfil_info->cfi_flags & CFIF_RETRY_INJECT_OUT) == 0) {
6479 return;
6480 }
6481 outgoing = 1;
6482 OSIncrementAtomic(&cfil_stats.cfs_inject_q_out_retry);
6483 } else {
6484 if ((cfil_info->cfi_flags & CFIF_RETRY_INJECT_IN) == 0) {
6485 return;
6486 }
6487 outgoing = 0;
6488 OSIncrementAtomic(&cfil_stats.cfs_inject_q_in_retry);
6489 }
6490
6491 CFIL_LOG(LOG_NOTICE, "so %llx outgoing %d",
6492 (uint64_t)VM_KERNEL_ADDRPERM(so), outgoing);
6493
6494 error = cfil_acquire_sockbuf(so, cfil_info, outgoing);
6495 if (error == 0) {
6496 cfil_service_inject_queue(so, cfil_info, outgoing);
6497 }
6498 cfil_release_sockbuf(so, outgoing);
6499 }
6500 }
6501 }
6502 }
6503 }
6504
6505 void
6506 cfil_filter_show(u_int32_t kcunit)
6507 {
6508 struct content_filter *cfc = NULL;
6509 struct cfil_entry *entry;
6510 int count = 0;
6511
6512 if (content_filters == NULL) {
6513 return;
6514 }
6515 if (kcunit > MAX_CONTENT_FILTER) {
6516 return;
6517 }
6518
6519 cfil_rw_lock_shared(&cfil_lck_rw);
6520
6521 if (content_filters[kcunit - 1] == NULL) {
6522 cfil_rw_unlock_shared(&cfil_lck_rw);
6523 return;
6524 }
6525 cfc = content_filters[kcunit - 1];
6526
6527 CFIL_LOG(LOG_ERR, "CFIL: FILTER SHOW: Filter <unit %d, entry count %d> flags <%lx>:",
6528 kcunit, cfc->cf_sock_count, (unsigned long)cfc->cf_flags);
6529 if (cfc->cf_flags & CFF_DETACHING) {
6530 CFIL_LOG(LOG_ERR, "CFIL: FILTER SHOW: - DETACHING");
6531 }
6532 if (cfc->cf_flags & CFF_ACTIVE) {
6533 CFIL_LOG(LOG_ERR, "CFIL: FILTER SHOW: - ACTIVE");
6534 }
6535 if (cfc->cf_flags & CFF_FLOW_CONTROLLED) {
6536 CFIL_LOG(LOG_ERR, "CFIL: FILTER SHOW: - FLOW CONTROLLED");
6537 }
6538
6539 TAILQ_FOREACH(entry, &cfc->cf_sock_entries, cfe_link) {
6540 if (entry->cfe_cfil_info && entry->cfe_cfil_info->cfi_so) {
6541 struct cfil_info *cfil_info = entry->cfe_cfil_info;
6542
6543 count++;
6544
6545 if (entry->cfe_flags & CFEF_CFIL_DETACHED) {
6546 cfil_info_log(LOG_ERR, cfil_info, "CFIL: FILTER SHOW: - DETACHED");
6547 } else {
6548 cfil_info_log(LOG_ERR, cfil_info, "CFIL: FILTER SHOW: - ATTACHED");
6549 }
6550 }
6551 }
6552
6553 CFIL_LOG(LOG_ERR, "CFIL: FILTER SHOW: Filter - total entries shown: %d", count);
6554
6555 cfil_rw_unlock_shared(&cfil_lck_rw);
6556 }
6557
6558 void
6559 cfil_info_show(void)
6560 {
6561 struct cfil_info *cfil_info;
6562 int count = 0;
6563
6564 cfil_rw_lock_shared(&cfil_lck_rw);
6565
6566 CFIL_LOG(LOG_ERR, "CFIL: INFO SHOW: count %d", cfil_sock_attached_count);
6567
6568 TAILQ_FOREACH(cfil_info, &cfil_sock_head, cfi_link) {
6569 count++;
6570
6571 cfil_info_log(LOG_ERR, cfil_info, "CFIL: INFO SHOW");
6572
6573 if (cfil_info->cfi_flags & CFIF_DROP) {
6574 CFIL_LOG(LOG_ERR, "CFIL: INFO FLAG - DROP");
6575 }
6576 if (cfil_info->cfi_flags & CFIF_CLOSE_WAIT) {
6577 CFIL_LOG(LOG_ERR, "CFIL: INFO FLAG - CLOSE_WAIT");
6578 }
6579 if (cfil_info->cfi_flags & CFIF_SOCK_CLOSED) {
6580 CFIL_LOG(LOG_ERR, "CFIL: INFO FLAG - SOCK_CLOSED");
6581 }
6582 if (cfil_info->cfi_flags & CFIF_RETRY_INJECT_IN) {
6583 CFIL_LOG(LOG_ERR, "CFIL: INFO FLAG - RETRY_INJECT_IN");
6584 }
6585 if (cfil_info->cfi_flags & CFIF_RETRY_INJECT_OUT) {
6586 CFIL_LOG(LOG_ERR, "CFIL: INFO FLAG - RETRY_INJECT_OUT");
6587 }
6588 if (cfil_info->cfi_flags & CFIF_SHUT_WR) {
6589 CFIL_LOG(LOG_ERR, "CFIL: INFO FLAG - SHUT_WR");
6590 }
6591 if (cfil_info->cfi_flags & CFIF_SHUT_RD) {
6592 CFIL_LOG(LOG_ERR, "CFIL: INFO FLAG - SHUT_RD");
6593 }
6594 }
6595
6596 CFIL_LOG(LOG_ERR, "CFIL: INFO SHOW: total cfil_info shown: %d", count);
6597
6598 cfil_rw_unlock_shared(&cfil_lck_rw);
6599 }
6600
6601 bool
6602 cfil_info_idle_timed_out(struct cfil_info *cfil_info, int timeout, u_int32_t current_time)
6603 {
6604 if (cfil_info && cfil_info->cfi_hash_entry &&
6605 (current_time - cfil_info->cfi_hash_entry->cfentry_lastused >= (u_int32_t)timeout)) {
6606 #if GC_DEBUG
6607 cfil_info_log(LOG_ERR, cfil_info, "CFIL: flow IDLE timeout expired");
6608 #endif
6609 return true;
6610 }
6611 return false;
6612 }
6613
6614 bool
6615 cfil_info_action_timed_out(struct cfil_info *cfil_info, int timeout)
6616 {
6617 struct cfil_entry *entry;
6618 struct timeval current_tv;
6619 struct timeval diff_time;
6620
6621 if (cfil_info == NULL) {
6622 return false;
6623 }
6624
6625 /*
6626 * If we have queued up more data than passed offset and we haven't received
6627 * an action from user space for a while (the user space filter might have crashed),
6628 * return action timed out.
6629 */
6630 if (cfil_info->cfi_snd.cfi_pending_last > cfil_info->cfi_snd.cfi_pass_offset ||
6631 cfil_info->cfi_rcv.cfi_pending_last > cfil_info->cfi_rcv.cfi_pass_offset) {
6632 microuptime(&current_tv);
6633
6634 for (int kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
6635 entry = &cfil_info->cfi_entries[kcunit - 1];
6636
6637 if (entry->cfe_filter == NULL) {
6638 continue;
6639 }
6640
6641 if (cfil_info->cfi_snd.cfi_pending_last > entry->cfe_snd.cfe_pass_offset ||
6642 cfil_info->cfi_rcv.cfi_pending_last > entry->cfe_rcv.cfe_pass_offset) {
6643 // haven't gotten an action from this filter, check timeout
6644 timersub(&current_tv, &entry->cfe_last_action, &diff_time);
6645 if (diff_time.tv_sec >= timeout) {
6646 #if GC_DEBUG
6647 cfil_info_log(LOG_ERR, cfil_info, "CFIL: flow ACTION timeout expired");
6648 #endif
6649 return true;
6650 }
6651 }
6652 }
6653 }
6654 return false;
6655 }
6656
6657 bool
6658 cfil_info_buffer_threshold_exceeded(struct cfil_info *cfil_info)
6659 {
6660 if (cfil_info == NULL) {
6661 return false;
6662 }
6663
6664 /*
6665 * Clean up flow if it exceeded queue thresholds
6666 */
6667 if (cfil_info->cfi_snd.cfi_tail_drop_cnt ||
6668 cfil_info->cfi_rcv.cfi_tail_drop_cnt) {
6669 #if GC_DEBUG
6670 CFIL_LOG(LOG_ERR, "CFIL: queue threshold exceeded: mbuf max <count: %d bytes: %d> tail drop count <OUT: %d IN: %d>",
6671 cfil_udp_gc_mbuf_num_max,
6672 cfil_udp_gc_mbuf_cnt_max,
6673 cfil_info->cfi_snd.cfi_tail_drop_cnt,
6674 cfil_info->cfi_rcv.cfi_tail_drop_cnt);
6675 cfil_info_log(LOG_ERR, cfil_info, "CFIL: queue threshold exceeded");
6676 #endif
6677 return true;
6678 }
6679
6680 return false;
6681 }
6682
6683 static void
6684 cfil_udp_gc_thread_sleep(bool forever)
6685 {
6686 if (forever) {
6687 (void) assert_wait((event_t) &cfil_sock_udp_attached_count,
6688 THREAD_INTERRUPTIBLE);
6689 } else {
6690 uint64_t deadline = 0;
6691 nanoseconds_to_absolutetime(UDP_FLOW_GC_RUN_INTERVAL_NSEC, &deadline);
6692 clock_absolutetime_interval_to_deadline(deadline, &deadline);
6693
6694 (void) assert_wait_deadline(&cfil_sock_udp_attached_count,
6695 THREAD_INTERRUPTIBLE, deadline);
6696 }
6697 }
6698
6699 static void
6700 cfil_udp_gc_thread_func(void *v, wait_result_t w)
6701 {
6702 #pragma unused(v, w)
6703
6704 ASSERT(cfil_udp_gc_thread == current_thread());
6705 thread_set_thread_name(current_thread(), "CFIL_UPD_GC");
6706
6707 // Kick off gc shortly
6708 cfil_udp_gc_thread_sleep(false);
6709 thread_block_parameter((thread_continue_t) cfil_info_udp_expire, NULL);
6710 /* NOTREACHED */
6711 }
6712
6713 static void
6714 cfil_info_udp_expire(void *v, wait_result_t w)
6715 {
6716 #pragma unused(v, w)
6717
6718 static uint64_t expired_array[UDP_FLOW_GC_MAX_COUNT];
6719 static uint32_t expired_count = 0;
6720
6721 struct cfil_info *cfil_info;
6722 struct cfil_hash_entry *hash_entry;
6723 struct cfil_db *db;
6724 struct socket *so;
6725 u_int64_t current_time = 0;
6726
6727 current_time = net_uptime();
6728
6729 // Get all expired UDP flow ids
6730 cfil_rw_lock_shared(&cfil_lck_rw);
6731
6732 if (cfil_sock_udp_attached_count == 0) {
6733 cfil_rw_unlock_shared(&cfil_lck_rw);
6734 goto go_sleep;
6735 }
6736
6737 TAILQ_FOREACH(cfil_info, &cfil_sock_head, cfi_link) {
6738 if (expired_count >= UDP_FLOW_GC_MAX_COUNT) {
6739 break;
6740 }
6741
6742 if (IS_UDP(cfil_info->cfi_so)) {
6743 if (cfil_info_idle_timed_out(cfil_info, UDP_FLOW_GC_IDLE_TO, current_time) ||
6744 cfil_info_action_timed_out(cfil_info, UDP_FLOW_GC_ACTION_TO) ||
6745 cfil_info_buffer_threshold_exceeded(cfil_info)) {
6746 expired_array[expired_count] = cfil_info->cfi_sock_id;
6747 expired_count++;
6748 }
6749 }
6750 }
6751 cfil_rw_unlock_shared(&cfil_lck_rw);
6752
6753 if (expired_count == 0) {
6754 goto go_sleep;
6755 }
6756
6757 for (uint32_t i = 0; i < expired_count; i++) {
6758 // Search for socket (UDP only and lock so)
6759 so = cfil_socket_from_sock_id(expired_array[i], true);
6760 if (so == NULL) {
6761 continue;
6762 }
6763
6764 cfil_info = cfil_db_get_cfil_info(so->so_cfil_db, expired_array[i]);
6765 if (cfil_info == NULL) {
6766 goto unlock;
6767 }
6768
6769 db = so->so_cfil_db;
6770 hash_entry = cfil_info->cfi_hash_entry;
6771
6772 if (db == NULL || hash_entry == NULL) {
6773 goto unlock;
6774 }
6775
6776 #if GC_DEBUG || LIFECYCLE_DEBUG
6777 cfil_info_log(LOG_ERR, cfil_info, "CFIL: LIFECYCLE: GC CLEAN UP");
6778 #endif
6779
6780 cfil_db_delete_entry(db, hash_entry);
6781 cfil_info_free(cfil_info);
6782 OSIncrementAtomic(&cfil_stats.cfs_sock_detached);
6783
6784 if (so->so_flags & SOF_CONTENT_FILTER) {
6785 if (db->cfdb_count == 0) {
6786 so->so_flags &= ~SOF_CONTENT_FILTER;
6787 }
6788 VERIFY(so->so_usecount > 0);
6789 so->so_usecount--;
6790 }
6791 unlock:
6792 socket_unlock(so, 1);
6793 }
6794
6795 #if GC_DEBUG
6796 CFIL_LOG(LOG_ERR, "CFIL: UDP flow idle timeout check: expired %d idle flows", expired_count);
6797 #endif
6798 expired_count = 0;
6799
6800 go_sleep:
6801
6802 // Sleep forever (until woken up) if there are no more UDP flows to clean
6803 cfil_rw_lock_shared(&cfil_lck_rw);
6804 cfil_udp_gc_thread_sleep(cfil_sock_udp_attached_count == 0 ? true : false);
6805 cfil_rw_unlock_shared(&cfil_lck_rw);
6806 thread_block_parameter((thread_continue_t)cfil_info_udp_expire, NULL);
6807 /* NOTREACHED */
6808 }
6809
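/*
 * Attach an m_tag to the packet recording the socket state change count,
 * socket options and the flow's remote address, so this state can be
 * recovered later via cfil_udp_get_socket_state().
 */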
6810 struct m_tag *
6811 cfil_udp_save_socket_state(struct cfil_info *cfil_info, struct mbuf *m)
6812 {
6813 struct m_tag *tag = NULL;
6814 struct cfil_tag *ctag = NULL;
6815 struct cfil_hash_entry *hash_entry = NULL;
6816
6817 if (cfil_info == NULL || cfil_info->cfi_so == NULL ||
6818 cfil_info->cfi_hash_entry == NULL || m == NULL || !(m->m_flags & M_PKTHDR)) {
6819 return NULL;
6820 }
6821
6822 /* Allocate a tag */
6823 tag = m_tag_create(KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_CFIL_UDP,
6824 sizeof(struct cfil_tag), M_DONTWAIT, m);
6825
6826 if (tag) {
6827 ctag = (struct cfil_tag*)(tag + 1);
6828 ctag->cfil_so_state_change_cnt = cfil_info->cfi_so->so_state_change_cnt;
6829 ctag->cfil_so_options = cfil_info->cfi_so->so_options;
6830
6831 hash_entry = cfil_info->cfi_hash_entry;
6832 if (hash_entry->cfentry_family == AF_INET6) {
6833 fill_ip6_sockaddr_4_6(&ctag->cfil_faddr,
6834 &hash_entry->cfentry_faddr.addr6,
6835 hash_entry->cfentry_fport);
6836 } else if (hash_entry->cfentry_family == AF_INET) {
6837 fill_ip_sockaddr_4_6(&ctag->cfil_faddr,
6838 hash_entry->cfentry_faddr.addr46.ia46_addr4,
6839 hash_entry->cfentry_fport);
6840 }
6841 m_tag_prepend(m, tag);
6842 return tag;
6843 }
6844 return NULL;
6845 }
6846
6847 struct m_tag *
6848 cfil_udp_get_socket_state(struct mbuf *m, uint32_t *state_change_cnt, short *options,
6849 struct sockaddr **faddr)
6850 {
6851 struct m_tag *tag = NULL;
6852 struct cfil_tag *ctag = NULL;
6853
6854 tag = m_tag_locate(m, KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_CFIL_UDP, NULL);
6855 if (tag) {
6856 ctag = (struct cfil_tag *)(tag + 1);
6857 if (state_change_cnt) {
6858 *state_change_cnt = ctag->cfil_so_state_change_cnt;
6859 }
6860 if (options) {
6861 *options = ctag->cfil_so_options;
6862 }
6863 if (faddr) {
6864 *faddr = (struct sockaddr *) &ctag->cfil_faddr;
6865 }
6866
6867 /*
6868 * Unlink tag and hand it over to caller.
6869 * Note that caller will be responsible to free it.
6870 */
6871 m_tag_unlink(m, tag);
6872 return tag;
6873 }
6874 return NULL;
6875 }
6876
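/*
 * Enqueue an accumulated stats report message on the filter's kernel
 * control socket; on ENOBUFS the filter is flagged as flow controlled.
 */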
6877 static int
6878 cfil_dispatch_stats_event_locked(int kcunit, struct cfil_stats_report_buffer *buffer, uint32_t stats_count)
6879 {
6880 struct content_filter *cfc = NULL;
6881 errno_t error = 0;
6882 size_t msgsize = 0;
6883
6884 if (buffer == NULL || stats_count == 0) {
6885 return error;
6886 }
6887
6888 if (content_filters == NULL || kcunit > MAX_CONTENT_FILTER) {
6889 return error;
6890 }
6891
6892 cfc = content_filters[kcunit - 1];
6893 if (cfc == NULL) {
6894 return error;
6895 }
6896
6897 /* Would be wasteful to try */
6898 if (cfc->cf_flags & CFF_FLOW_CONTROLLED) {
6899 error = ENOBUFS;
6900 goto done;
6901 }
6902
6903 msgsize = sizeof(struct cfil_msg_stats_report) + (sizeof(struct cfil_msg_sock_stats) * stats_count);
6904 buffer->msghdr.cfm_len = msgsize;
6905 buffer->msghdr.cfm_version = 1;
6906 buffer->msghdr.cfm_type = CFM_TYPE_EVENT;
6907 buffer->msghdr.cfm_op = CFM_OP_STATS;
6908 buffer->msghdr.cfm_sock_id = 0;
6909 buffer->count = stats_count;
6910
6911 #if STATS_DEBUG
6912 CFIL_LOG(LOG_ERR, "STATS (kcunit %d): msg size %lu - %lu %lu %lu",
6913 kcunit,
6914 (unsigned long)msgsize,
6915 (unsigned long)sizeof(struct cfil_msg_stats_report),
6916 (unsigned long)sizeof(struct cfil_msg_sock_stats),
6917 (unsigned long)stats_count);
6918 #endif
6919
6920 error = ctl_enqueuedata(cfc->cf_kcref, cfc->cf_kcunit,
6921 buffer,
6922 msgsize,
6923 CTL_DATA_EOR);
6924 if (error != 0) {
6925 CFIL_LOG(LOG_ERR, "ctl_enqueuedata() failed: %d", error);
6926 goto done;
6927 }
6928 OSIncrementAtomic(&cfil_stats.cfs_stats_event_ok);
6929
6930 #if STATS_DEBUG
6931 CFIL_LOG(LOG_ERR, "CFIL: STATS REPORT: send msg to %d", kcunit);
6932 #endif
6933
6934 done:
6935
6936 if (error == ENOBUFS) {
6937 OSIncrementAtomic(
6938 &cfil_stats.cfs_stats_event_flow_control);
6939
6940 if (!cfil_rw_lock_shared_to_exclusive(&cfil_lck_rw)) {
6941 cfil_rw_lock_exclusive(&cfil_lck_rw);
6942 }
6943
6944 cfc->cf_flags |= CFF_FLOW_CONTROLLED;
6945
6946 cfil_rw_unlock_exclusive(&cfil_lck_rw);
6947 } else if (error != 0) {
6948 OSIncrementAtomic(&cfil_stats.cfs_stats_event_fail);
6949 }
6950
6951 return error;
6952 }
6953
6954 static void
6955 cfil_stats_report_thread_sleep(bool forever)
6956 {
6957 #if STATS_DEBUG
6958 CFIL_LOG(LOG_ERR, "CFIL: STATS COLLECTION SLEEP");
6959 #endif
6960
6961 if (forever) {
6962 (void) assert_wait((event_t) &cfil_sock_attached_stats_count,
6963 THREAD_INTERRUPTIBLE);
6964 } else {
6965 uint64_t deadline = 0;
6966 nanoseconds_to_absolutetime(CFIL_STATS_REPORT_RUN_INTERVAL_NSEC, &deadline);
6967 clock_absolutetime_interval_to_deadline(deadline, &deadline);
6968
6969 (void) assert_wait_deadline(&cfil_sock_attached_stats_count,
6970 THREAD_INTERRUPTIBLE, deadline);
6971 }
6972 }
6973
6974 static void
6975 cfil_stats_report_thread_func(void *v, wait_result_t w)
6976 {
6977 #pragma unused(v, w)
6978
6979 ASSERT(cfil_stats_report_thread == current_thread());
6980 thread_set_thread_name(current_thread(), "CFIL_STATS_REPORT");
6981
6982 // Kick off stats reporting shortly
6983 cfil_stats_report_thread_sleep(false);
6984 thread_block_parameter((thread_continue_t) cfil_stats_report, NULL);
6985 /* NOTREACHED */
6986 }
6987
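/*
 * For a single flow/filter pair, check whether the filter's report
 * interval has elapsed and the byte counts have changed since the last
 * report; if so, append a cfil_msg_sock_stats record to the per-filter
 * report buffer and return true.
 */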
6988 static bool
6989 cfil_stats_collect_flow_stats_for_filter(int kcunit,
6990 struct cfil_info *cfil_info,
6991 struct cfil_entry *entry,
6992 struct timeval current_tv)
6993 {
6994 struct cfil_stats_report_buffer *buffer = NULL;
6995 struct cfil_msg_sock_stats *flow_array = NULL;
6996 struct cfil_msg_sock_stats *stats = NULL;
6997 struct inpcb *inp = NULL;
6998 struct timeval diff_time;
6999 uint64_t diff_time_usecs;
7000 int index = 0;
7001
7002 if (entry->cfe_stats_report_frequency == 0) {
7003 return false;
7004 }
7005
7006 buffer = global_cfil_stats_report_buffers[kcunit - 1];
7007 if (buffer == NULL) {
7008 #if STATS_DEBUG
7009 CFIL_LOG(LOG_ERR, "CFIL: STATS: no buffer");
7010 #endif
7011 return false;
7012 }
7013
7014 timersub(&current_tv, &entry->cfe_stats_report_ts, &diff_time);
7015 diff_time_usecs = (diff_time.tv_sec * USEC_PER_SEC) + diff_time.tv_usec;
7016
7017 #if STATS_DEBUG
7018 CFIL_LOG(LOG_ERR, "CFIL: STATS REPORT - elapsed time - ts %llu %llu cur ts %llu %llu diff %llu %llu (usecs %llu) @freq %llu usecs sockID %llu",
7019 (unsigned long long)entry->cfe_stats_report_ts.tv_sec,
7020 (unsigned long long)entry->cfe_stats_report_ts.tv_usec,
7021 (unsigned long long)current_tv.tv_sec,
7022 (unsigned long long)current_tv.tv_usec,
7023 (unsigned long long)diff_time.tv_sec,
7024 (unsigned long long)diff_time.tv_usec,
7025 (unsigned long long)diff_time_usecs,
7026 (unsigned long long)((entry->cfe_stats_report_frequency * NSEC_PER_MSEC) / NSEC_PER_USEC),
7027 cfil_info->cfi_sock_id);
7028 #endif
7029
7030 // Compare elapsed time in usecs
7031 if (diff_time_usecs >= (entry->cfe_stats_report_frequency * NSEC_PER_MSEC) / NSEC_PER_USEC) {
7032 #if STATS_DEBUG
7033 CFIL_LOG(LOG_ERR, "CFIL: STATS REPORT - in %llu reported %llu",
7034 cfil_info->cfi_byte_inbound_count,
7035 entry->cfe_byte_inbound_count_reported);
7036 CFIL_LOG(LOG_ERR, "CFIL: STATS REPORT - out %llu reported %llu",
7037 cfil_info->cfi_byte_outbound_count,
7038 entry->cfe_byte_outbound_count_reported);
7039 #endif
7040 // Check if flow has new bytes that have not been reported
7041 if (entry->cfe_byte_inbound_count_reported < cfil_info->cfi_byte_inbound_count ||
7042 entry->cfe_byte_outbound_count_reported < cfil_info->cfi_byte_outbound_count) {
7043 flow_array = (struct cfil_msg_sock_stats *)&buffer->stats;
7044 index = global_cfil_stats_counts[kcunit - 1];
7045
7046 stats = &flow_array[index];
7047 stats->cfs_sock_id = cfil_info->cfi_sock_id;
7048 stats->cfs_byte_inbound_count = cfil_info->cfi_byte_inbound_count;
7049 stats->cfs_byte_outbound_count = cfil_info->cfi_byte_outbound_count;
7050
7051 if (entry->cfe_laddr_sent == false) {
7052 /* cache it if necessary */
7053 if (cfil_info->cfi_so_attach_laddr.sa.sa_len == 0) {
7054 inp = cfil_info->cfi_so ? sotoinpcb(cfil_info->cfi_so) : NULL;
7055 if (inp != NULL) {
7056 boolean_t outgoing = (cfil_info->cfi_dir == CFS_CONNECTION_DIR_OUT);
7057 union sockaddr_in_4_6 *src = outgoing ? &cfil_info->cfi_so_attach_laddr : NULL;
7058 union sockaddr_in_4_6 *dst = outgoing ? NULL : &cfil_info->cfi_so_attach_laddr;
7059 cfil_fill_event_msg_addresses(cfil_info->cfi_hash_entry, inp,
7060 src, dst, inp->inp_vflag & INP_IPV4, outgoing);
7061 }
7062 }
7063
7064 if (cfil_info->cfi_so_attach_laddr.sa.sa_len != 0) {
7065 stats->cfs_laddr.sin6 = cfil_info->cfi_so_attach_laddr.sin6;
7066 entry->cfe_laddr_sent = true;
7067 }
7068 }
7069
7070 global_cfil_stats_counts[kcunit - 1]++;
7071
7072 entry->cfe_stats_report_ts = current_tv;
7073 entry->cfe_byte_inbound_count_reported = cfil_info->cfi_byte_inbound_count;
7074 entry->cfe_byte_outbound_count_reported = cfil_info->cfi_byte_outbound_count;
7075 #if STATS_DEBUG
7076 cfil_info_log(LOG_ERR, cfil_info, "CFIL: LIFECYCLE: STATS COLLECTED");
7077 #endif
7078 CFI_ADD_TIME_LOG(cfil_info, &current_tv, &cfil_info->cfi_first_event, CFM_OP_STATS);
7079 return true;
7080 }
7081 }
7082 return false;
7083 }
7084
7085 static void
7086 cfil_stats_report(void *v, wait_result_t w)
7087 {
7088 #pragma unused(v, w)
7089
7090 struct cfil_info *cfil_info = NULL;
7091 struct cfil_entry *entry = NULL;
7092 struct timeval current_tv;
7093 uint32_t flow_count = 0;
7094 uint64_t saved_next_sock_id = 0; // Next sock id to be reported for next loop
7095 bool flow_reported = false;
7096
7097 #if STATS_DEBUG
7098 CFIL_LOG(LOG_ERR, "CFIL: STATS COLLECTION RUNNING");
7099 #endif
7100
7101 do {
7102 // Collect all sock ids of flows that have new stats
7103 cfil_rw_lock_shared(&cfil_lck_rw);
7104
7105 if (cfil_sock_attached_stats_count == 0) {
7106 #if STATS_DEBUG
7107 CFIL_LOG(LOG_ERR, "CFIL: STATS: no flow");
7108 #endif
7109 cfil_rw_unlock_shared(&cfil_lck_rw);
7110 goto go_sleep;
7111 }
7112
7113 for (int kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
7114 if (global_cfil_stats_report_buffers[kcunit - 1] != NULL) {
7115 memset(global_cfil_stats_report_buffers[kcunit - 1], 0, sizeof(struct cfil_stats_report_buffer));
7116 }
7117 global_cfil_stats_counts[kcunit - 1] = 0;
7118 }
7119
7120 microuptime(&current_tv);
7121 flow_count = 0;
7122
7123 TAILQ_FOREACH(cfil_info, &cfil_sock_head_stats, cfi_link_stats) {
7124 if (saved_next_sock_id != 0 &&
7125 saved_next_sock_id == cfil_info->cfi_sock_id) {
7126 // Here is where we left off previously, start accumulating
7127 saved_next_sock_id = 0;
7128 }
7129
7130 if (saved_next_sock_id == 0) {
7131 if (flow_count >= CFIL_STATS_REPORT_MAX_COUNT) {
7132 // Examine a fixed number of flows each round. Remember the current flow
7133 // so we can start from here for the next loop
7134 saved_next_sock_id = cfil_info->cfi_sock_id;
7135 break;
7136 }
7137
7138 flow_reported = false;
7139 for (int kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
7140 entry = &cfil_info->cfi_entries[kcunit - 1];
7141 if (entry->cfe_filter == NULL) {
7142 #if STATS_DEBUG
7143 CFIL_LOG(LOG_NOTICE, "CFIL: STATS REPORT - so %llx no filter",
7144 cfil_info->cfi_so ? (uint64_t)VM_KERNEL_ADDRPERM(cfil_info->cfi_so) : 0);
7145 #endif
7146 continue;
7147 }
7148
7149 if ((entry->cfe_stats_report_frequency > 0) &&
7150 cfil_stats_collect_flow_stats_for_filter(kcunit, cfil_info, entry, current_tv) == true) {
7151 flow_reported = true;
7152 }
7153 }
7154 if (flow_reported == true) {
7155 flow_count++;
7156 }
7157 }
7158 }
7159
7160 if (flow_count > 0) {
7161 #if STATS_DEBUG
7162 CFIL_LOG(LOG_ERR, "CFIL: STATS reporting for %d flows", flow_count);
7163 #endif
7164 for (int kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
7165 if (global_cfil_stats_report_buffers[kcunit - 1] != NULL &&
7166 global_cfil_stats_counts[kcunit - 1] > 0) {
7167 cfil_dispatch_stats_event_locked(kcunit,
7168 global_cfil_stats_report_buffers[kcunit - 1],
7169 global_cfil_stats_counts[kcunit - 1]);
7170 }
7171 }
7172 } else {
7173 cfil_rw_unlock_shared(&cfil_lck_rw);
7174 goto go_sleep;
7175 }
7176
7177 cfil_rw_unlock_shared(&cfil_lck_rw);
7178
7179 // Loop again if we haven't finished the whole cfil_info list
7180 } while (saved_next_sock_id != 0);
7181
7182 go_sleep:
7183
7184 // Sleep forever (until woken up) if there are no more flows to report
7185 cfil_rw_lock_shared(&cfil_lck_rw);
7186 cfil_stats_report_thread_sleep(cfil_sock_attached_stats_count == 0 ? true : false);
7187 cfil_rw_unlock_shared(&cfil_lck_rw);
7188 thread_block_parameter((thread_continue_t) cfil_stats_report, NULL);
7189 /* NOTREACHED */
7190 }