1 /*
2 * Copyright (c) 2013-2019 Apple Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. Please obtain a copy of the License at
10 * http://www.opensource.apple.com/apsl/ and read it before using this
11 * file.
12 *
13 * The Original Code and all software distributed under the License are
14 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
18 * Please see the License for the specific language governing rights and
19 * limitations under the License.
20 *
21 * @APPLE_LICENSE_HEADER_END@
22 */
23
24 /*
25 * THEORY OF OPERATION
26 *
27 * The socket content filter subsystem provides a way for user space agents to
28 * make filtering decisions based on the content of the data being sent and
29 * received by TCP/IP sockets.
30 *
31 * A content filter user space agent gets a copy of the data and the data is
32 * also kept in a kernel buffer until the user space agent makes a pass or drop
33 * decision. This unidirectional flow of content avoids unnecessary data copies
34 * back to the kernel.
35 *
36 * A user space filter agent opens a kernel control socket with the name
37 * CONTENT_FILTER_CONTROL_NAME to attach to the socket content filter subsystem.
38 * When connected, a "struct content_filter" is created and set as the
39 * "unitinfo" of the corresponding kernel control socket instance.
40 *
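 * A minimal user space sketch of opening and attaching such a kernel control
 * socket (standard <sys/kern_control.h> usage; error handling omitted):
 *
 *      int fd = socket(PF_SYSTEM, SOCK_DGRAM, SYSPROTO_CONTROL);
 *      struct ctl_info info = { 0 };
 *      strlcpy(info.ctl_name, CONTENT_FILTER_CONTROL_NAME, sizeof(info.ctl_name));
 *      ioctl(fd, CTLIOCGINFO, &info);        // resolve the control name to an id
 *      struct sockaddr_ctl sc = { 0 };
 *      sc.sc_len = sizeof(sc);
 *      sc.sc_family = AF_SYSTEM;
 *      sc.ss_sysaddr = AF_SYS_CONTROL;
 *      sc.sc_id = info.ctl_id;
 *      sc.sc_unit = 0;                       // let the control layer pick a unit
 *      connect(fd, (struct sockaddr *)&sc, sizeof(sc));
 *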
41 * The socket content filter subsystem exchanges messages with the user space
42 * filter agent until an ultimate pass or drop decision is made by the
43 * user space filter agent.
44 *
45 * It should be noted that messages about many TCP/IP sockets can be multiplexed
46 * over a single kernel control socket.
47 *
48 * Notes:
49 * - The current implementation is limited to TCP sockets.
50 * - The current implementation supports up to MAX_CONTENT_FILTER simultaneous
51 * content filters (two on non-macOS platforms, eight on macOS).
52 *
53 *
54 * NECP FILTER CONTROL UNIT
55 *
56 * A user space filter agent uses the Network Extension Control Policy (NECP)
57 * database to specify which TCP/IP sockets need to be filtered. The NECP
58 * criteria may be based on a variety of properties like user ID or proc UUID.
59 *
60 * The NECP "filter control unit" is used by the socket content filter subsystem
61 * to deliver the relevant TCP/IP content information to the appropriate
62 * user space filter agent via its kernel control socket instance.
63 * This works as follows:
64 *
65 * 1) The user space filter agent specifies an NECP filter control unit when
66 * it adds its filtering rules to the NECP database.
67 *
68 * 2) The user space filter agent also sets its NECP filter control unit on the
69 * content filter kernel control socket via the socket option
70 * CFIL_OPT_NECP_CONTROL_UNIT.
71 *
72 * 3) The NECP database is consulted to find out if a given TCP/IP socket
73 * needs to be subjected to content filtering and returns the corresponding
74 * NECP filter control unit -- the NECP filter control unit is actually
75 * stored in the TCP/IP socket structure so the NECP lookup is really simple.
76 *
77 * 4) The NECP filter control unit is then used to find the corresponding
78 * kernel control socket instance.
79 *
80 * Note: NECP currently supports a single filter control unit per TCP/IP socket
81 * but this restriction may soon be lifted.
82 *
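 * For step 2) above, a sketch of how an agent could bind its NECP filter
 * control unit to the control socket (assuming "fd" is the connected kernel
 * control socket and "necp_unit" matches the unit used in its NECP rules):
 *
 *      uint32_t necp_unit = 1234;            // example value only
 *      setsockopt(fd, SYSPROTO_CONTROL, CFIL_OPT_NECP_CONTROL_UNIT,
 *          &necp_unit, sizeof(necp_unit));
 *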
83 *
84 * THE MESSAGING PROTOCOL
85 *
86 * The socket content filter subsystem and a user space filter agent
87 * communicate over the kernel control socket via an asynchronous
88 * messaging protocol (this is not a request-response protocol).
89 * The socket content filter subsystem sends event messages to the user
90 * space filter agent about the TCP/IP sockets it is interested to filter.
91 * The user space filter agent sends action messages to either allow
92 * data to pass or to disallow the data flow (and drop the connection).
93 *
94 * All messages over a content filter kernel control socket share the same
95 * common header of type "struct cfil_msg_hdr". The message type tells if
96 * it's an event message "CFM_TYPE_EVENT" or an action message "CFM_TYPE_ACTION".
97 * The message header field "cfm_sock_id" identifies a given TCP/IP socket.
98 * Note the message header length field may be padded for alignment and can
99 * be larger than the actual content of the message.
100 * The field "cfm_op" describes the kind of event or action.
101 *
102 * Here are the kinds of content filter events:
103 * - CFM_OP_SOCKET_ATTACHED: a new TCP/IP socket is being filtered
104 * - CFM_OP_SOCKET_CLOSED: A TCP/IP socket is closed
105 * - CFM_OP_DATA_OUT: A span of data is being sent on a TCP/IP socket
106 * - CFM_OP_DATA_IN: A span of data is being received on a TCP/IP socket
107 *
108 *
109 * EVENT MESSAGES
110 *
111 * The CFM_OP_DATA_OUT and CFM_OP_DATA_IN event messages contain a span of
112 * data that is being sent or received. The position of this span of data
113 * in the data flow is described by a set of start and end offsets. These
114 * are absolute 64-bit offsets. The first byte sent (or received) starts
115 * at offset 0 and ends at offset 1. The length of the content data
116 * is given by the difference between the end offset and the start offset.
117 *
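 * For example, if 1000 bytes have already been sent on a socket, a
 * CFM_OP_DATA_OUT event describing the next 500 bytes carries a start offset
 * of 1000 and an end offset of 1500.
 *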
118 * After a CFM_OP_SOCKET_ATTACHED is delivered, CFM_OP_DATA_OUT and
119 * CFM_OP_DATA_IN events are not delivered until a CFM_OP_DATA_UPDATE
120 * action message is sent by the user space filter agent.
121 *
122 * Note: absolute 64-bit offsets should be large enough for the foreseeable
123 * future. A 64-bit counter will wrap after 468 years at 10 Gbit/sec:
124 * 2^64 / ((10^10 / 8) * 60 * 60 * 24 * 365.25) = 467.63
125 *
126 * There are two kinds of primary content filter actions:
127 * - CFM_OP_DATA_UPDATE: to update pass or peek offsets for each direction.
128 * - CFM_OP_DROP: to shut down the socket and disallow further data flow
129 *
130 * There is also an action to mark a given client flow as already filtered
131 * at a higher level, CFM_OP_BLESS_CLIENT.
132 *
133 *
134 * ACTION MESSAGES
135 *
136 * The CFM_OP_DATA_UPDATE action messages let the user space filter
137 * agent allow data to flow up to the specified pass offset -- there
138 * is a pass offset for outgoing data and a pass offset for incoming data.
139 * When a new TCP/IP socket is attached to the content filter, each pass offset
140 * is initially set to 0 so no data is allowed to pass by default.
141 * When the pass offset is set to CFM_MAX_OFFSET via a CFM_OP_DATA_UPDATE
142 * then the data flow becomes unrestricted.
143 *
144 * Note that pass offsets can only be incremented. A CFM_OP_DATA_UPDATE message
145 * with a pass offset smaller than the pass offset of a previous
146 * CFM_OP_DATA_UPDATE message is silently ignored.
147 *
148 * A user space filter agent also uses CFM_OP_DATA_UPDATE action messages
149 * to tell the kernel how much data it wants to see by using the peek offsets.
150 * Just like pass offsets, there is a peek offset for each direction.
151 * When a new TCP/IP socket is attached to the content filter, each peek offset
152 * is initially set to 0 so no CFM_OP_DATA_OUT and CFM_OP_DATA_IN event
153 * messages are dispatched by default until a CFM_OP_DATA_UPDATE action message
154 * with a greater than 0 peek offset is sent by the user space filter agent.
155 * When the peek offset is set to CFM_MAX_OFFSET via a CFM_OP_DATA_UPDATE
156 * then the flow of data events becomes unrestricted.
157 *
158 * Note that peek offsets cannot be smaller than the corresponding pass offset.
159 * Also a peek offset cannot be smaller than the corresponding end offset
160 * of the last CFM_OP_DATA_OUT/CFM_OP_DATA_IN message dispatched. Trying
161 * to set too small a peek value is silently ignored.
162 *
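 * As an illustration, an agent that has inspected the first 1500 outgoing
 * bytes and wants them released while continuing to see outgoing data could
 * send the following (a sketch only; the cfa_* and cfm_* field names are those
 * used by this file, and the cfa_msghdr member name is an assumption):
 *
 *      struct cfil_msg_action action = { 0 };
 *      action.cfa_msghdr.cfm_len = sizeof(action);
 *      action.cfa_msghdr.cfm_version = CFM_VERSION_CURRENT;
 *      action.cfa_msghdr.cfm_type = CFM_TYPE_ACTION;
 *      action.cfa_msghdr.cfm_op = CFM_OP_DATA_UPDATE;
 *      action.cfa_msghdr.cfm_sock_id = sock_id;      // from the attached event
 *      action.cfa_out_pass_offset = 1500;            // release the first 1500 bytes
 *      action.cfa_out_peek_offset = CFM_MAX_OFFSET;  // keep delivering data events
 *      send(fd, &action, sizeof(action), 0);
 *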
163 *
164 * PER SOCKET "struct cfil_info"
165 *
166 * As soon as a TCP/IP socket gets attached to a content filter, a
167 * "struct cfil_info" is created to hold the content filtering state for this
168 * socket.
169 *
170 * The content filtering state is made of the following information
171 * for each direction:
172 * - The current pass offset;
173 * - The first and last offsets of the data pending, waiting for a filtering
174 * decision;
175 * - The inject queue for data that passed the filters and that needs
176 * to be re-injected;
177 * - A content filter specific state in a set of "struct cfil_entry"
178 *
179 *
180 * CONTENT FILTER STATE "struct cfil_entry"
181 *
182 * The "struct cfil_entry" maintains the information most relevant to the
183 * message handling over a kernel control socket with a user space filter agent.
184 *
185 * The "struct cfil_entry" holds the NECP filter control unit that corresponds
186 * to the kernel control socket unit it corresponds to and also has a pointer
187 * to the corresponding "struct content_filter".
188 *
189 * For each direction, "struct cfil_entry" maintains the following information:
190 * - The pass offset
191 * - The peek offset
192 * - The offset of the last data peeked at by the filter
193 * - A queue of data that's waiting to be delivered to the user space filter
194 * agent on the kernel control socket
195 * - A queue of data for which event messages have been sent on the kernel
196 * control socket and are pending for a filtering decision.
197 *
198 *
199 * CONTENT FILTER QUEUES
200 *
201 * Data that is being filtered is steered away from the TCP/IP socket buffer
202 * and instead will sit in one of three content filter queues until the data
203 * can be re-injected into the TCP/IP socket buffer.
204 *
205 * A content filter queue is represented by "struct cfil_queue" that contains
206 * a list of mbufs and the start and end offset of the data span of
207 * the list of mbufs.
208 *
209 * The data moves into the three content filter queues according to this
210 * sequence:
211 * a) The "cfe_ctl_q" of "struct cfil_entry"
212 * b) The "cfe_pending_q" of "struct cfil_entry"
213 * c) The "cfi_inject_q" of "struct cfil_info"
214 *
215 * Note: The sequence (a),(b) may be repeated several times if there is more
216 * than one content filter attached to the TCP/IP socket.
217 *
218 * The "cfe_ctl_q" queue holds data than cannot be delivered to the
219 * kernel conntrol socket for two reasons:
220 * - The peek offset is less that the end offset of the mbuf data
221 * - The kernel control socket is flow controlled
222 *
223 * The "cfe_pending_q" queue holds data for which CFM_OP_DATA_OUT or
224 * CFM_OP_DATA_IN have been successfully dispatched to the kernel control
225 * socket and are waiting for a pass action message from the user space
226 * filter agent. An mbuf's data must be fully allowed to pass before it is
227 * removed from the cfe_pending_q.
228 *
229 * The "cfi_inject_q" queue holds data that has been fully allowed to pass
230 * by the user space filter agent and that needs to be re-injected into the
231 * TCP/IP socket.
232 *
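 * As an illustration of the sequence above: a 1000 byte mbuf chain captured
 * on output first sits on the "cfe_ctl_q" of the first attached entry; once a
 * CFM_OP_DATA_OUT event covering offsets 0 to 1000 has been delivered to that
 * agent, the data moves to its "cfe_pending_q"; when the agent's pass offset
 * reaches 1000, the data moves on to the next filter entry (repeating (a) and
 * (b)) or, if it was the last filter, to "cfi_inject_q" for re-injection.
 *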
233 *
234 * IMPACT ON FLOW CONTROL
235 *
236 * An essential aspect of the content filter subsystem is to minimize the
237 * impact on flow control of the TCP/IP sockets being filtered.
238 *
239 * The processing overhead of the content filtering may have an effect on
240 * flow control by adding noticeable delays and cannot be eliminated --
241 * care must be taken by the user space filter agent to minimize the
242 * processing delays.
243 *
244 * The amount of data being filtered is kept in buffers while waiting for
245 * a decision by the user space filter agent. This amount of data pending
246 * needs to be subtracted from the amount of data available in the
247 * corresponding TCP/IP socket buffer. This is done by modifying
248 * sbspace() and tcp_sbspace() to account for amount of data pending
249 * in the content filter.
250 *
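 * For instance, if a send buffer would otherwise report 16 KB of free space
 * but 4 KB of outgoing data is still waiting for a filtering decision, the
 * modified sbspace()/tcp_sbspace() report only 12 KB, so the socket buffer
 * cannot be overcommitted while the filter agent is deciding.
 *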
251 *
252 * LOCKING STRATEGY
253 *
254 * The global state of content filter subsystem is protected by a single
255 * read-write lock "cfil_lck_rw". The data flow can be done with the
256 * cfil read-write lock held as shared so it can be re-entered from multiple
257 * threads.
258 *
259 * The per TCP/IP socket content filter state -- "struct cfil_info" -- is
260 * protected by the socket lock.
261 *
262 * A TCP/IP socket lock cannot be taken while the cfil read-write lock
263 * is held. That's why we have some sequences where we drop the cfil read-write
264 * lock before taking the TCP/IP lock.
265 *
266 * It is also important to lock the TCP/IP socket buffer while the content
267 * filter is modifying the amount of pending data. Otherwise the calculations
268 * in sbspace() and tcp_sbspace() could be wrong.
269 *
270 * The "cfil_lck_rw" protects "struct content_filter" and also the fields
271 * "cfe_link" and "cfe_filter" of "struct cfil_entry".
272 *
273 * Actually "cfe_link" and "cfe_filter" are protected both by
274 * "cfil_lck_rw" and the socket lock: they may be modified only when
275 * "cfil_lck_rw" is exclusive and the socket is locked.
276 *
277 * To read the other fields of "struct content_filter" we have to take
278 * "cfil_lck_rw" in shared mode.
279 *
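 * A sketch of the resulting ordering, as used for example by
 * cfil_ctl_disconnect() below when it needs to lock a socket:
 *
 *      cfil_rw_unlock_exclusive(&cfil_lck_rw);   // drop the cfil lock first
 *      socket_lock(so, 1);                       // then take the socket lock
 *      ...
 *      cfil_rw_lock_exclusive(&cfil_lck_rw);     // the cfil lock may be re-taken
 *                                                // while the socket lock is held
 *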
280 *
281 * LIMITATIONS
282 *
283 * - For TCP sockets only
284 *
285 * - Does not support TCP unordered messages
286 */
287
288 /*
289 * TO DO LIST
290 *
291 * SOONER:
292 *
293 * Deal with OOB
294 *
295 * LATER:
296 *
297 * If datagram support is added, enqueue control and address mbufs as well
298 */
299
300 #include <sys/types.h>
301 #include <sys/kern_control.h>
302 #include <sys/queue.h>
303 #include <sys/domain.h>
304 #include <sys/protosw.h>
305 #include <sys/syslog.h>
306 #include <sys/systm.h>
307 #include <sys/param.h>
308 #include <sys/mbuf.h>
309
310 #include <kern/locks.h>
311 #include <kern/zalloc.h>
312 #include <kern/debug.h>
313
314 #include <net/content_filter.h>
315 #include <net/content_filter_crypto.h>
316
317 #include <netinet/in_pcb.h>
318 #include <netinet/tcp.h>
319 #include <netinet/tcp_var.h>
320 #include <netinet/udp.h>
321 #include <netinet/udp_var.h>
322
323 #include <string.h>
324 #include <libkern/libkern.h>
325 #include <kern/sched_prim.h>
326 #include <kern/task.h>
327 #include <mach/task_info.h>
328
329 #if !TARGET_OS_OSX && !defined(XNU_TARGET_OS_OSX)
330 #define MAX_CONTENT_FILTER 2
331 #else
332 #define MAX_CONTENT_FILTER 8
333 #endif
334
335 struct cfil_entry;
336
337 /*
338 * The structure content_filter represents a user space content filter
339 * It's created and associated with a kernel control socket instance
340 */
341 struct content_filter {
342 kern_ctl_ref cf_kcref;
343 u_int32_t cf_kcunit;
344 u_int32_t cf_flags;
345
346 uint32_t cf_necp_control_unit;
347
348 uint32_t cf_sock_count;
349 TAILQ_HEAD(, cfil_entry) cf_sock_entries;
350
351 cfil_crypto_state_t cf_crypto_state;
352 };
353
354 #define CFF_ACTIVE 0x01
355 #define CFF_DETACHING 0x02
356 #define CFF_FLOW_CONTROLLED 0x04
357
358 struct content_filter **content_filters = NULL;
359 uint32_t cfil_active_count = 0; /* Number of active content filters */
360 uint32_t cfil_sock_attached_count = 0; /* Number of socket attachments */
361 uint32_t cfil_sock_udp_attached_count = 0; /* Number of UDP socket attachments */
362 uint32_t cfil_close_wait_timeout = 1000; /* in milliseconds */
363
364 static kern_ctl_ref cfil_kctlref = NULL;
365
366 static lck_grp_attr_t *cfil_lck_grp_attr = NULL;
367 static lck_attr_t *cfil_lck_attr = NULL;
368 static lck_grp_t *cfil_lck_grp = NULL;
369 decl_lck_rw_data(static, cfil_lck_rw);
370
371 #define CFIL_RW_LCK_MAX 8
372
373 int cfil_rw_nxt_lck = 0;
374 void* cfil_rw_lock_history[CFIL_RW_LCK_MAX];
375
376 int cfil_rw_nxt_unlck = 0;
377 void* cfil_rw_unlock_history[CFIL_RW_LCK_MAX];
378
379 #define CONTENT_FILTER_ZONE_NAME "content_filter"
380 #define CONTENT_FILTER_ZONE_MAX 10
381 static struct zone *content_filter_zone = NULL; /* zone for content_filter */
382
383
384 #define CFIL_INFO_ZONE_NAME "cfil_info"
385 #define CFIL_INFO_ZONE_MAX 1024
386 static struct zone *cfil_info_zone = NULL; /* zone for cfil_info */
387
388 MBUFQ_HEAD(cfil_mqhead);
389
390 struct cfil_queue {
391 uint64_t q_start; /* offset of first byte in queue */
392 uint64_t q_end; /* offset of last byte in queue */
393 struct cfil_mqhead q_mq;
394 };
395
396 /*
397 * struct cfil_entry
398 *
399 * There is one entry per content filter
400 */
401 struct cfil_entry {
402 TAILQ_ENTRY(cfil_entry) cfe_link;
403 SLIST_ENTRY(cfil_entry) cfe_order_link;
404 struct content_filter *cfe_filter;
405
406 struct cfil_info *cfe_cfil_info;
407 uint32_t cfe_flags;
408 uint32_t cfe_necp_control_unit;
409 struct timeval cfe_last_event; /* To user space */
410 struct timeval cfe_last_action; /* From user space */
411
412 struct cfe_buf {
413 /*
414 * cfe_pending_q holds data that has been delivered to
415 * the filter and for which we are waiting for an action
416 */
417 struct cfil_queue cfe_pending_q;
418 /*
419 * This queue is for data that has not been delivered to
420 * the content filter (new data, data past the peek offset, or flow control)
421 */
422 struct cfil_queue cfe_ctl_q;
423
424 uint64_t cfe_pass_offset;
425 uint64_t cfe_peek_offset;
426 uint64_t cfe_peeked;
427 } cfe_snd, cfe_rcv;
428 };
429
430 #define CFEF_CFIL_ATTACHED 0x0001 /* was attached to filter */
431 #define CFEF_SENT_SOCK_ATTACHED 0x0002 /* sock attach event was sent */
432 #define CFEF_DATA_START 0x0004 /* can send data event */
433 #define CFEF_FLOW_CONTROLLED 0x0008 /* wait for flow control lift */
434 #define CFEF_SENT_DISCONNECT_IN 0x0010 /* event was sent */
435 #define CFEF_SENT_DISCONNECT_OUT 0x0020 /* event was sent */
436 #define CFEF_SENT_SOCK_CLOSED 0x0040 /* closed event was sent */
437 #define CFEF_CFIL_DETACHED 0x0080 /* filter was detached */
438
439
440 #define CFI_ADD_TIME_LOG(cfil, t1, t0, op) \
441 struct timeval _tdiff; \
442 if ((cfil)->cfi_op_list_ctr < CFI_MAX_TIME_LOG_ENTRY) { \
443 timersub(t1, t0, &_tdiff); \
444 (cfil)->cfi_op_time[(cfil)->cfi_op_list_ctr] = (uint32_t)(_tdiff.tv_sec * 1000 + _tdiff.tv_usec / 1000);\
445 (cfil)->cfi_op_list[(cfil)->cfi_op_list_ctr] = (unsigned char)op; \
446 (cfil)->cfi_op_list_ctr ++; \
447 }
448
449 struct cfil_hash_entry;
450
451 /*
452 * struct cfil_info
453 *
454 * There is a struct cfil_info per socket
455 */
456 struct cfil_info {
457 TAILQ_ENTRY(cfil_info) cfi_link;
458 struct socket *cfi_so;
459 uint64_t cfi_flags;
460 uint64_t cfi_sock_id;
461 struct timeval64 cfi_first_event;
462 uint32_t cfi_op_list_ctr;
463 uint32_t cfi_op_time[CFI_MAX_TIME_LOG_ENTRY]; /* time interval in milliseconds since first event */
464 unsigned char cfi_op_list[CFI_MAX_TIME_LOG_ENTRY];
465 union sockaddr_in_4_6 cfi_so_attach_faddr; /* faddr at the time of attach */
466 union sockaddr_in_4_6 cfi_so_attach_laddr; /* laddr at the time of attach */
467
468 int cfi_dir;
469 uint64_t cfi_byte_inbound_count;
470 uint64_t cfi_byte_outbound_count;
471
472 boolean_t cfi_isSignatureLatest; /* Indicates if signature covers latest flow attributes */
473 struct cfi_buf {
474 /*
475 * cfi_pending_first and cfi_pending_last describe the total
476 * amount of data outstanding for all the filters on
477 * this socket and data in the flow queue
478 * cfi_pending_mbcnt counts in sballoc() "chars of mbufs used"
479 */
480 uint64_t cfi_pending_first;
481 uint64_t cfi_pending_last;
482 uint32_t cfi_pending_mbcnt;
483 uint32_t cfi_pending_mbnum;
484 uint32_t cfi_tail_drop_cnt;
485 /*
486 * cfi_pass_offset is the minimum of all the filters
487 */
488 uint64_t cfi_pass_offset;
489 /*
490 * cfi_inject_q holds data that needs to be re-injected
491 * into the socket after filtering and that can
492 * be queued because of flow control
493 */
494 struct cfil_queue cfi_inject_q;
495 } cfi_snd, cfi_rcv;
496
497 struct cfil_entry cfi_entries[MAX_CONTENT_FILTER];
498 struct cfil_hash_entry *cfi_hash_entry;
499 SLIST_HEAD(, cfil_entry) cfi_ordered_entries;
500 } __attribute__((aligned(8)));
501
502 #define CFIF_DROP 0x0001 /* drop action applied */
503 #define CFIF_CLOSE_WAIT 0x0002 /* waiting for filter to close */
504 #define CFIF_SOCK_CLOSED 0x0004 /* socket is closed */
505 #define CFIF_RETRY_INJECT_IN 0x0010 /* inject in failed */
506 #define CFIF_RETRY_INJECT_OUT 0x0020 /* inject out failed */
507 #define CFIF_SHUT_WR 0x0040 /* shutdown write */
508 #define CFIF_SHUT_RD 0x0080 /* shutdown read */
509 #define CFIF_SOCKET_CONNECTED 0x0100 /* socket is connected */
510 #define CFIF_INITIAL_VERDICT 0x0200 /* received initial verdict */
511
512 #define CFI_MASK_GENCNT 0xFFFFFFFF00000000 /* upper 32 bits */
513 #define CFI_SHIFT_GENCNT 32
514 #define CFI_MASK_FLOWHASH 0x00000000FFFFFFFF /* lower 32 bits */
515 #define CFI_SHIFT_FLOWHASH 0
516
517 #define CFI_ENTRY_KCUNIT(i, e) (((e) - &((i)->cfi_entries[0])) + 1)
518
519 TAILQ_HEAD(cfil_sock_head, cfil_info) cfil_sock_head;
520
521 #define CFIL_QUEUE_VERIFY(x) if (cfil_debug) cfil_queue_verify(x)
522 #define CFIL_INFO_VERIFY(x) if (cfil_debug) cfil_info_verify(x)
523
524 /*
525 * UDP Socket Support
526 */
527 LIST_HEAD(cfilhashhead, cfil_hash_entry);
528 #define CFILHASHSIZE 16
529 #define CFIL_HASH(laddr, faddr, lport, fport) ((faddr) ^ ((laddr) >> 16) ^ (fport) ^ (lport))
530 #define IS_UDP(so) (so && so->so_proto && so->so_proto->pr_type == SOCK_DGRAM && so->so_proto->pr_protocol == IPPROTO_UDP)
531 #define UNCONNECTED(inp) (inp && (((inp->inp_vflag & INP_IPV4) && (inp->inp_faddr.s_addr == INADDR_ANY)) || \
532 ((inp->inp_vflag & INP_IPV6) && IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr))))
533 #define IS_ENTRY_ATTACHED(cfil_info, kcunit) (cfil_info != NULL && (kcunit <= MAX_CONTENT_FILTER) && \
534 cfil_info->cfi_entries[kcunit - 1].cfe_filter != NULL)
535 #define IS_DNS(local, remote) (check_port(local, 53) || check_port(remote, 53) || check_port(local, 5353) || check_port(remote, 5353))
536 #define IS_INITIAL_TFO_DATA(so) (so && (so->so_flags1 & SOF1_PRECONNECT_DATA) && (so->so_state & SS_ISCONNECTING))
537 #define NULLADDRESS(addr) ((addr.sa.sa_len == 0) || \
538 (addr.sa.sa_family == AF_INET && addr.sin.sin_addr.s_addr == 0) || \
539 (addr.sa.sa_family == AF_INET6 && IN6_IS_ADDR_UNSPECIFIED(&addr.sin6.sin6_addr)))
540
541 /*
542 * UDP Garbage Collection:
543 */
544 static struct thread *cfil_udp_gc_thread;
545 #define UDP_FLOW_GC_IDLE_TO 30 // Flow Idle Timeout in seconds
546 #define UDP_FLOW_GC_ACTION_TO 10 // Flow Action Timeout (no action from user space) in seconds
547 #define UDP_FLOW_GC_MAX_COUNT 100 // Max UDP flows to be handled per run
548 #define UDP_FLOW_GC_RUN_INTERVAL_NSEC (10 * NSEC_PER_SEC) // GC wakes up every 10 seconds
549
550 /*
551 * UDP flow queue thresholds
552 */
553 #define UDP_FLOW_GC_MBUF_CNT_MAX (2 << MBSHIFT) // Max mbuf byte count in flow queue (2MB)
554 #define UDP_FLOW_GC_MBUF_NUM_MAX (UDP_FLOW_GC_MBUF_CNT_MAX >> MCLSHIFT) // Max mbuf count in flow queue (1K)
555 #define UDP_FLOW_GC_MBUF_SHIFT 5 // Shift to get 1/32 of platform limits
556 /*
557 * UDP flow queue threshold globals:
558 */
559 static unsigned int cfil_udp_gc_mbuf_num_max = UDP_FLOW_GC_MBUF_NUM_MAX;
560 static unsigned int cfil_udp_gc_mbuf_cnt_max = UDP_FLOW_GC_MBUF_CNT_MAX;
561
562 /*
563 * struct cfil_hash_entry
564 *
565 * Hash entry for cfil_info
566 */
567 struct cfil_hash_entry {
568 LIST_ENTRY(cfil_hash_entry) cfentry_link;
569 struct cfil_info *cfentry_cfil;
570 u_short cfentry_fport;
571 u_short cfentry_lport;
572 sa_family_t cfentry_family;
573 u_int32_t cfentry_flowhash;
574 u_int32_t cfentry_lastused;
575 union {
576 /* foreign host table entry */
577 struct in_addr_4in6 addr46;
578 struct in6_addr addr6;
579 } cfentry_faddr;
580 union {
581 /* local host table entry */
582 struct in_addr_4in6 addr46;
583 struct in6_addr addr6;
584 } cfentry_laddr;
585 };
586
587 /*
588 * struct cfil_db
589 *
590 * For each UDP socket, this is a hash table maintaining all cfil_info structs
591 * keyed by the flow 4-tuples <lport,fport,laddr,faddr>.
592 */
593 struct cfil_db {
594 struct socket *cfdb_so;
595 uint32_t cfdb_count; /* Number of total content filters */
596 struct cfilhashhead *cfdb_hashbase;
597 u_long cfdb_hashmask;
598 struct cfil_hash_entry *cfdb_only_entry; /* Optimization for connected UDP */
599 };
600
601 /*
602 * CFIL specific mbuf tag:
603 * Save state of socket at the point of data entry into cfil.
604 * Use saved state for reinjection at protocol layer.
605 */
606 struct cfil_tag {
607 union sockaddr_in_4_6 cfil_faddr;
608 uint32_t cfil_so_state_change_cnt;
609 short cfil_so_options;
610 };
611
612 #define CFIL_HASH_ENTRY_ZONE_NAME "cfil_entry_hash"
613 #define CFIL_HASH_ENTRY_ZONE_MAX 1024
614 static struct zone *cfil_hash_entry_zone = NULL;
615
616 #define CFIL_DB_ZONE_NAME "cfil_db"
617 #define CFIL_DB_ZONE_MAX 1024
618 static struct zone *cfil_db_zone = NULL;
619
620 /*
621 * Statistics
622 */
623
624 struct cfil_stats cfil_stats;
625
626 /*
627 * For troubleshooting
628 */
629 int cfil_log_level = LOG_ERR;
630 int cfil_debug = 1;
631
632 // Debug controls added for selective debugging.
633 // Disabled for production. If enabled,
634 // these will have performance impact
635 #define LIFECYCLE_DEBUG 0
636 #define VERDICT_DEBUG 0
637 #define DATA_DEBUG 0
638 #define SHOW_DEBUG 0
639 #define GC_DEBUG 0
640
641 /*
642 * Sysctls for logs and statistics
643 */
644 static int sysctl_cfil_filter_list(struct sysctl_oid *, void *, int,
645 struct sysctl_req *);
646 static int sysctl_cfil_sock_list(struct sysctl_oid *, void *, int,
647 struct sysctl_req *);
648
649 SYSCTL_NODE(_net, OID_AUTO, cfil, CTLFLAG_RW | CTLFLAG_LOCKED, 0, "cfil");
650
651 SYSCTL_INT(_net_cfil, OID_AUTO, log, CTLFLAG_RW | CTLFLAG_LOCKED,
652 &cfil_log_level, 0, "");
653
654 SYSCTL_INT(_net_cfil, OID_AUTO, debug, CTLFLAG_RW | CTLFLAG_LOCKED,
655 &cfil_debug, 0, "");
656
657 SYSCTL_UINT(_net_cfil, OID_AUTO, sock_attached_count, CTLFLAG_RD | CTLFLAG_LOCKED,
658 &cfil_sock_attached_count, 0, "");
659
660 SYSCTL_UINT(_net_cfil, OID_AUTO, active_count, CTLFLAG_RD | CTLFLAG_LOCKED,
661 &cfil_active_count, 0, "");
662
663 SYSCTL_UINT(_net_cfil, OID_AUTO, close_wait_timeout, CTLFLAG_RW | CTLFLAG_LOCKED,
664 &cfil_close_wait_timeout, 0, "");
665
666 static int cfil_sbtrim = 1;
667 SYSCTL_UINT(_net_cfil, OID_AUTO, sbtrim, CTLFLAG_RW | CTLFLAG_LOCKED,
668 &cfil_sbtrim, 0, "");
669
670 SYSCTL_PROC(_net_cfil, OID_AUTO, filter_list, CTLFLAG_RD | CTLFLAG_LOCKED,
671 0, 0, sysctl_cfil_filter_list, "S,cfil_filter_stat", "");
672
673 SYSCTL_PROC(_net_cfil, OID_AUTO, sock_list, CTLFLAG_RD | CTLFLAG_LOCKED,
674 0, 0, sysctl_cfil_sock_list, "S,cfil_sock_stat", "");
675
676 SYSCTL_STRUCT(_net_cfil, OID_AUTO, stats, CTLFLAG_RD | CTLFLAG_LOCKED,
677 &cfil_stats, cfil_stats, "");
678
679 /*
680 * Forward declaration to appease the compiler
681 */
682 static int cfil_action_data_pass(struct socket *, struct cfil_info *, uint32_t, int,
683 uint64_t, uint64_t);
684 static int cfil_action_drop(struct socket *, struct cfil_info *, uint32_t);
685 static int cfil_action_bless_client(uint32_t, struct cfil_msg_hdr *);
686 static int cfil_action_set_crypto_key(uint32_t, struct cfil_msg_hdr *);
687 static int cfil_dispatch_closed_event(struct socket *, struct cfil_info *, int);
688 static int cfil_data_common(struct socket *, struct cfil_info *, int, struct sockaddr *,
689 struct mbuf *, struct mbuf *, uint32_t);
690 static int cfil_data_filter(struct socket *, struct cfil_info *, uint32_t, int,
691 struct mbuf *, uint64_t);
692 static void fill_ip_sockaddr_4_6(union sockaddr_in_4_6 *,
693 struct in_addr, u_int16_t);
694 static void fill_ip6_sockaddr_4_6(union sockaddr_in_4_6 *,
695 struct in6_addr *, u_int16_t);
696
697 static int cfil_dispatch_attach_event(struct socket *, struct cfil_info *, uint32_t, int);
698 static void cfil_info_free(struct cfil_info *);
699 static struct cfil_info * cfil_info_alloc(struct socket *, struct cfil_hash_entry *);
700 static int cfil_info_attach_unit(struct socket *, uint32_t, struct cfil_info *);
701 static struct socket * cfil_socket_from_sock_id(cfil_sock_id_t, bool);
702 static struct socket * cfil_socket_from_client_uuid(uuid_t, bool *);
703 static int cfil_service_pending_queue(struct socket *, struct cfil_info *, uint32_t, int);
704 static int cfil_data_service_ctl_q(struct socket *, struct cfil_info *, uint32_t, int);
705 static void cfil_info_verify(struct cfil_info *);
706 static int cfil_update_data_offsets(struct socket *, struct cfil_info *, uint32_t, int,
707 uint64_t, uint64_t);
708 static int cfil_acquire_sockbuf(struct socket *, struct cfil_info *, int);
709 static void cfil_release_sockbuf(struct socket *, int);
710 static int cfil_filters_attached(struct socket *);
711
712 static void cfil_rw_lock_exclusive(lck_rw_t *);
713 static void cfil_rw_unlock_exclusive(lck_rw_t *);
714 static void cfil_rw_lock_shared(lck_rw_t *);
715 static void cfil_rw_unlock_shared(lck_rw_t *);
716 static boolean_t cfil_rw_lock_shared_to_exclusive(lck_rw_t *);
717 static void cfil_rw_lock_exclusive_to_shared(lck_rw_t *);
718
719 static unsigned int cfil_data_length(struct mbuf *, int *, int *);
720 static errno_t cfil_db_init(struct socket *);
721 static void cfil_db_free(struct socket *so);
722 struct cfil_hash_entry *cfil_db_lookup_entry(struct cfil_db *, struct sockaddr *, struct sockaddr *);
723 struct cfil_hash_entry *cfil_db_lookup_entry_with_sockid(struct cfil_db *, u_int64_t);
724 struct cfil_hash_entry *cfil_db_add_entry(struct cfil_db *, struct sockaddr *, struct sockaddr *);
725 void cfil_db_delete_entry(struct cfil_db *, struct cfil_hash_entry *);
726 struct cfil_hash_entry *cfil_sock_udp_get_flow(struct socket *, uint32_t, bool, struct sockaddr *, struct sockaddr *);
727 struct cfil_info *cfil_db_get_cfil_info(struct cfil_db *, cfil_sock_id_t);
728 static errno_t cfil_sock_udp_handle_data(bool, struct socket *, struct sockaddr *, struct sockaddr *,
729 struct mbuf *, struct mbuf *, uint32_t);
730 static int32_t cfil_sock_udp_data_pending(struct sockbuf *, bool);
731 static void cfil_sock_udp_is_closed(struct socket *);
732 static int cfil_sock_udp_notify_shutdown(struct socket *, int, int, int);
733 static int cfil_sock_udp_shutdown(struct socket *, int *);
734 static void cfil_sock_udp_close_wait(struct socket *);
735 static void cfil_sock_udp_buf_update(struct sockbuf *);
736 static int cfil_filters_udp_attached(struct socket *, bool);
737 static void cfil_get_flow_address_v6(struct cfil_hash_entry *, struct inpcb *,
738 struct in6_addr **, struct in6_addr **,
739 u_int16_t *, u_int16_t *);
740 static void cfil_get_flow_address(struct cfil_hash_entry *, struct inpcb *,
741 struct in_addr *, struct in_addr *,
742 u_int16_t *, u_int16_t *);
743 static void cfil_info_log(int, struct cfil_info *, const char *);
744 void cfil_filter_show(u_int32_t);
745 void cfil_info_show(void);
746 bool cfil_info_idle_timed_out(struct cfil_info *, int, u_int32_t);
747 bool cfil_info_action_timed_out(struct cfil_info *, int);
748 bool cfil_info_buffer_threshold_exceeded(struct cfil_info *);
749 struct m_tag *cfil_udp_save_socket_state(struct cfil_info *, struct mbuf *);
750 static void cfil_udp_gc_thread_func(void *, wait_result_t);
751 static void cfil_info_udp_expire(void *, wait_result_t);
752 static bool fill_cfil_hash_entry_from_address(struct cfil_hash_entry *, bool, struct sockaddr *);
753 static void cfil_sock_received_verdict(struct socket *so);
754 static void cfil_fill_event_msg_addresses(struct cfil_hash_entry *, struct inpcb *,
755 union sockaddr_in_4_6 *, union sockaddr_in_4_6 *,
756 boolean_t, boolean_t);
757
758 bool check_port(struct sockaddr *, u_short);
759
760 /*
761 * Content filter global read write lock
762 */
763
764 static void
765 cfil_rw_lock_exclusive(lck_rw_t *lck)
766 {
767 void *lr_saved;
768
769 lr_saved = __builtin_return_address(0);
770
771 lck_rw_lock_exclusive(lck);
772
773 cfil_rw_lock_history[cfil_rw_nxt_lck] = lr_saved;
774 cfil_rw_nxt_lck = (cfil_rw_nxt_lck + 1) % CFIL_RW_LCK_MAX;
775 }
776
777 static void
778 cfil_rw_unlock_exclusive(lck_rw_t *lck)
779 {
780 void *lr_saved;
781
782 lr_saved = __builtin_return_address(0);
783
784 lck_rw_unlock_exclusive(lck);
785
786 cfil_rw_unlock_history[cfil_rw_nxt_unlck] = lr_saved;
787 cfil_rw_nxt_unlck = (cfil_rw_nxt_unlck + 1) % CFIL_RW_LCK_MAX;
788 }
789
790 static void
791 cfil_rw_lock_shared(lck_rw_t *lck)
792 {
793 void *lr_saved;
794
795 lr_saved = __builtin_return_address(0);
796
797 lck_rw_lock_shared(lck);
798
799 cfil_rw_lock_history[cfil_rw_nxt_lck] = lr_saved;
800 cfil_rw_nxt_lck = (cfil_rw_nxt_lck + 1) % CFIL_RW_LCK_MAX;
801 }
802
803 static void
804 cfil_rw_unlock_shared(lck_rw_t *lck)
805 {
806 void *lr_saved;
807
808 lr_saved = __builtin_return_address(0);
809
810 lck_rw_unlock_shared(lck);
811
812 cfil_rw_unlock_history[cfil_rw_nxt_unlck] = lr_saved;
813 cfil_rw_nxt_unlck = (cfil_rw_nxt_unlck + 1) % CFIL_RW_LCK_MAX;
814 }
815
816 static boolean_t
817 cfil_rw_lock_shared_to_exclusive(lck_rw_t *lck)
818 {
819 void *lr_saved;
820 boolean_t upgraded;
821
822 lr_saved = __builtin_return_address(0);
823
824 upgraded = lck_rw_lock_shared_to_exclusive(lck);
825 if (upgraded) {
826 cfil_rw_unlock_history[cfil_rw_nxt_unlck] = lr_saved;
827 cfil_rw_nxt_unlck = (cfil_rw_nxt_unlck + 1) % CFIL_RW_LCK_MAX;
828 }
829 return upgraded;
830 }
831
832 static void
833 cfil_rw_lock_exclusive_to_shared(lck_rw_t *lck)
834 {
835 void *lr_saved;
836
837 lr_saved = __builtin_return_address(0);
838
839 lck_rw_lock_exclusive_to_shared(lck);
840
841 cfil_rw_lock_history[cfil_rw_nxt_lck] = lr_saved;
842 cfil_rw_nxt_lck = (cfil_rw_nxt_lck + 1) % CFIL_RW_LCK_MAX;
843 }
844
845 static void
846 cfil_rw_lock_assert_held(lck_rw_t *lck, int exclusive)
847 {
848 #if !MACH_ASSERT
849 #pragma unused(lck, exclusive)
850 #endif
851 LCK_RW_ASSERT(lck,
852 exclusive ? LCK_RW_ASSERT_EXCLUSIVE : LCK_RW_ASSERT_HELD);
853 }
854
855 /*
856 * Return the number of bytes in the mbuf chain using the same
857 * method as m_length() or sballoc()
858 *
859 * Returns data len - starting from PKT start
860 * - retmbcnt - optional param to get total mbuf bytes in chain
861 * - retmbnum - optional param to get number of mbufs in chain
862 */
863 static unsigned int
864 cfil_data_length(struct mbuf *m, int *retmbcnt, int *retmbnum)
865 {
866 struct mbuf *m0;
867 unsigned int pktlen = 0;
868 int mbcnt;
869 int mbnum;
870
871 // Locate the start of data
872 for (m0 = m; m0 != NULL; m0 = m0->m_next) {
873 if (m0->m_flags & M_PKTHDR) {
874 break;
875 }
876 }
877 if (m0 == NULL) {
878 CFIL_LOG(LOG_ERR, "cfil_data_length: no M_PKTHDR");
879 return 0;
880 }
881 m = m0;
882
883 if (retmbcnt == NULL && retmbnum == NULL) {
884 return m_length(m);
885 }
886
887 pktlen = 0;
888 mbcnt = 0;
889 mbnum = 0;
890 for (m0 = m; m0 != NULL; m0 = m0->m_next) {
891 pktlen += m0->m_len;
892 mbnum++;
893 mbcnt += MSIZE;
894 if (m0->m_flags & M_EXT) {
895 mbcnt += m0->m_ext.ext_size;
896 }
897 }
898 if (retmbcnt) {
899 *retmbcnt = mbcnt;
900 }
901 if (retmbnum) {
902 *retmbnum = mbnum;
903 }
904 return pktlen;
905 }
906
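/*
 * Typical usage of cfil_data_length() (sketch); both out-parameters are
 * optional and may be passed as NULL:
 *
 *	int mbcnt = 0, mbnum = 0;
 *	unsigned int len = cfil_data_length(m, &mbcnt, &mbnum);
 *	// len:   payload bytes starting at the M_PKTHDR mbuf
 *	// mbcnt: buffer bytes accounted the same way as sballoc()
 *	// mbnum: number of mbufs in the chain
 */
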
907 static struct mbuf *
908 cfil_data_start(struct mbuf *m)
909 {
910 struct mbuf *m0;
911
912 // Locate the start of data
913 for (m0 = m; m0 != NULL; m0 = m0->m_next) {
914 if (m0->m_flags & M_PKTHDR) {
915 break;
916 }
917 }
918 return m0;
919 }
920
921 /*
922 * Common mbuf queue utilities
923 */
924
925 static inline void
926 cfil_queue_init(struct cfil_queue *cfq)
927 {
928 cfq->q_start = 0;
929 cfq->q_end = 0;
930 MBUFQ_INIT(&cfq->q_mq);
931 }
932
933 static inline uint64_t
934 cfil_queue_drain(struct cfil_queue *cfq)
935 {
936 uint64_t drained = cfq->q_end - cfq->q_start; /* bytes dropped from the queue */
937 cfq->q_start = 0;
938 cfq->q_end = 0;
939 MBUFQ_DRAIN(&cfq->q_mq);
940
941 return drained;
942 }
943
944 /* Return 1 when empty, 0 otherwise */
945 static inline int
946 cfil_queue_empty(struct cfil_queue *cfq)
947 {
948 return MBUFQ_EMPTY(&cfq->q_mq);
949 }
950
951 static inline uint64_t
952 cfil_queue_offset_first(struct cfil_queue *cfq)
953 {
954 return cfq->q_start;
955 }
956
957 static inline uint64_t
958 cfil_queue_offset_last(struct cfil_queue *cfq)
959 {
960 return cfq->q_end;
961 }
962
963 static inline uint64_t
964 cfil_queue_len(struct cfil_queue *cfq)
965 {
966 return cfq->q_end - cfq->q_start;
967 }
968
969 /*
970 * Routines to verify some fundamental assumptions
971 */
972
973 static void
974 cfil_queue_verify(struct cfil_queue *cfq)
975 {
976 mbuf_t chain;
977 mbuf_t m;
978 mbuf_t n;
979 uint64_t queuesize = 0;
980
981 /* Verify offsets are ordered */
982 VERIFY(cfq->q_start <= cfq->q_end);
983
984 /*
985 * When queue is empty, the offsets are equal otherwise the offsets
986 * are different
987 */
988 VERIFY((MBUFQ_EMPTY(&cfq->q_mq) && cfq->q_start == cfq->q_end) ||
989 (!MBUFQ_EMPTY(&cfq->q_mq) &&
990 cfq->q_start != cfq->q_end));
991
992 MBUFQ_FOREACH(chain, &cfq->q_mq) {
993 size_t chainsize = 0;
994 m = chain;
995 unsigned int mlen = cfil_data_length(m, NULL, NULL);
996 // skip the addr and control stuff if present
997 m = cfil_data_start(m);
998
999 if (m == NULL ||
1000 m == (void *)M_TAG_FREE_PATTERN ||
1001 m->m_next == (void *)M_TAG_FREE_PATTERN ||
1002 m->m_nextpkt == (void *)M_TAG_FREE_PATTERN) {
1003 panic("%s - mq %p is free at %p", __func__,
1004 &cfq->q_mq, m);
1005 }
1006 for (n = m; n != NULL; n = n->m_next) {
1007 if (n->m_type != MT_DATA &&
1008 n->m_type != MT_HEADER &&
1009 n->m_type != MT_OOBDATA) {
1010 panic("%s - %p unsupported type %u", __func__,
1011 n, n->m_type);
1012 }
1013 chainsize += n->m_len;
1014 }
1015 if (mlen != chainsize) {
1016 panic("%s - %p m_length() %u != chainsize %lu",
1017 __func__, m, mlen, chainsize);
1018 }
1019 queuesize += chainsize;
1020 }
1021 if (queuesize != cfq->q_end - cfq->q_start) {
1022 panic("%s - %p queuesize %llu != offsetdiffs %llu", __func__,
1023 m, queuesize, cfq->q_end - cfq->q_start);
1024 }
1025 }
1026
1027 static void
1028 cfil_queue_enqueue(struct cfil_queue *cfq, mbuf_t m, size_t len)
1029 {
1030 CFIL_QUEUE_VERIFY(cfq);
1031
1032 MBUFQ_ENQUEUE(&cfq->q_mq, m);
1033 cfq->q_end += len;
1034
1035 CFIL_QUEUE_VERIFY(cfq);
1036 }
1037
1038 static void
1039 cfil_queue_remove(struct cfil_queue *cfq, mbuf_t m, size_t len)
1040 {
1041 CFIL_QUEUE_VERIFY(cfq);
1042
1043 VERIFY(cfil_data_length(m, NULL, NULL) == len);
1044
1045 MBUFQ_REMOVE(&cfq->q_mq, m);
1046 MBUFQ_NEXT(m) = NULL;
1047 cfq->q_start += len;
1048
1049 CFIL_QUEUE_VERIFY(cfq);
1050 }
1051
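/*
 * Offset bookkeeping example (sketch): enqueueing a 100 byte chain on an
 * empty queue leaves q_start == 0 and q_end == 100, so cfil_queue_len()
 * returns 100; removing that chain advances q_start to 100 and the queue
 * is empty again with q_start == q_end == 100.
 */
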
1052 static mbuf_t
1053 cfil_queue_first(struct cfil_queue *cfq)
1054 {
1055 return MBUFQ_FIRST(&cfq->q_mq);
1056 }
1057
1058 static mbuf_t
1059 cfil_queue_next(struct cfil_queue *cfq, mbuf_t m)
1060 {
1061 #pragma unused(cfq)
1062 return MBUFQ_NEXT(m);
1063 }
1064
1065 static void
1066 cfil_entry_buf_verify(struct cfe_buf *cfe_buf)
1067 {
1068 CFIL_QUEUE_VERIFY(&cfe_buf->cfe_ctl_q);
1069 CFIL_QUEUE_VERIFY(&cfe_buf->cfe_pending_q);
1070
1071 /* Verify the queues are ordered so that pending is before ctl */
1072 VERIFY(cfe_buf->cfe_ctl_q.q_start >= cfe_buf->cfe_pending_q.q_end);
1073
1074 /* The peek offset cannot be less than the pass offset */
1075 VERIFY(cfe_buf->cfe_peek_offset >= cfe_buf->cfe_pass_offset);
1076
1077 /* Make sure we've updated the offset we peeked at */
1078 VERIFY(cfe_buf->cfe_ctl_q.q_start <= cfe_buf->cfe_peeked);
1079 }
1080
1081 static void
1082 cfil_entry_verify(struct cfil_entry *entry)
1083 {
1084 cfil_entry_buf_verify(&entry->cfe_snd);
1085 cfil_entry_buf_verify(&entry->cfe_rcv);
1086 }
1087
1088 static void
1089 cfil_info_buf_verify(struct cfi_buf *cfi_buf)
1090 {
1091 CFIL_QUEUE_VERIFY(&cfi_buf->cfi_inject_q);
1092
1093 VERIFY(cfi_buf->cfi_pending_first <= cfi_buf->cfi_pending_last);
1094 }
1095
1096 static void
1097 cfil_info_verify(struct cfil_info *cfil_info)
1098 {
1099 int i;
1100
1101 if (cfil_info == NULL) {
1102 return;
1103 }
1104
1105 cfil_info_buf_verify(&cfil_info->cfi_snd);
1106 cfil_info_buf_verify(&cfil_info->cfi_rcv);
1107
1108 for (i = 0; i < MAX_CONTENT_FILTER; i++) {
1109 cfil_entry_verify(&cfil_info->cfi_entries[i]);
1110 }
1111 }
1112
1113 static void
1114 verify_content_filter(struct content_filter *cfc)
1115 {
1116 struct cfil_entry *entry;
1117 uint32_t count = 0;
1118
1119 VERIFY(cfc->cf_sock_count >= 0);
1120
1121 TAILQ_FOREACH(entry, &cfc->cf_sock_entries, cfe_link) {
1122 count++;
1123 VERIFY(cfc == entry->cfe_filter);
1124 }
1125 VERIFY(count == cfc->cf_sock_count);
1126 }
1127
1128 /*
1129 * Kernel control socket callbacks
1130 */
1131 static errno_t
1132 cfil_ctl_connect(kern_ctl_ref kctlref, struct sockaddr_ctl *sac,
1133 void **unitinfo)
1134 {
1135 errno_t error = 0;
1136 struct content_filter *cfc = NULL;
1137
1138 CFIL_LOG(LOG_NOTICE, "");
1139
1140 cfc = zalloc(content_filter_zone);
1141 if (cfc == NULL) {
1142 CFIL_LOG(LOG_ERR, "zalloc failed");
1143 error = ENOMEM;
1144 goto done;
1145 }
1146 bzero(cfc, sizeof(struct content_filter));
1147
1148 cfil_rw_lock_exclusive(&cfil_lck_rw);
1149 if (content_filters == NULL) {
1150 struct content_filter **tmp;
1151
1152 cfil_rw_unlock_exclusive(&cfil_lck_rw);
1153
1154 MALLOC(tmp,
1155 struct content_filter **,
1156 MAX_CONTENT_FILTER * sizeof(struct content_filter *),
1157 M_TEMP,
1158 M_WAITOK | M_ZERO);
1159
1160 cfil_rw_lock_exclusive(&cfil_lck_rw);
1161
1162 if (tmp == NULL && content_filters == NULL) {
1163 error = ENOMEM;
1164 cfil_rw_unlock_exclusive(&cfil_lck_rw);
1165 goto done;
1166 }
1167 /* Another thread may have won the race */
1168 if (content_filters != NULL) {
1169 FREE(tmp, M_TEMP);
1170 } else {
1171 content_filters = tmp;
1172 }
1173 }
1174
1175 if (sac->sc_unit == 0 || sac->sc_unit > MAX_CONTENT_FILTER) {
1176 CFIL_LOG(LOG_ERR, "bad sc_unit %u", sac->sc_unit);
1177 error = EINVAL;
1178 } else if (content_filters[sac->sc_unit - 1] != NULL) {
1179 CFIL_LOG(LOG_ERR, "sc_unit %u in use", sac->sc_unit);
1180 error = EADDRINUSE;
1181 } else {
1182 /*
1183 * kernel control socket kcunit numbers start at 1
1184 */
1185 content_filters[sac->sc_unit - 1] = cfc;
1186
1187 cfc->cf_kcref = kctlref;
1188 cfc->cf_kcunit = sac->sc_unit;
1189 TAILQ_INIT(&cfc->cf_sock_entries);
1190
1191 *unitinfo = cfc;
1192 cfil_active_count++;
1193 }
1194 cfil_rw_unlock_exclusive(&cfil_lck_rw);
1195 done:
1196 if (error != 0 && cfc != NULL) {
1197 zfree(content_filter_zone, cfc);
1198 }
1199
1200 if (error == 0) {
1201 OSIncrementAtomic(&cfil_stats.cfs_ctl_connect_ok);
1202 } else {
1203 OSIncrementAtomic(&cfil_stats.cfs_ctl_connect_fail);
1204 }
1205
1206 CFIL_LOG(LOG_INFO, "return %d cfil_active_count %u kcunit %u",
1207 error, cfil_active_count, sac->sc_unit);
1208
1209 return error;
1210 }
1211
1212 static errno_t
1213 cfil_ctl_disconnect(kern_ctl_ref kctlref, u_int32_t kcunit, void *unitinfo)
1214 {
1215 #pragma unused(kctlref)
1216 errno_t error = 0;
1217 struct content_filter *cfc;
1218 struct cfil_entry *entry;
1219 uint64_t sock_flow_id = 0;
1220
1221 CFIL_LOG(LOG_NOTICE, "");
1222
1223 if (content_filters == NULL) {
1224 CFIL_LOG(LOG_ERR, "no content filter");
1225 error = EINVAL;
1226 goto done;
1227 }
1228 if (kcunit > MAX_CONTENT_FILTER) {
1229 CFIL_LOG(LOG_ERR, "kcunit %u > MAX_CONTENT_FILTER (%d)",
1230 kcunit, MAX_CONTENT_FILTER);
1231 error = EINVAL;
1232 goto done;
1233 }
1234
1235 cfc = (struct content_filter *)unitinfo;
1236 if (cfc == NULL) {
1237 goto done;
1238 }
1239
1240 cfil_rw_lock_exclusive(&cfil_lck_rw);
1241 if (content_filters[kcunit - 1] != cfc || cfc->cf_kcunit != kcunit) {
1242 CFIL_LOG(LOG_ERR, "bad unit info %u)",
1243 kcunit);
1244 cfil_rw_unlock_exclusive(&cfil_lck_rw);
1245 goto done;
1246 }
1247 cfc->cf_flags |= CFF_DETACHING;
1248 /*
1249 * Remove all sockets from the filter
1250 */
1251 while ((entry = TAILQ_FIRST(&cfc->cf_sock_entries)) != NULL) {
1252 cfil_rw_lock_assert_held(&cfil_lck_rw, 1);
1253
1254 verify_content_filter(cfc);
1255 /*
1256 * Accept all outstanding data by pushing to next filter
1257 * or back to socket
1258 *
1259 * TBD: Actually we should make sure all data has been pushed
1260 * back to socket
1261 */
1262 if (entry->cfe_cfil_info && entry->cfe_cfil_info->cfi_so) {
1263 struct cfil_info *cfil_info = entry->cfe_cfil_info;
1264 struct socket *so = cfil_info->cfi_so;
1265 sock_flow_id = cfil_info->cfi_sock_id;
1266
1267 /* Need to let data flow immediately */
1268 entry->cfe_flags |= CFEF_SENT_SOCK_ATTACHED |
1269 CFEF_DATA_START;
1270
1271 /*
1272 * Respect locking hierarchy
1273 */
1274 cfil_rw_unlock_exclusive(&cfil_lck_rw);
1275
1276 socket_lock(so, 1);
1277
1278 /*
1279 * When cfe_filter is NULL the filter is detached
1280 * and the entry has been removed from cf_sock_entries
1281 */
1282 if ((so->so_cfil == NULL && so->so_cfil_db == NULL) || entry->cfe_filter == NULL) {
1283 cfil_rw_lock_exclusive(&cfil_lck_rw);
1284 goto release;
1285 }
1286
1287 (void) cfil_action_data_pass(so, cfil_info, kcunit, 1,
1288 CFM_MAX_OFFSET,
1289 CFM_MAX_OFFSET);
1290
1291 (void) cfil_action_data_pass(so, cfil_info, kcunit, 0,
1292 CFM_MAX_OFFSET,
1293 CFM_MAX_OFFSET);
1294
1295 cfil_rw_lock_exclusive(&cfil_lck_rw);
1296
1297 /*
1298 * Check again to make sure the cfil_info is still valid,
1299 * as the socket may have been unlocked when calling
1300 * cfil_acquire_sockbuf()
1301 */
1302 if (entry->cfe_filter == NULL ||
1303 (so->so_cfil == NULL && cfil_db_get_cfil_info(so->so_cfil_db, sock_flow_id) == NULL)) {
1304 goto release;
1305 }
1306
1307 /* The filter is now detached */
1308 entry->cfe_flags |= CFEF_CFIL_DETACHED;
1309 #if LIFECYCLE_DEBUG
1310 cfil_info_log(LOG_DEBUG, cfil_info, "CFIL: LIFECYCLE: - FILTER DISCONNECTED");
1311 #endif
1312 CFIL_LOG(LOG_NOTICE, "so %llx detached %u",
1313 (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit);
1314 if ((cfil_info->cfi_flags & CFIF_CLOSE_WAIT) &&
1315 cfil_filters_attached(so) == 0) {
1316 CFIL_LOG(LOG_NOTICE, "so %llx waking",
1317 (uint64_t)VM_KERNEL_ADDRPERM(so));
1318 wakeup((caddr_t)cfil_info);
1319 }
1320
1321 /*
1322 * Remove the filter entry from the content filter
1323 * but leave the rest of the state intact as the queues
1324 * may not be empty yet
1325 */
1326 entry->cfe_filter = NULL;
1327 entry->cfe_necp_control_unit = 0;
1328
1329 TAILQ_REMOVE(&cfc->cf_sock_entries, entry, cfe_link);
1330 cfc->cf_sock_count--;
1331 release:
1332 socket_unlock(so, 1);
1333 }
1334 }
1335 verify_content_filter(cfc);
1336
1337 VERIFY(cfc->cf_sock_count == 0);
1338
1339 /*
1340 * Make filter inactive
1341 */
1342 content_filters[kcunit - 1] = NULL;
1343 cfil_active_count--;
1344 cfil_rw_unlock_exclusive(&cfil_lck_rw);
1345
1346 if (cfc->cf_crypto_state != NULL) {
1347 cfil_crypto_cleanup_state(cfc->cf_crypto_state);
1348 cfc->cf_crypto_state = NULL;
1349 }
1350
1351 zfree(content_filter_zone, cfc);
1352 done:
1353 if (error == 0) {
1354 OSIncrementAtomic(&cfil_stats.cfs_ctl_disconnect_ok);
1355 } else {
1356 OSIncrementAtomic(&cfil_stats.cfs_ctl_disconnect_fail);
1357 }
1358
1359 CFIL_LOG(LOG_INFO, "return %d cfil_active_count %u kcunit %u",
1360 error, cfil_active_count, kcunit);
1361
1362 return error;
1363 }
1364
1365 /*
1366 * cfil_acquire_sockbuf()
1367 *
1368 * Prevent any other thread from acquiring the sockbuf
1369 * We use sb_cfil_thread as a semaphore to prevent other threads from
1370 * messing with the sockbuf -- see sblock()
1371 * Note: We do not set SB_LOCK here because the thread may check or modify
1372 * SB_LOCK several times until it calls cfil_release_sockbuf() -- currently
1373 * sblock(), sbunlock() or sodefunct()
1374 */
1375 static int
1376 cfil_acquire_sockbuf(struct socket *so, struct cfil_info *cfil_info, int outgoing)
1377 {
1378 thread_t tp = current_thread();
1379 struct sockbuf *sb = outgoing ? &so->so_snd : &so->so_rcv;
1380 lck_mtx_t *mutex_held;
1381 int error = 0;
1382
1383 /*
1384 * Wait until no thread is holding the sockbuf and other content
1385 * filter threads have released the sockbuf
1386 */
1387 while ((sb->sb_flags & SB_LOCK) ||
1388 (sb->sb_cfil_thread != NULL && sb->sb_cfil_thread != tp)) {
1389 if (so->so_proto->pr_getlock != NULL) {
1390 mutex_held = (*so->so_proto->pr_getlock)(so, PR_F_WILLUNLOCK);
1391 } else {
1392 mutex_held = so->so_proto->pr_domain->dom_mtx;
1393 }
1394
1395 LCK_MTX_ASSERT(mutex_held, LCK_MTX_ASSERT_OWNED);
1396
1397 sb->sb_wantlock++;
1398 VERIFY(sb->sb_wantlock != 0);
1399
1400 msleep(&sb->sb_flags, mutex_held, PSOCK, "cfil_acquire_sockbuf",
1401 NULL);
1402
1403 VERIFY(sb->sb_wantlock != 0);
1404 sb->sb_wantlock--;
1405 }
1406 /*
1407 * Use reference count for repetitive calls on same thread
1408 */
1409 if (sb->sb_cfil_refs == 0) {
1410 VERIFY(sb->sb_cfil_thread == NULL);
1411 VERIFY((sb->sb_flags & SB_LOCK) == 0);
1412
1413 sb->sb_cfil_thread = tp;
1414 sb->sb_flags |= SB_LOCK;
1415 }
1416 sb->sb_cfil_refs++;
1417
1418 /* We acquire the socket buffer when we need to cleanup */
1419 if (cfil_info == NULL) {
1420 CFIL_LOG(LOG_ERR, "so %llx cfil detached",
1421 (uint64_t)VM_KERNEL_ADDRPERM(so));
1422 error = 0;
1423 } else if (cfil_info->cfi_flags & CFIF_DROP) {
1424 CFIL_LOG(LOG_ERR, "so %llx drop set",
1425 (uint64_t)VM_KERNEL_ADDRPERM(so));
1426 error = EPIPE;
1427 }
1428
1429 return error;
1430 }
1431
1432 static void
1433 cfil_release_sockbuf(struct socket *so, int outgoing)
1434 {
1435 struct sockbuf *sb = outgoing ? &so->so_snd : &so->so_rcv;
1436 thread_t tp = current_thread();
1437
1438 socket_lock_assert_owned(so);
1439
1440 if (sb->sb_cfil_thread != NULL && sb->sb_cfil_thread != tp) {
1441 panic("%s sb_cfil_thread %p not current %p", __func__,
1442 sb->sb_cfil_thread, tp);
1443 }
1444 /*
1445 * Don't panic if we are defunct because SB_LOCK has
1446 * been cleared by sodefunct()
1447 */
1448 if (!(so->so_flags & SOF_DEFUNCT) && !(sb->sb_flags & SB_LOCK)) {
1449 panic("%s SB_LOCK not set on %p", __func__,
1450 sb);
1451 }
1452 /*
1453 * We can unlock when the thread unwinds to the last reference
1454 */
1455 sb->sb_cfil_refs--;
1456 if (sb->sb_cfil_refs == 0) {
1457 sb->sb_cfil_thread = NULL;
1458 sb->sb_flags &= ~SB_LOCK;
1459
1460 if (sb->sb_wantlock > 0) {
1461 wakeup(&sb->sb_flags);
1462 }
1463 }
1464 }
1465
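/*
 * Typical pairing (sketch): cfil_acquire_sockbuf() takes its reference even
 * when it returns an error, so each call is balanced by cfil_release_sockbuf()
 * for the same direction; sb_cfil_refs makes nested calls from the same
 * thread safe.
 *
 *	error = cfil_acquire_sockbuf(so, cfil_info, outgoing);
 *	if (error == 0) {
 *		// ... safely manipulate the sockbuf, e.g. re-inject passed data ...
 *	}
 *	cfil_release_sockbuf(so, outgoing);
 */
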
1466 cfil_sock_id_t
1467 cfil_sock_id_from_socket(struct socket *so)
1468 {
1469 if ((so->so_flags & SOF_CONTENT_FILTER) && so->so_cfil) {
1470 return so->so_cfil->cfi_sock_id;
1471 } else {
1472 return CFIL_SOCK_ID_NONE;
1473 }
1474 }
1475
1476 static bool
1477 cfil_socket_safe_lock(struct inpcb *inp)
1478 {
1479 if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) != WNT_STOPUSING) {
1480 socket_lock(inp->inp_socket, 1);
1481 if (in_pcb_checkstate(inp, WNT_RELEASE, 1) != WNT_STOPUSING) {
1482 return true;
1483 }
1484 socket_unlock(inp->inp_socket, 1);
1485 }
1486 return false;
1487 }
1488
1489 static struct socket *
1490 cfil_socket_from_sock_id(cfil_sock_id_t cfil_sock_id, bool udp_only)
1491 {
1492 struct socket *so = NULL;
1493 u_int64_t gencnt = cfil_sock_id >> 32;
1494 u_int32_t flowhash = (u_int32_t)(cfil_sock_id & 0x0ffffffff);
1495 struct inpcb *inp = NULL;
1496 struct inpcbinfo *pcbinfo = NULL;
1497
1498 #if VERDICT_DEBUG
1499 CFIL_LOG(LOG_ERR, "CFIL: VERDICT: search for socket: id %llu gencnt %llx flowhash %x", cfil_sock_id, gencnt, flowhash);
1500 #endif
1501
1502 if (udp_only) {
1503 goto find_udp;
1504 }
1505
1506 pcbinfo = &tcbinfo;
1507 lck_rw_lock_shared(pcbinfo->ipi_lock);
1508 LIST_FOREACH(inp, pcbinfo->ipi_listhead, inp_list) {
1509 if (inp->inp_state != INPCB_STATE_DEAD &&
1510 inp->inp_socket != NULL &&
1511 inp->inp_flowhash == flowhash &&
1512 (inp->inp_socket->so_gencnt & 0x0ffffffff) == gencnt &&
1513 inp->inp_socket->so_cfil != NULL) {
1514 if (cfil_socket_safe_lock(inp)) {
1515 so = inp->inp_socket;
1516 }
1517 break;
1518 }
1519 }
1520 lck_rw_done(pcbinfo->ipi_lock);
1521 if (so != NULL) {
1522 goto done;
1523 }
1524
1525 find_udp:
1526
1527 pcbinfo = &udbinfo;
1528 lck_rw_lock_shared(pcbinfo->ipi_lock);
1529 LIST_FOREACH(inp, pcbinfo->ipi_listhead, inp_list) {
1530 if (inp->inp_state != INPCB_STATE_DEAD &&
1531 inp->inp_socket != NULL &&
1532 inp->inp_socket->so_cfil_db != NULL &&
1533 (inp->inp_socket->so_gencnt & 0x0ffffffff) == gencnt) {
1534 if (cfil_socket_safe_lock(inp)) {
1535 so = inp->inp_socket;
1536 }
1537 break;
1538 }
1539 }
1540 lck_rw_done(pcbinfo->ipi_lock);
1541
1542 done:
1543 if (so == NULL) {
1544 OSIncrementAtomic(&cfil_stats.cfs_sock_id_not_found);
1545 CFIL_LOG(LOG_DEBUG,
1546 "no socket for sock_id %llx gencnt %llx flowhash %x",
1547 cfil_sock_id, gencnt, flowhash);
1548 }
1549
1550 return so;
1551 }
1552
1553 static struct socket *
1554 cfil_socket_from_client_uuid(uuid_t necp_client_uuid, bool *cfil_attached)
1555 {
1556 struct socket *so = NULL;
1557 struct inpcb *inp = NULL;
1558 struct inpcbinfo *pcbinfo = &tcbinfo;
1559
1560 lck_rw_lock_shared(pcbinfo->ipi_lock);
1561 LIST_FOREACH(inp, pcbinfo->ipi_listhead, inp_list) {
1562 if (inp->inp_state != INPCB_STATE_DEAD &&
1563 inp->inp_socket != NULL &&
1564 uuid_compare(inp->necp_client_uuid, necp_client_uuid) == 0) {
1565 *cfil_attached = (inp->inp_socket->so_cfil != NULL);
1566 if (cfil_socket_safe_lock(inp)) {
1567 so = inp->inp_socket;
1568 }
1569 break;
1570 }
1571 }
1572 lck_rw_done(pcbinfo->ipi_lock);
1573 if (so != NULL) {
1574 goto done;
1575 }
1576
1577 pcbinfo = &udbinfo;
1578 lck_rw_lock_shared(pcbinfo->ipi_lock);
1579 LIST_FOREACH(inp, pcbinfo->ipi_listhead, inp_list) {
1580 if (inp->inp_state != INPCB_STATE_DEAD &&
1581 inp->inp_socket != NULL &&
1582 uuid_compare(inp->necp_client_uuid, necp_client_uuid) == 0) {
1583 *cfil_attached = (inp->inp_socket->so_cfil_db != NULL);
1584 if (cfil_socket_safe_lock(inp)) {
1585 so = inp->inp_socket;
1586 }
1587 break;
1588 }
1589 }
1590 lck_rw_done(pcbinfo->ipi_lock);
1591
1592 done:
1593 return so;
1594 }
1595
1596 static errno_t
1597 cfil_ctl_send(kern_ctl_ref kctlref, u_int32_t kcunit, void *unitinfo, mbuf_t m,
1598 int flags)
1599 {
1600 #pragma unused(kctlref, flags)
1601 errno_t error = 0;
1602 struct cfil_msg_hdr *msghdr;
1603 struct content_filter *cfc = (struct content_filter *)unitinfo;
1604 struct socket *so;
1605 struct cfil_msg_action *action_msg;
1606 struct cfil_entry *entry;
1607 struct cfil_info *cfil_info = NULL;
1608 unsigned int data_len = 0;
1609
1610 CFIL_LOG(LOG_INFO, "");
1611
1612 if (content_filters == NULL) {
1613 CFIL_LOG(LOG_ERR, "no content filter");
1614 error = EINVAL;
1615 goto done;
1616 }
1617 if (kcunit > MAX_CONTENT_FILTER) {
1618 CFIL_LOG(LOG_ERR, "kcunit %u > MAX_CONTENT_FILTER (%d)",
1619 kcunit, MAX_CONTENT_FILTER);
1620 error = EINVAL;
1621 goto done;
1622 }
1623 if (m == NULL) {
1624 CFIL_LOG(LOG_ERR, "null mbuf");
1625 error = EINVAL;
1626 goto done;
1627 }
1628 data_len = m_length(m);
1629
1630 if (data_len < sizeof(struct cfil_msg_hdr)) {
1631 CFIL_LOG(LOG_ERR, "too short %u", data_len);
1632 error = EINVAL;
1633 goto done;
1634 }
1635 msghdr = (struct cfil_msg_hdr *)mbuf_data(m);
1636 if (msghdr->cfm_version != CFM_VERSION_CURRENT) {
1637 CFIL_LOG(LOG_ERR, "bad version %u", msghdr->cfm_version);
1638 error = EINVAL;
1639 goto done;
1640 }
1641 if (msghdr->cfm_type != CFM_TYPE_ACTION) {
1642 CFIL_LOG(LOG_ERR, "bad type %u", msghdr->cfm_type);
1643 error = EINVAL;
1644 goto done;
1645 }
1646 if (msghdr->cfm_len > data_len) {
1647 CFIL_LOG(LOG_ERR, "bad length %u", msghdr->cfm_len);
1648 error = EINVAL;
1649 goto done;
1650 }
1651
1652 /* Validate action operation */
1653 switch (msghdr->cfm_op) {
1654 case CFM_OP_DATA_UPDATE:
1655 OSIncrementAtomic(
1656 &cfil_stats.cfs_ctl_action_data_update);
1657 break;
1658 case CFM_OP_DROP:
1659 OSIncrementAtomic(&cfil_stats.cfs_ctl_action_drop);
1660 break;
1661 case CFM_OP_BLESS_CLIENT:
1662 if (msghdr->cfm_len != sizeof(struct cfil_msg_bless_client)) {
1663 OSIncrementAtomic(&cfil_stats.cfs_ctl_action_bad_len);
1664 error = EINVAL;
1665 CFIL_LOG(LOG_ERR, "bad len: %u for op %u",
1666 msghdr->cfm_len,
1667 msghdr->cfm_op);
1668 goto done;
1669 }
1670 error = cfil_action_bless_client(kcunit, msghdr);
1671 goto done;
1672 case CFM_OP_SET_CRYPTO_KEY:
1673 if (msghdr->cfm_len != sizeof(struct cfil_msg_set_crypto_key)) {
1674 OSIncrementAtomic(&cfil_stats.cfs_ctl_action_bad_len);
1675 error = EINVAL;
1676 CFIL_LOG(LOG_ERR, "bad len: %u for op %u",
1677 msghdr->cfm_len,
1678 msghdr->cfm_op);
1679 goto done;
1680 }
1681 error = cfil_action_set_crypto_key(kcunit, msghdr);
1682 goto done;
1683 default:
1684 OSIncrementAtomic(&cfil_stats.cfs_ctl_action_bad_op);
1685 CFIL_LOG(LOG_ERR, "bad op %u", msghdr->cfm_op);
1686 error = EINVAL;
1687 goto done;
1688 }
1689 if (msghdr->cfm_len != sizeof(struct cfil_msg_action)) {
1690 OSIncrementAtomic(&cfil_stats.cfs_ctl_action_bad_len);
1691 error = EINVAL;
1692 CFIL_LOG(LOG_ERR, "bad len: %u for op %u",
1693 msghdr->cfm_len,
1694 msghdr->cfm_op);
1695 goto done;
1696 }
1697 cfil_rw_lock_shared(&cfil_lck_rw);
1698 if (cfc != (void *)content_filters[kcunit - 1]) {
1699 CFIL_LOG(LOG_ERR, "unitinfo does not match for kcunit %u",
1700 kcunit);
1701 error = EINVAL;
1702 cfil_rw_unlock_shared(&cfil_lck_rw);
1703 goto done;
1704 }
1705 cfil_rw_unlock_shared(&cfil_lck_rw);
1706
1711 1708 // Search for socket (TCP+UDP) and lock so
1708 so = cfil_socket_from_sock_id(msghdr->cfm_sock_id, false);
1709 if (so == NULL) {
1710 CFIL_LOG(LOG_NOTICE, "bad sock_id %llx",
1711 msghdr->cfm_sock_id);
1712 error = EINVAL;
1713 goto done;
1714 }
1715
1716 cfil_info = so->so_cfil_db != NULL ?
1717 cfil_db_get_cfil_info(so->so_cfil_db, msghdr->cfm_sock_id) : so->so_cfil;
1718
1719 if (cfil_info == NULL) {
1720 CFIL_LOG(LOG_NOTICE, "so %llx <id %llu> not attached",
1721 (uint64_t)VM_KERNEL_ADDRPERM(so), msghdr->cfm_sock_id);
1722 error = EINVAL;
1723 goto unlock;
1724 } else if (cfil_info->cfi_flags & CFIF_DROP) {
1725 CFIL_LOG(LOG_NOTICE, "so %llx drop set",
1726 (uint64_t)VM_KERNEL_ADDRPERM(so));
1727 error = EINVAL;
1728 goto unlock;
1729 }
1730 entry = &cfil_info->cfi_entries[kcunit - 1];
1731 if (entry->cfe_filter == NULL) {
1732 CFIL_LOG(LOG_NOTICE, "so %llx no filter",
1733 (uint64_t)VM_KERNEL_ADDRPERM(so));
1734 error = EINVAL;
1735 goto unlock;
1736 }
1737
1738 if (entry->cfe_flags & CFEF_SENT_SOCK_ATTACHED) {
1739 entry->cfe_flags |= CFEF_DATA_START;
1740 } else {
1741 CFIL_LOG(LOG_ERR,
1742 "so %llx attached not sent for %u",
1743 (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit);
1744 error = EINVAL;
1745 goto unlock;
1746 }
1747
1748 microuptime(&entry->cfe_last_action);
1749 CFI_ADD_TIME_LOG(cfil_info, &entry->cfe_last_action, &cfil_info->cfi_first_event, msghdr->cfm_op);
1750
1751 action_msg = (struct cfil_msg_action *)msghdr;
1752
1753 switch (msghdr->cfm_op) {
1754 case CFM_OP_DATA_UPDATE:
1755 #if VERDICT_DEBUG
1756 CFIL_LOG(LOG_ERR, "CFIL: VERDICT RECEIVED: <so %llx sockID %llu> <IN peek:%llu pass:%llu, OUT peek:%llu pass:%llu>",
1757 (uint64_t)VM_KERNEL_ADDRPERM(so),
1758 cfil_info->cfi_sock_id,
1759 action_msg->cfa_in_peek_offset, action_msg->cfa_in_pass_offset,
1760 action_msg->cfa_out_peek_offset, action_msg->cfa_out_pass_offset);
1761 #endif
1762 /*
1766 1763 * A verdict has been received, so at this point we know this
1767 1764 * socket connection is allowed. Unblock any waiting thread
1768 1765 * immediately before proceeding to process the verdict.
1766 */
1767 cfil_sock_received_verdict(so);
1768
1769 if (action_msg->cfa_out_peek_offset != 0 ||
1770 action_msg->cfa_out_pass_offset != 0) {
1771 error = cfil_action_data_pass(so, cfil_info, kcunit, 1,
1772 action_msg->cfa_out_pass_offset,
1773 action_msg->cfa_out_peek_offset);
1774 }
1775 if (error == EJUSTRETURN) {
1776 error = 0;
1777 }
1778 if (error != 0) {
1779 break;
1780 }
1781 if (action_msg->cfa_in_peek_offset != 0 ||
1782 action_msg->cfa_in_pass_offset != 0) {
1783 error = cfil_action_data_pass(so, cfil_info, kcunit, 0,
1784 action_msg->cfa_in_pass_offset,
1785 action_msg->cfa_in_peek_offset);
1786 }
1787 if (error == EJUSTRETURN) {
1788 error = 0;
1789 }
1790 break;
1791
1792 case CFM_OP_DROP:
1793 #if VERDICT_DEBUG
1794 CFIL_LOG(LOG_ERR, "CFIL: VERDICT DROP RECEIVED: <so %llx sockID %llu> <IN peek:%llu pass:%llu, OUT peek:%llu pass:%llu>",
1795 (uint64_t)VM_KERNEL_ADDRPERM(so),
1796 cfil_info->cfi_sock_id,
1797 action_msg->cfa_in_peek_offset, action_msg->cfa_in_pass_offset,
1798 action_msg->cfa_out_peek_offset, action_msg->cfa_out_pass_offset);
1799 #endif
1800 error = cfil_action_drop(so, cfil_info, kcunit);
1801 cfil_sock_received_verdict(so);
1802 break;
1803
1804 default:
1805 error = EINVAL;
1806 break;
1807 }
1808 unlock:
1809 socket_unlock(so, 1);
1810 done:
1811 mbuf_freem(m);
1812
1813 if (error == 0) {
1814 OSIncrementAtomic(&cfil_stats.cfs_ctl_send_ok);
1815 } else {
1816 OSIncrementAtomic(&cfil_stats.cfs_ctl_send_bad);
1817 }
1818
1819 return error;
1820 }
1821
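/*
 * cfil_ctl_getopt
 *
 * Kernel control "getopt" callback. CFIL_OPT_NECP_CONTROL_UNIT returns the
 * NECP filter control unit of this filter instance; CFIL_OPT_GET_SOCKET_INFO
 * looks up a filtered socket by sock_id and returns its addresses, pids and
 * UUIDs. For the latter, cfil_lck_rw is dropped before taking the socket
 * lock to respect the lock ordering described in the comments below.
 */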
1822 static errno_t
1823 cfil_ctl_getopt(kern_ctl_ref kctlref, u_int32_t kcunit, void *unitinfo,
1824 int opt, void *data, size_t *len)
1825 {
1826 #pragma unused(kctlref, opt)
1827 struct cfil_info *cfil_info = NULL;
1828 errno_t error = 0;
1829 struct content_filter *cfc = (struct content_filter *)unitinfo;
1830
1831 CFIL_LOG(LOG_NOTICE, "");
1832
1833 cfil_rw_lock_shared(&cfil_lck_rw);
1834
1835 if (content_filters == NULL) {
1836 CFIL_LOG(LOG_ERR, "no content filter");
1837 error = EINVAL;
1838 goto done;
1839 }
1840 if (kcunit > MAX_CONTENT_FILTER) {
1841 CFIL_LOG(LOG_ERR, "kcunit %u > MAX_CONTENT_FILTER (%d)",
1842 kcunit, MAX_CONTENT_FILTER);
1843 error = EINVAL;
1844 goto done;
1845 }
1846 if (cfc != (void *)content_filters[kcunit - 1]) {
1847 CFIL_LOG(LOG_ERR, "unitinfo does not match for kcunit %u",
1848 kcunit);
1849 error = EINVAL;
1850 goto done;
1851 }
1852 switch (opt) {
1853 case CFIL_OPT_NECP_CONTROL_UNIT:
1854 if (*len < sizeof(uint32_t)) {
1855 CFIL_LOG(LOG_ERR, "len too small %lu", *len);
1856 error = EINVAL;
1857 goto done;
1858 }
1859 if (data != NULL) {
1860 *(uint32_t *)data = cfc->cf_necp_control_unit;
1861 }
1862 break;
1863 case CFIL_OPT_GET_SOCKET_INFO:
1864 if (*len != sizeof(struct cfil_opt_sock_info)) {
1865 CFIL_LOG(LOG_ERR, "len does not match %lu", *len);
1866 error = EINVAL;
1867 goto done;
1868 }
1869 if (data == NULL) {
1870 CFIL_LOG(LOG_ERR, "data not passed");
1871 error = EINVAL;
1872 goto done;
1873 }
1874
1875 struct cfil_opt_sock_info *sock_info =
1876 (struct cfil_opt_sock_info *) data;
1877
1878 // Unlock here so that we never hold both cfil_lck_rw and the
1879 // socket_lock at the same time. Otherwise, this can deadlock
1880 // because soclose() takes the socket_lock and then exclusive
1881 // cfil_lck_rw and we require the opposite order.
1882
1883 // WARNING: Be sure to never use anything protected
1884 // by cfil_lck_rw beyond this point.
1885 // WARNING: Be sure to avoid fallthrough and
1886 // goto return_already_unlocked from this branch.
1887 cfil_rw_unlock_shared(&cfil_lck_rw);
1888
1889 // Search (TCP+UDP) and lock socket
1890 struct socket *sock =
1891 cfil_socket_from_sock_id(sock_info->cfs_sock_id, false);
1892 if (sock == NULL) {
1893 #if LIFECYCLE_DEBUG
1894 CFIL_LOG(LOG_ERR, "CFIL: GET_SOCKET_INFO failed: bad sock_id %llu",
1895 sock_info->cfs_sock_id);
1896 #endif
1897 error = ENOENT;
1898 goto return_already_unlocked;
1899 }
1900
1901 cfil_info = (sock->so_cfil_db != NULL) ?
1902 cfil_db_get_cfil_info(sock->so_cfil_db, sock_info->cfs_sock_id) : sock->so_cfil;
1903
1904 if (cfil_info == NULL) {
1905 #if LIFECYCLE_DEBUG
1906 CFIL_LOG(LOG_ERR, "CFIL: GET_SOCKET_INFO failed: so %llx not attached, cannot fetch info",
1907 (uint64_t)VM_KERNEL_ADDRPERM(sock));
1908 #endif
1909 error = EINVAL;
1910 socket_unlock(sock, 1);
1911 goto return_already_unlocked;
1912 }
1913
1914 // Fill out family, type, and protocol
1915 sock_info->cfs_sock_family = sock->so_proto->pr_domain->dom_family;
1916 sock_info->cfs_sock_type = sock->so_proto->pr_type;
1917 sock_info->cfs_sock_protocol = sock->so_proto->pr_protocol;
1918
1919 // Source and destination addresses
1920 struct inpcb *inp = sotoinpcb(sock);
1921 if (inp->inp_vflag & INP_IPV6) {
1922 struct in6_addr *laddr = NULL, *faddr = NULL;
1923 u_int16_t lport = 0, fport = 0;
1924
1925 cfil_get_flow_address_v6(cfil_info->cfi_hash_entry, inp,
1926 &laddr, &faddr, &lport, &fport);
1927 fill_ip6_sockaddr_4_6(&sock_info->cfs_local, laddr, lport);
1928 fill_ip6_sockaddr_4_6(&sock_info->cfs_remote, faddr, fport);
1929 } else if (inp->inp_vflag & INP_IPV4) {
1930 struct in_addr laddr = {.s_addr = 0}, faddr = {.s_addr = 0};
1931 u_int16_t lport = 0, fport = 0;
1932
1933 cfil_get_flow_address(cfil_info->cfi_hash_entry, inp,
1934 &laddr, &faddr, &lport, &fport);
1935 fill_ip_sockaddr_4_6(&sock_info->cfs_local, laddr, lport);
1936 fill_ip_sockaddr_4_6(&sock_info->cfs_remote, faddr, fport);
1937 }
1938
1939 // Set the pid info
1940 sock_info->cfs_pid = sock->last_pid;
1941 memcpy(sock_info->cfs_uuid, sock->last_uuid, sizeof(uuid_t));
1942
1943 if (sock->so_flags & SOF_DELEGATED) {
1944 sock_info->cfs_e_pid = sock->e_pid;
1945 memcpy(sock_info->cfs_e_uuid, sock->e_uuid, sizeof(uuid_t));
1946 } else {
1947 sock_info->cfs_e_pid = sock->last_pid;
1948 memcpy(sock_info->cfs_e_uuid, sock->last_uuid, sizeof(uuid_t));
1949 }
1950
1951 socket_unlock(sock, 1);
1952
1953 goto return_already_unlocked;
1954 default:
1955 error = ENOPROTOOPT;
1956 break;
1957 }
1958 done:
1959 cfil_rw_unlock_shared(&cfil_lck_rw);
1960
1961 return error;
1962
1963 return_already_unlocked:
1964
1965 return error;
1966 }
1967
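/*
 * cfil_ctl_setopt
 *
 * Kernel control "setopt" callback. The only supported option is
 * CFIL_OPT_NECP_CONTROL_UNIT, which binds this filter instance to an NECP
 * filter control unit and may only be set once.
 */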
1968 static errno_t
1969 cfil_ctl_setopt(kern_ctl_ref kctlref, u_int32_t kcunit, void *unitinfo,
1970 int opt, void *data, size_t len)
1971 {
1972 #pragma unused(kctlref, opt)
1973 errno_t error = 0;
1974 struct content_filter *cfc = (struct content_filter *)unitinfo;
1975
1976 CFIL_LOG(LOG_NOTICE, "");
1977
1978 cfil_rw_lock_exclusive(&cfil_lck_rw);
1979
1980 if (content_filters == NULL) {
1981 CFIL_LOG(LOG_ERR, "no content filter");
1982 error = EINVAL;
1983 goto done;
1984 }
1985 if (kcunit > MAX_CONTENT_FILTER) {
1986 CFIL_LOG(LOG_ERR, "kcunit %u > MAX_CONTENT_FILTER (%d)",
1987 kcunit, MAX_CONTENT_FILTER);
1988 error = EINVAL;
1989 goto done;
1990 }
1991 if (cfc != (void *)content_filters[kcunit - 1]) {
1992 CFIL_LOG(LOG_ERR, "unitinfo does not match for kcunit %u",
1993 kcunit);
1994 error = EINVAL;
1995 goto done;
1996 }
1997 switch (opt) {
1998 case CFIL_OPT_NECP_CONTROL_UNIT:
1999 if (len < sizeof(uint32_t)) {
2000 CFIL_LOG(LOG_ERR, "CFIL_OPT_NECP_CONTROL_UNIT "
2001 "len too small %lu", len);
2002 error = EINVAL;
2003 goto done;
2004 }
2005 if (cfc->cf_necp_control_unit != 0) {
2006 CFIL_LOG(LOG_ERR, "CFIL_OPT_NECP_CONTROL_UNIT "
2007 "already set %u",
2008 cfc->cf_necp_control_unit);
2009 error = EINVAL;
2010 goto done;
2011 }
2012 cfc->cf_necp_control_unit = *(uint32_t *)data;
2013 break;
2014 default:
2015 error = ENOPROTOOPT;
2016 break;
2017 }
2018 done:
2019 cfil_rw_unlock_exclusive(&cfil_lck_rw);
2020
2021 return error;
2022 }
2023
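/*
 * Illustrative sketch (not part of this file) of how a user space filter
 * agent might set its NECP filter control unit on the kernel control socket;
 * everything other than CONTENT_FILTER_CONTROL_NAME and
 * CFIL_OPT_NECP_CONTROL_UNIT is an assumption for illustration only:
 *
 *	int fd = socket(PF_SYSTEM, SOCK_DGRAM, SYSPROTO_CONTROL);
 *	struct ctl_info info = {};
 *	strlcpy(info.ctl_name, CONTENT_FILTER_CONTROL_NAME, sizeof(info.ctl_name));
 *	ioctl(fd, CTLIOCGINFO, &info);          // resolve the control id
 *	struct sockaddr_ctl addr = { .sc_len = sizeof(addr),
 *	    .sc_family = AF_SYSTEM, .ss_sysaddr = AF_SYS_CONTROL,
 *	    .sc_id = info.ctl_id, .sc_unit = 0 };
 *	connect(fd, (struct sockaddr *)&addr, sizeof(addr));
 *	uint32_t unit = 1;                      // must match the agent's NECP rules
 *	setsockopt(fd, SYSPROTO_CONTROL, CFIL_OPT_NECP_CONTROL_UNIT,
 *	    &unit, sizeof(unit));
 *
 * The control is registered with CTL_FLAG_PRIVILEGED, so only a privileged
 * agent can connect.
 */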
2024
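/*
 * cfil_ctl_rcvd
 *
 * Kernel control "rcvd" callback: invoked after the filter agent has read
 * data off its kernel control socket. Clears CFF_FLOW_CONTROLLED on the
 * filter and re-services the control queues of entries that were flow
 * controlled, until the filter becomes flow controlled again.
 */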
2025 static void
2026 cfil_ctl_rcvd(kern_ctl_ref kctlref, u_int32_t kcunit, void *unitinfo, int flags)
2027 {
2028 #pragma unused(kctlref, flags)
2029 struct content_filter *cfc = (struct content_filter *)unitinfo;
2030 struct socket *so = NULL;
2031 int error;
2032 struct cfil_entry *entry;
2033 struct cfil_info *cfil_info = NULL;
2034
2035 CFIL_LOG(LOG_INFO, "");
2036
2037 if (content_filters == NULL) {
2038 CFIL_LOG(LOG_ERR, "no content filter");
2039 OSIncrementAtomic(&cfil_stats.cfs_ctl_rcvd_bad);
2040 return;
2041 }
2042 if (kcunit > MAX_CONTENT_FILTER) {
2043 CFIL_LOG(LOG_ERR, "kcunit %u > MAX_CONTENT_FILTER (%d)",
2044 kcunit, MAX_CONTENT_FILTER);
2045 OSIncrementAtomic(&cfil_stats.cfs_ctl_rcvd_bad);
2046 return;
2047 }
2048 cfil_rw_lock_shared(&cfil_lck_rw);
2049 if (cfc != (void *)content_filters[kcunit - 1]) {
2050 CFIL_LOG(LOG_ERR, "unitinfo does not match for kcunit %u",
2051 kcunit);
2052 OSIncrementAtomic(&cfil_stats.cfs_ctl_rcvd_bad);
2053 goto done;
2054 }
2055 /* Let's assume the flow control is lifted */
2056 if (cfc->cf_flags & CFF_FLOW_CONTROLLED) {
2057 if (!cfil_rw_lock_shared_to_exclusive(&cfil_lck_rw)) {
2058 cfil_rw_lock_exclusive(&cfil_lck_rw);
2059 }
2060
2061 cfc->cf_flags &= ~CFF_FLOW_CONTROLLED;
2062
2063 cfil_rw_lock_exclusive_to_shared(&cfil_lck_rw);
2064 LCK_RW_ASSERT(&cfil_lck_rw, LCK_RW_ASSERT_SHARED);
2065 }
2066 /*
2067 * Flow control will be raised again as soon as an entry cannot enqueue
2068 * to the kernel control socket
2069 */
2070 while ((cfc->cf_flags & CFF_FLOW_CONTROLLED) == 0) {
2071 verify_content_filter(cfc);
2072
2073 cfil_rw_lock_assert_held(&cfil_lck_rw, 0);
2074
2075 /* Find an entry that is flow controlled */
2076 TAILQ_FOREACH(entry, &cfc->cf_sock_entries, cfe_link) {
2077 if (entry->cfe_cfil_info == NULL ||
2078 entry->cfe_cfil_info->cfi_so == NULL) {
2079 continue;
2080 }
2084 2081 if ((entry->cfe_flags & CFEF_FLOW_CONTROLLED) == 0) {
2085 2082 continue;
2086 2083 }
/* Found a flow-controlled entry: stop searching so it gets serviced below */
break;
2087 2084 }
2085 if (entry == NULL) {
2086 break;
2087 }
2088
2089 OSIncrementAtomic(&cfil_stats.cfs_ctl_rcvd_flow_lift);
2090
2091 cfil_info = entry->cfe_cfil_info;
2092 so = cfil_info->cfi_so;
2093
2094 cfil_rw_unlock_shared(&cfil_lck_rw);
2095 socket_lock(so, 1);
2096
2097 do {
2098 error = cfil_acquire_sockbuf(so, cfil_info, 1);
2099 if (error == 0) {
2100 error = cfil_data_service_ctl_q(so, cfil_info, kcunit, 1);
2101 }
2102 cfil_release_sockbuf(so, 1);
2103 if (error != 0) {
2104 break;
2105 }
2106
2107 error = cfil_acquire_sockbuf(so, cfil_info, 0);
2108 if (error == 0) {
2109 error = cfil_data_service_ctl_q(so, cfil_info, kcunit, 0);
2110 }
2111 cfil_release_sockbuf(so, 0);
2112 } while (0);
2113
2114 socket_lock_assert_owned(so);
2115 socket_unlock(so, 1);
2116
2117 cfil_rw_lock_shared(&cfil_lck_rw);
2118 }
2119 done:
2120 cfil_rw_unlock_shared(&cfil_lck_rw);
2121 }
2122
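/*
 * cfil_init
 *
 * One-time subsystem initialization: verifies structure layout invariants,
 * creates the zones for content_filter, cfil_info, hash entry and db
 * allocations, sets up the global read-write lock, registers the kernel
 * control and starts the UDP garbage collection thread.
 */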
2123 void
2124 cfil_init(void)
2125 {
2126 struct kern_ctl_reg kern_ctl;
2127 errno_t error = 0;
2128 vm_size_t content_filter_size = 0; /* size of content_filter */
2129 vm_size_t cfil_info_size = 0; /* size of cfil_info */
2130 vm_size_t cfil_hash_entry_size = 0; /* size of cfil_hash_entry */
2131 vm_size_t cfil_db_size = 0; /* size of cfil_db */
2132 unsigned int mbuf_limit = 0;
2133
2134 CFIL_LOG(LOG_NOTICE, "");
2135
2136 /*
2137 * Compile time verifications
2138 */
2139 _CASSERT(CFIL_MAX_FILTER_COUNT == MAX_CONTENT_FILTER);
2140 _CASSERT(sizeof(struct cfil_filter_stat) % sizeof(uint32_t) == 0);
2141 _CASSERT(sizeof(struct cfil_entry_stat) % sizeof(uint32_t) == 0);
2142 _CASSERT(sizeof(struct cfil_sock_stat) % sizeof(uint32_t) == 0);
2143
2144 /*
2148 2145 * Run time verifications
2146 */
2147 VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_ctl_q_in_enqueued,
2148 sizeof(uint32_t)));
2149 VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_ctl_q_out_enqueued,
2150 sizeof(uint32_t)));
2151 VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_ctl_q_in_peeked,
2152 sizeof(uint32_t)));
2153 VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_ctl_q_out_peeked,
2154 sizeof(uint32_t)));
2155
2156 VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_pending_q_in_enqueued,
2157 sizeof(uint32_t)));
2158 VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_pending_q_out_enqueued,
2159 sizeof(uint32_t)));
2160
2161 VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_inject_q_in_enqueued,
2162 sizeof(uint32_t)));
2163 VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_inject_q_out_enqueued,
2164 sizeof(uint32_t)));
2165 VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_inject_q_in_passed,
2166 sizeof(uint32_t)));
2167 VERIFY(IS_P2ALIGNED(&cfil_stats.cfs_inject_q_out_passed,
2168 sizeof(uint32_t)));
2169
2170 /*
2171 * Zone for content filters kernel control sockets
2172 */
2173 content_filter_size = sizeof(struct content_filter);
2174 content_filter_zone = zinit(content_filter_size,
2175 CONTENT_FILTER_ZONE_MAX * content_filter_size,
2176 0,
2177 CONTENT_FILTER_ZONE_NAME);
2178 if (content_filter_zone == NULL) {
2179 panic("%s: zinit(%s) failed", __func__,
2180 CONTENT_FILTER_ZONE_NAME);
2181 /* NOTREACHED */
2182 }
2183 zone_change(content_filter_zone, Z_CALLERACCT, FALSE);
2184 zone_change(content_filter_zone, Z_EXPAND, TRUE);
2185
2186 /*
2187 * Zone for per socket content filters
2188 */
2189 cfil_info_size = sizeof(struct cfil_info);
2190 cfil_info_zone = zinit(cfil_info_size,
2191 CFIL_INFO_ZONE_MAX * cfil_info_size,
2192 0,
2193 CFIL_INFO_ZONE_NAME);
2194 if (cfil_info_zone == NULL) {
2195 panic("%s: zinit(%s) failed", __func__, CFIL_INFO_ZONE_NAME);
2196 /* NOTREACHED */
2197 }
2198 zone_change(cfil_info_zone, Z_CALLERACCT, FALSE);
2199 zone_change(cfil_info_zone, Z_EXPAND, TRUE);
2200
2201 /*
2202 * Zone for content filters cfil hash entries and db
2203 */
2204 cfil_hash_entry_size = sizeof(struct cfil_hash_entry);
2205 cfil_hash_entry_zone = zinit(cfil_hash_entry_size,
2206 CFIL_HASH_ENTRY_ZONE_MAX * cfil_hash_entry_size,
2207 0,
2208 CFIL_HASH_ENTRY_ZONE_NAME);
2209 if (cfil_hash_entry_zone == NULL) {
2210 panic("%s: zinit(%s) failed", __func__, CFIL_HASH_ENTRY_ZONE_NAME);
2211 /* NOTREACHED */
2212 }
2213 zone_change(cfil_hash_entry_zone, Z_CALLERACCT, FALSE);
2214 zone_change(cfil_hash_entry_zone, Z_EXPAND, TRUE);
2215
2216 cfil_db_size = sizeof(struct cfil_db);
2217 cfil_db_zone = zinit(cfil_db_size,
2218 CFIL_DB_ZONE_MAX * cfil_db_size,
2219 0,
2220 CFIL_DB_ZONE_NAME);
2221 if (cfil_db_zone == NULL) {
2222 panic("%s: zinit(%s) failed", __func__, CFIL_DB_ZONE_NAME);
2223 /* NOTREACHED */
2224 }
2225 zone_change(cfil_db_zone, Z_CALLERACCT, FALSE);
2226 zone_change(cfil_db_zone, Z_EXPAND, TRUE);
2227
2228 /*
2229 * Allocate locks
2230 */
2231 cfil_lck_grp_attr = lck_grp_attr_alloc_init();
2232 if (cfil_lck_grp_attr == NULL) {
2233 panic("%s: lck_grp_attr_alloc_init failed", __func__);
2234 /* NOTREACHED */
2235 }
2236 cfil_lck_grp = lck_grp_alloc_init("content filter",
2237 cfil_lck_grp_attr);
2238 if (cfil_lck_grp == NULL) {
2239 panic("%s: lck_grp_alloc_init failed", __func__);
2240 /* NOTREACHED */
2241 }
2242 cfil_lck_attr = lck_attr_alloc_init();
2243 if (cfil_lck_attr == NULL) {
2244 panic("%s: lck_attr_alloc_init failed", __func__);
2245 /* NOTREACHED */
2246 }
2247 lck_rw_init(&cfil_lck_rw, cfil_lck_grp, cfil_lck_attr);
2248
2249 TAILQ_INIT(&cfil_sock_head);
2250
2251 /*
2252 * Register kernel control
2253 */
2254 bzero(&kern_ctl, sizeof(kern_ctl));
2255 strlcpy(kern_ctl.ctl_name, CONTENT_FILTER_CONTROL_NAME,
2256 sizeof(kern_ctl.ctl_name));
2257 kern_ctl.ctl_flags = CTL_FLAG_PRIVILEGED | CTL_FLAG_REG_EXTENDED;
2258 kern_ctl.ctl_sendsize = 512 * 1024; /* enough? */
2259 kern_ctl.ctl_recvsize = 512 * 1024; /* enough? */
2260 kern_ctl.ctl_connect = cfil_ctl_connect;
2261 kern_ctl.ctl_disconnect = cfil_ctl_disconnect;
2262 kern_ctl.ctl_send = cfil_ctl_send;
2263 kern_ctl.ctl_getopt = cfil_ctl_getopt;
2264 kern_ctl.ctl_setopt = cfil_ctl_setopt;
2265 kern_ctl.ctl_rcvd = cfil_ctl_rcvd;
2266 error = ctl_register(&kern_ctl, &cfil_kctlref);
2267 if (error != 0) {
2268 CFIL_LOG(LOG_ERR, "ctl_register failed: %d", error);
2269 return;
2270 }
2271
2275 2272 // Spawn thread for garbage collection
2273 if (kernel_thread_start(cfil_udp_gc_thread_func, NULL,
2274 &cfil_udp_gc_thread) != KERN_SUCCESS) {
2275 panic_plain("%s: Can't create UDP GC thread", __func__);
2276 /* NOTREACHED */
2277 }
2278 /* this must not fail */
2279 VERIFY(cfil_udp_gc_thread != NULL);
2280
2281 // Set UDP per-flow mbuf thresholds to 1/32 of platform max
2282 mbuf_limit = MAX(UDP_FLOW_GC_MBUF_CNT_MAX, (nmbclusters << MCLSHIFT) >> UDP_FLOW_GC_MBUF_SHIFT);
2283 cfil_udp_gc_mbuf_num_max = (mbuf_limit >> MCLSHIFT);
2284 cfil_udp_gc_mbuf_cnt_max = mbuf_limit;
2285 }
2286
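/*
 * cfil_info_alloc
 *
 * Allocates and initializes the per-flow cfil_info. For TCP (hash_entry ==
 * NULL) the cfil_info is attached to so->so_cfil; for UDP it is attached to
 * the per-socket hash entry. cfi_sock_id combines so_gencnt with the flow
 * hash so that it is not a kernel pointer.
 */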
2287 struct cfil_info *
2288 cfil_info_alloc(struct socket *so, struct cfil_hash_entry *hash_entry)
2289 {
2290 int kcunit;
2291 struct cfil_info *cfil_info = NULL;
2292 struct inpcb *inp = sotoinpcb(so);
2293
2294 CFIL_LOG(LOG_INFO, "");
2295
2296 socket_lock_assert_owned(so);
2297
2298 cfil_info = zalloc(cfil_info_zone);
2299 if (cfil_info == NULL) {
2300 goto done;
2301 }
2302 bzero(cfil_info, sizeof(struct cfil_info));
2303
2304 cfil_queue_init(&cfil_info->cfi_snd.cfi_inject_q);
2305 cfil_queue_init(&cfil_info->cfi_rcv.cfi_inject_q);
2306
2307 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
2308 struct cfil_entry *entry;
2309
2310 entry = &cfil_info->cfi_entries[kcunit - 1];
2311 entry->cfe_cfil_info = cfil_info;
2312
2313 /* Initialize the filter entry */
2314 entry->cfe_filter = NULL;
2315 entry->cfe_flags = 0;
2316 entry->cfe_necp_control_unit = 0;
2317 entry->cfe_snd.cfe_pass_offset = 0;
2318 entry->cfe_snd.cfe_peek_offset = 0;
2319 entry->cfe_snd.cfe_peeked = 0;
2320 entry->cfe_rcv.cfe_pass_offset = 0;
2321 entry->cfe_rcv.cfe_peek_offset = 0;
2322 entry->cfe_rcv.cfe_peeked = 0;
2323 /*
2327 2324 * Timestamp the last action to avoid prematurely
2325 * triggering garbage collection
2326 */
2327 microuptime(&entry->cfe_last_action);
2328
2329 cfil_queue_init(&entry->cfe_snd.cfe_pending_q);
2330 cfil_queue_init(&entry->cfe_rcv.cfe_pending_q);
2331 cfil_queue_init(&entry->cfe_snd.cfe_ctl_q);
2332 cfil_queue_init(&entry->cfe_rcv.cfe_ctl_q);
2333 }
2334
2335 cfil_rw_lock_exclusive(&cfil_lck_rw);
2336
2337 /*
2338 * Create a cfi_sock_id that's not the socket pointer!
2339 */
2340
2341 if (hash_entry == NULL) {
2342 // This is the TCP case, cfil_info is tracked per socket
2343 if (inp->inp_flowhash == 0) {
2344 inp->inp_flowhash = inp_calc_flowhash(inp);
2345 }
2346
2347 so->so_cfil = cfil_info;
2348 cfil_info->cfi_so = so;
2349 cfil_info->cfi_sock_id =
2350 ((so->so_gencnt << 32) | inp->inp_flowhash);
2351 } else {
2352 // This is the UDP case, cfil_info is tracked in per-socket hash
2353 cfil_info->cfi_so = so;
2354 hash_entry->cfentry_cfil = cfil_info;
2355 cfil_info->cfi_hash_entry = hash_entry;
2356 cfil_info->cfi_sock_id = ((so->so_gencnt << 32) | (hash_entry->cfentry_flowhash & 0xffffffff));
2357 CFIL_LOG(LOG_DEBUG, "CFIL: UDP inp_flowhash %x so_gencnt %llx entry flowhash %x sockID %llx",
2358 inp->inp_flowhash, so->so_gencnt, hash_entry->cfentry_flowhash, cfil_info->cfi_sock_id);
2359
2363 2360 // Wake up the gc thread if this is the first flow added
2361 if (cfil_sock_udp_attached_count == 0) {
2362 thread_wakeup((caddr_t)&cfil_sock_udp_attached_count);
2363 }
2364
2365 cfil_sock_udp_attached_count++;
2366 }
2367
2368 TAILQ_INSERT_TAIL(&cfil_sock_head, cfil_info, cfi_link);
2369 SLIST_INIT(&cfil_info->cfi_ordered_entries);
2370
2371 cfil_sock_attached_count++;
2372
2373 cfil_rw_unlock_exclusive(&cfil_lck_rw);
2374
2375 done:
2376 if (cfil_info != NULL) {
2377 OSIncrementAtomic(&cfil_stats.cfs_cfi_alloc_ok);
2378 } else {
2379 OSIncrementAtomic(&cfil_stats.cfs_cfi_alloc_fail);
2380 }
2381
2382 return cfil_info;
2383 }
2384
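/*
 * cfil_info_attach_unit
 *
 * Attaches the cfil_info to every active content filter whose NECP control
 * unit matches filter_control_unit, keeping cfi_ordered_entries sorted by
 * control unit. Returns non-zero if at least one filter was attached.
 */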
2385 int
2386 cfil_info_attach_unit(struct socket *so, uint32_t filter_control_unit, struct cfil_info *cfil_info)
2387 {
2388 int kcunit;
2389 int attached = 0;
2390
2391 CFIL_LOG(LOG_INFO, "");
2392
2393 socket_lock_assert_owned(so);
2394
2395 cfil_rw_lock_exclusive(&cfil_lck_rw);
2396
2397 for (kcunit = 1;
2398 content_filters != NULL && kcunit <= MAX_CONTENT_FILTER;
2399 kcunit++) {
2400 struct content_filter *cfc = content_filters[kcunit - 1];
2401 struct cfil_entry *entry;
2402 struct cfil_entry *iter_entry;
2403 struct cfil_entry *iter_prev;
2404
2405 if (cfc == NULL) {
2406 continue;
2407 }
2408 if (!(cfc->cf_necp_control_unit & filter_control_unit)) {
2409 continue;
2410 }
2411
2412 entry = &cfil_info->cfi_entries[kcunit - 1];
2413
2414 entry->cfe_filter = cfc;
2415 entry->cfe_necp_control_unit = cfc->cf_necp_control_unit;
2416 TAILQ_INSERT_TAIL(&cfc->cf_sock_entries, entry, cfe_link);
2417 cfc->cf_sock_count++;
2418
2419 /* Insert the entry into the list ordered by control unit */
2420 iter_prev = NULL;
2421 SLIST_FOREACH(iter_entry, &cfil_info->cfi_ordered_entries, cfe_order_link) {
2422 if (entry->cfe_necp_control_unit < iter_entry->cfe_necp_control_unit) {
2423 break;
2424 }
2425 iter_prev = iter_entry;
2426 }
2427
2428 if (iter_prev == NULL) {
2429 SLIST_INSERT_HEAD(&cfil_info->cfi_ordered_entries, entry, cfe_order_link);
2430 } else {
2431 SLIST_INSERT_AFTER(iter_prev, entry, cfe_order_link);
2432 }
2433
2434 verify_content_filter(cfc);
2435 attached = 1;
2436 entry->cfe_flags |= CFEF_CFIL_ATTACHED;
2437 }
2438
2439 cfil_rw_unlock_exclusive(&cfil_lck_rw);
2440
2441 return attached;
2442 }
2443
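/*
 * cfil_info_free
 *
 * Detaches the cfil_info from all content filters, drains its pending,
 * control and inject queues, and returns it to its zone.
 */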
2444 static void
2445 cfil_info_free(struct cfil_info *cfil_info)
2446 {
2447 int kcunit;
2448 uint64_t in_drain = 0;
2449 uint64_t out_drained = 0;
2450
2451 if (cfil_info == NULL) {
2452 return;
2453 }
2454
2455 CFIL_LOG(LOG_INFO, "");
2456
2457 cfil_rw_lock_exclusive(&cfil_lck_rw);
2458
2459 for (kcunit = 1;
2460 content_filters != NULL && kcunit <= MAX_CONTENT_FILTER;
2461 kcunit++) {
2462 struct cfil_entry *entry;
2463 struct content_filter *cfc;
2464
2465 entry = &cfil_info->cfi_entries[kcunit - 1];
2466
2467 /* Don't be silly and try to detach twice */
2468 if (entry->cfe_filter == NULL) {
2469 continue;
2470 }
2471
2472 cfc = content_filters[kcunit - 1];
2473
2474 VERIFY(cfc == entry->cfe_filter);
2475
2476 entry->cfe_filter = NULL;
2477 entry->cfe_necp_control_unit = 0;
2478 TAILQ_REMOVE(&cfc->cf_sock_entries, entry, cfe_link);
2479 cfc->cf_sock_count--;
2480
2481 verify_content_filter(cfc);
2482 }
2483 if (cfil_info->cfi_hash_entry != NULL) {
2484 cfil_sock_udp_attached_count--;
2485 }
2486 cfil_sock_attached_count--;
2487 TAILQ_REMOVE(&cfil_sock_head, cfil_info, cfi_link);
2488
2489 out_drained += cfil_queue_drain(&cfil_info->cfi_snd.cfi_inject_q);
2490 in_drain += cfil_queue_drain(&cfil_info->cfi_rcv.cfi_inject_q);
2491
2492 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
2493 struct cfil_entry *entry;
2494
2495 entry = &cfil_info->cfi_entries[kcunit - 1];
2496 out_drained += cfil_queue_drain(&entry->cfe_snd.cfe_pending_q);
2497 in_drain += cfil_queue_drain(&entry->cfe_rcv.cfe_pending_q);
2498 out_drained += cfil_queue_drain(&entry->cfe_snd.cfe_ctl_q);
2499 in_drain += cfil_queue_drain(&entry->cfe_rcv.cfe_ctl_q);
2500 }
2501 cfil_rw_unlock_exclusive(&cfil_lck_rw);
2502
2503 if (out_drained) {
2504 OSIncrementAtomic(&cfil_stats.cfs_flush_out_free);
2505 }
2506 if (in_drain) {
2507 OSIncrementAtomic(&cfil_stats.cfs_flush_in_free);
2508 }
2509
2510 zfree(cfil_info_zone, cfil_info);
2511 }
2512
2513 /*
2514 * Received a verdict from userspace for a socket.
2515 * Perform any delayed operation if needed.
2516 */
2517 static void
2518 cfil_sock_received_verdict(struct socket *so)
2519 {
2520 if (so == NULL || so->so_cfil == NULL) {
2521 return;
2522 }
2523
2524 so->so_cfil->cfi_flags |= CFIF_INITIAL_VERDICT;
2525
2526 /*
2530 2527 * If the socket has already been connected, trigger
2531 2528 * soisconnected() now.
2529 */
2530 if (so->so_cfil->cfi_flags & CFIF_SOCKET_CONNECTED) {
2531 so->so_cfil->cfi_flags &= ~CFIF_SOCKET_CONNECTED;
2532 soisconnected(so);
2533 return;
2534 }
2535 }
2536
2537 /*
2538 * Entry point from Sockets layer
2539 * The socket is locked.
2540 *
2544 2541 * Checks if a connected socket is subject to filtering and
2545 2542 * still pending the initial verdict.
2543 */
2544 boolean_t
2545 cfil_sock_connected_pending_verdict(struct socket *so)
2546 {
2547 if (so == NULL || so->so_cfil == NULL) {
2548 return false;
2549 }
2550
2551 if (so->so_cfil->cfi_flags & CFIF_INITIAL_VERDICT) {
2552 return false;
2553 } else {
2554 /*
2555 * Remember that this protocol is already connected, so
2556 * we will trigger soisconnected() upon receipt of
2560 2557 * the initial verdict later.
2558 */
2559 so->so_cfil->cfi_flags |= CFIF_SOCKET_CONNECTED;
2560 return true;
2561 }
2562 }
2563
2564 boolean_t
2565 cfil_filter_present(void)
2566 {
2567 return cfil_active_count > 0;
2568 }
2569
2570 /*
2571 * Entry point from Sockets layer
2572 * The socket is locked.
2573 */
2574 errno_t
2575 cfil_sock_attach(struct socket *so, struct sockaddr *local, struct sockaddr *remote, int dir)
2576 {
2577 errno_t error = 0;
2578 uint32_t filter_control_unit;
2579
2580 socket_lock_assert_owned(so);
2581
2585 2582 /* Limit ourselves to TCP sockets that are not MPTCP subflows */
2583 if ((so->so_proto->pr_domain->dom_family != PF_INET &&
2584 so->so_proto->pr_domain->dom_family != PF_INET6) ||
2585 so->so_proto->pr_type != SOCK_STREAM ||
2586 so->so_proto->pr_protocol != IPPROTO_TCP ||
2587 (so->so_flags & SOF_MP_SUBFLOW) != 0 ||
2588 (so->so_flags1 & SOF1_CONTENT_FILTER_SKIP) != 0) {
2589 goto done;
2590 }
2591
2592 filter_control_unit = necp_socket_get_content_filter_control_unit(so);
2593 if (filter_control_unit == 0) {
2594 goto done;
2595 }
2596
2597 if (filter_control_unit == NECP_FILTER_UNIT_NO_FILTER) {
2598 goto done;
2599 }
2600 if ((filter_control_unit & NECP_MASK_USERSPACE_ONLY) != 0) {
2601 OSIncrementAtomic(&cfil_stats.cfs_sock_userspace_only);
2602 goto done;
2603 }
2604 if (cfil_active_count == 0) {
2605 OSIncrementAtomic(&cfil_stats.cfs_sock_attach_in_vain);
2606 goto done;
2607 }
2608 if (so->so_cfil != NULL) {
2609 OSIncrementAtomic(&cfil_stats.cfs_sock_attach_already);
2610 CFIL_LOG(LOG_ERR, "already attached");
2611 } else {
2612 cfil_info_alloc(so, NULL);
2613 if (so->so_cfil == NULL) {
2614 error = ENOMEM;
2615 OSIncrementAtomic(&cfil_stats.cfs_sock_attach_no_mem);
2616 goto done;
2617 }
2618 so->so_cfil->cfi_dir = dir;
2619 }
2620 if (cfil_info_attach_unit(so, filter_control_unit, so->so_cfil) == 0) {
2621 CFIL_LOG(LOG_ERR, "cfil_info_attach_unit(%u) failed",
2622 filter_control_unit);
2623 OSIncrementAtomic(&cfil_stats.cfs_sock_attach_failed);
2624 goto done;
2625 }
2626 CFIL_LOG(LOG_INFO, "so %llx filter_control_unit %u sockID %llx",
2627 (uint64_t)VM_KERNEL_ADDRPERM(so),
2628 filter_control_unit, so->so_cfil->cfi_sock_id);
2629
2630 so->so_flags |= SOF_CONTENT_FILTER;
2631 OSIncrementAtomic(&cfil_stats.cfs_sock_attached);
2632
2633 /* Hold a reference on the socket */
2634 so->so_usecount++;
2635
2636 /*
2640 2637 * Save passed addresses for the attach event msg (in case a resend
2641 2638 * is needed).
2639 */
2640 if (remote != NULL) {
2641 memcpy(&so->so_cfil->cfi_so_attach_faddr, remote, remote->sa_len);
2642 }
2643 if (local != NULL) {
2644 memcpy(&so->so_cfil->cfi_so_attach_laddr, local, local->sa_len);
2645 }
2646
2647 error = cfil_dispatch_attach_event(so, so->so_cfil, 0, dir);
2648 /* We can recover from flow control or out of memory errors */
2649 if (error == ENOBUFS || error == ENOMEM) {
2650 error = 0;
2651 } else if (error != 0) {
2652 goto done;
2653 }
2654
2655 CFIL_INFO_VERIFY(so->so_cfil);
2656 done:
2657 return error;
2658 }
2659
2660 /*
2661 * Entry point from Sockets layer
2662 * The socket is locked.
2663 */
2664 errno_t
2665 cfil_sock_detach(struct socket *so)
2666 {
2667 if (IS_UDP(so)) {
2668 cfil_db_free(so);
2669 return 0;
2670 }
2671
2672 if (so->so_cfil) {
2673 if (so->so_flags & SOF_CONTENT_FILTER) {
2674 so->so_flags &= ~SOF_CONTENT_FILTER;
2675 VERIFY(so->so_usecount > 0);
2676 so->so_usecount--;
2677 }
2678 cfil_info_free(so->so_cfil);
2679 so->so_cfil = NULL;
2680 OSIncrementAtomic(&cfil_stats.cfs_sock_detached);
2681 }
2682 return 0;
2683 }
2684
2685 /*
2689 2686 * Fill in the address info of an event message from either
2690 2687 * the socket or the passed-in address info.
2688 */
2689 static void
2690 cfil_fill_event_msg_addresses(struct cfil_hash_entry *entry, struct inpcb *inp,
2691 union sockaddr_in_4_6 *sin_src, union sockaddr_in_4_6 *sin_dst,
2692 boolean_t isIPv4, boolean_t outgoing)
2693 {
2694 if (isIPv4) {
2695 struct in_addr laddr = {0}, faddr = {0};
2696 u_int16_t lport = 0, fport = 0;
2697
2698 cfil_get_flow_address(entry, inp, &laddr, &faddr, &lport, &fport);
2699
2700 if (outgoing) {
2701 fill_ip_sockaddr_4_6(sin_src, laddr, lport);
2702 fill_ip_sockaddr_4_6(sin_dst, faddr, fport);
2703 } else {
2704 fill_ip_sockaddr_4_6(sin_src, faddr, fport);
2705 fill_ip_sockaddr_4_6(sin_dst, laddr, lport);
2706 }
2707 } else {
2708 struct in6_addr *laddr = NULL, *faddr = NULL;
2709 u_int16_t lport = 0, fport = 0;
2710
2711 cfil_get_flow_address_v6(entry, inp, &laddr, &faddr, &lport, &fport);
2712 if (outgoing) {
2713 fill_ip6_sockaddr_4_6(sin_src, laddr, lport);
2714 fill_ip6_sockaddr_4_6(sin_dst, faddr, fport);
2715 } else {
2716 fill_ip6_sockaddr_4_6(sin_src, faddr, fport);
2717 fill_ip6_sockaddr_4_6(sin_dst, laddr, lport);
2718 }
2719 }
2720 }
2721
2722 static boolean_t
2723 cfil_dispatch_attach_event_sign(cfil_crypto_state_t crypto_state,
2724 struct cfil_info *cfil_info,
2725 struct cfil_msg_sock_attached *msg)
2726 {
2727 struct cfil_crypto_data data = {};
2728
2729 if (crypto_state == NULL || msg == NULL || cfil_info == NULL) {
2730 return false;
2731 }
2732
2733 data.sock_id = msg->cfs_msghdr.cfm_sock_id;
2734 data.direction = msg->cfs_conn_dir;
2735
2736 data.pid = msg->cfs_pid;
2737 data.effective_pid = msg->cfs_e_pid;
2738 uuid_copy(data.uuid, msg->cfs_uuid);
2739 uuid_copy(data.effective_uuid, msg->cfs_e_uuid);
2740 data.socketProtocol = msg->cfs_sock_protocol;
2741 if (data.direction == CFS_CONNECTION_DIR_OUT) {
2742 data.remote.sin6 = msg->cfs_dst.sin6;
2743 data.local.sin6 = msg->cfs_src.sin6;
2744 } else {
2745 data.remote.sin6 = msg->cfs_src.sin6;
2746 data.local.sin6 = msg->cfs_dst.sin6;
2747 }
2748
2749 // At attach, if local address is already present, no need to re-sign subsequent data messages.
2750 if (!NULLADDRESS(data.local)) {
2751 cfil_info->cfi_isSignatureLatest = true;
2752 }
2753
2754 msg->cfs_signature_length = sizeof(cfil_crypto_signature);
2755 if (cfil_crypto_sign_data(crypto_state, &data, msg->cfs_signature, &msg->cfs_signature_length) != 0) {
2756 msg->cfs_signature_length = 0;
2757 CFIL_LOG(LOG_ERR, "CFIL: Failed to sign attached msg <sockID %llu>",
2758 msg->cfs_msghdr.cfm_sock_id);
2759 return false;
2760 }
2761
2762 return true;
2763 }
2764
2765 static boolean_t
2766 cfil_dispatch_data_event_sign(cfil_crypto_state_t crypto_state,
2767 struct socket *so, struct cfil_info *cfil_info,
2768 struct cfil_msg_data_event *msg)
2769 {
2770 struct cfil_crypto_data data = {};
2771
2772 if (crypto_state == NULL || msg == NULL ||
2773 so == NULL || cfil_info == NULL) {
2774 return false;
2775 }
2776
2777 data.sock_id = cfil_info->cfi_sock_id;
2778 data.direction = cfil_info->cfi_dir;
2779 data.pid = so->last_pid;
2780 memcpy(data.uuid, so->last_uuid, sizeof(uuid_t));
2781 if (so->so_flags & SOF_DELEGATED) {
2782 data.effective_pid = so->e_pid;
2783 memcpy(data.effective_uuid, so->e_uuid, sizeof(uuid_t));
2784 } else {
2785 data.effective_pid = so->last_pid;
2786 memcpy(data.effective_uuid, so->last_uuid, sizeof(uuid_t));
2787 }
2788 data.socketProtocol = so->so_proto->pr_protocol;
2789
2790 if (data.direction == CFS_CONNECTION_DIR_OUT) {
2791 data.remote.sin6 = msg->cfc_dst.sin6;
2792 data.local.sin6 = msg->cfc_src.sin6;
2793 } else {
2794 data.remote.sin6 = msg->cfc_src.sin6;
2795 data.local.sin6 = msg->cfc_dst.sin6;
2796 }
2797
2801 2798 // At the first data event, the local address may show up for the first time; update the
2802 2799 // address cache so subsequent data messages no longer need to be re-signed.
2800 if (!NULLADDRESS(data.local)) {
2801 memcpy(&cfil_info->cfi_so_attach_laddr, &data.local, data.local.sa.sa_len);
2802 cfil_info->cfi_isSignatureLatest = true;
2803 }
2804
2805 msg->cfd_signature_length = sizeof(cfil_crypto_signature);
2806 if (cfil_crypto_sign_data(crypto_state, &data, msg->cfd_signature, &msg->cfd_signature_length) != 0) {
2807 msg->cfd_signature_length = 0;
2808 CFIL_LOG(LOG_ERR, "CFIL: Failed to sign data msg <sockID %llu>",
2809 msg->cfd_msghdr.cfm_sock_id);
2810 return false;
2811 }
2812
2813 return true;
2814 }
2815
2816 static boolean_t
2817 cfil_dispatch_closed_event_sign(cfil_crypto_state_t crypto_state,
2818 struct socket *so, struct cfil_info *cfil_info,
2819 struct cfil_msg_sock_closed *msg)
2820 {
2821 struct cfil_crypto_data data = {};
2822 struct cfil_hash_entry hash_entry = {};
2823 struct cfil_hash_entry *hash_entry_ptr = NULL;
2824 struct inpcb *inp = (struct inpcb *)so->so_pcb;
2825
2826 if (crypto_state == NULL || msg == NULL ||
2827 so == NULL || inp == NULL || cfil_info == NULL) {
2828 return false;
2829 }
2830
2831 data.sock_id = cfil_info->cfi_sock_id;
2832 data.direction = cfil_info->cfi_dir;
2833
2834 data.pid = so->last_pid;
2835 memcpy(data.uuid, so->last_uuid, sizeof(uuid_t));
2836 if (so->so_flags & SOF_DELEGATED) {
2837 data.effective_pid = so->e_pid;
2838 memcpy(data.effective_uuid, so->e_uuid, sizeof(uuid_t));
2839 } else {
2840 data.effective_pid = so->last_pid;
2841 memcpy(data.effective_uuid, so->last_uuid, sizeof(uuid_t));
2842 }
2843 data.socketProtocol = so->so_proto->pr_protocol;
2844
2845 /*
2846 * Fill in address info:
2847 * For UDP, use the cfil_info hash entry directly.
2851 2848 * For TCP, compose a hash entry with the saved addresses.
2849 */
2850 if (cfil_info->cfi_hash_entry != NULL) {
2851 hash_entry_ptr = cfil_info->cfi_hash_entry;
2852 } else if (cfil_info->cfi_so_attach_faddr.sa.sa_len > 0 ||
2853 cfil_info->cfi_so_attach_laddr.sa.sa_len > 0) {
2854 fill_cfil_hash_entry_from_address(&hash_entry, TRUE, &cfil_info->cfi_so_attach_laddr.sa);
2855 fill_cfil_hash_entry_from_address(&hash_entry, FALSE, &cfil_info->cfi_so_attach_faddr.sa);
2856 hash_entry_ptr = &hash_entry;
2857 }
2858 if (hash_entry_ptr != NULL) {
2859 boolean_t outgoing = (cfil_info->cfi_dir == CFS_CONNECTION_DIR_OUT);
2860 union sockaddr_in_4_6 *src = outgoing ? &data.local : &data.remote;
2861 union sockaddr_in_4_6 *dst = outgoing ? &data.remote : &data.local;
2862 cfil_fill_event_msg_addresses(hash_entry_ptr, inp, src, dst, inp->inp_vflag & INP_IPV4, outgoing);
2863 }
2864
2865 data.byte_count_in = cfil_info->cfi_byte_inbound_count;
2866 data.byte_count_out = cfil_info->cfi_byte_outbound_count;
2867
2868 msg->cfc_signature_length = sizeof(cfil_crypto_signature);
2869 if (cfil_crypto_sign_data(crypto_state, &data, msg->cfc_signature, &msg->cfc_signature_length) != 0) {
2870 msg->cfc_signature_length = 0;
2871 CFIL_LOG(LOG_ERR, "CFIL: Failed to sign closed msg <sockID %llu>",
2872 msg->cfc_msghdr.cfm_sock_id);
2873 return false;
2874 }
2875
2876 return true;
2877 }
2878
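/*
 * cfil_dispatch_attach_event
 *
 * Builds a CFM_OP_SOCKET_ATTACHED event (addresses, pids, UUIDs, audit
 * token and signature) and enqueues it on the filter agent's kernel control
 * socket. A kcunit of 0 selects the first entry in control-unit order.
 * ENOBUFS from ctl_enqueuedata() puts the filter in flow-controlled state.
 */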
2879 static int
2880 cfil_dispatch_attach_event(struct socket *so, struct cfil_info *cfil_info,
2881 uint32_t kcunit, int conn_dir)
2882 {
2883 errno_t error = 0;
2884 struct cfil_entry *entry = NULL;
2885 struct cfil_msg_sock_attached msg_attached;
2886 struct content_filter *cfc = NULL;
2887 struct inpcb *inp = (struct inpcb *)so->so_pcb;
2888 struct cfil_hash_entry *hash_entry_ptr = NULL;
2889 struct cfil_hash_entry hash_entry;
2890
2891 memset(&hash_entry, 0, sizeof(struct cfil_hash_entry));
2892 proc_t p = PROC_NULL;
2893 task_t t = TASK_NULL;
2894
2895 socket_lock_assert_owned(so);
2896
2897 cfil_rw_lock_shared(&cfil_lck_rw);
2898
2899 if (so->so_proto == NULL || so->so_proto->pr_domain == NULL) {
2900 error = EINVAL;
2901 goto done;
2902 }
2903
2904 if (kcunit == 0) {
2905 entry = SLIST_FIRST(&cfil_info->cfi_ordered_entries);
2906 } else {
2907 entry = &cfil_info->cfi_entries[kcunit - 1];
2908 }
2909
2910 if (entry == NULL) {
2911 goto done;
2912 }
2913
2914 cfc = entry->cfe_filter;
2915 if (cfc == NULL) {
2916 goto done;
2917 }
2918
2919 if ((entry->cfe_flags & CFEF_SENT_SOCK_ATTACHED)) {
2920 goto done;
2921 }
2922
2923 if (kcunit == 0) {
2924 kcunit = CFI_ENTRY_KCUNIT(cfil_info, entry);
2925 }
2926
2927 CFIL_LOG(LOG_INFO, "so %llx filter_control_unit %u kcunit %u",
2928 (uint64_t)VM_KERNEL_ADDRPERM(so), entry->cfe_necp_control_unit, kcunit);
2929
2930 /* Would be wasteful to try when flow controlled */
2931 if (cfc->cf_flags & CFF_FLOW_CONTROLLED) {
2932 error = ENOBUFS;
2933 goto done;
2934 }
2935
2936 bzero(&msg_attached, sizeof(struct cfil_msg_sock_attached));
2937 msg_attached.cfs_msghdr.cfm_len = sizeof(struct cfil_msg_sock_attached);
2938 msg_attached.cfs_msghdr.cfm_version = CFM_VERSION_CURRENT;
2939 msg_attached.cfs_msghdr.cfm_type = CFM_TYPE_EVENT;
2940 msg_attached.cfs_msghdr.cfm_op = CFM_OP_SOCKET_ATTACHED;
2941 msg_attached.cfs_msghdr.cfm_sock_id = entry->cfe_cfil_info->cfi_sock_id;
2942
2943 msg_attached.cfs_sock_family = so->so_proto->pr_domain->dom_family;
2944 msg_attached.cfs_sock_type = so->so_proto->pr_type;
2945 msg_attached.cfs_sock_protocol = so->so_proto->pr_protocol;
2946 msg_attached.cfs_pid = so->last_pid;
2947 memcpy(msg_attached.cfs_uuid, so->last_uuid, sizeof(uuid_t));
2948 if (so->so_flags & SOF_DELEGATED) {
2949 msg_attached.cfs_e_pid = so->e_pid;
2950 memcpy(msg_attached.cfs_e_uuid, so->e_uuid, sizeof(uuid_t));
2951 } else {
2952 msg_attached.cfs_e_pid = so->last_pid;
2953 memcpy(msg_attached.cfs_e_uuid, so->last_uuid, sizeof(uuid_t));
2954 }
2955
2956 /*
2957 * Fill in address info:
2958 * For UDP, use the cfil_info hash entry directly.
2962 2959 * For TCP, compose a hash entry with the saved addresses.
2960 */
2961 if (cfil_info->cfi_hash_entry != NULL) {
2962 hash_entry_ptr = cfil_info->cfi_hash_entry;
2963 } else if (cfil_info->cfi_so_attach_faddr.sa.sa_len > 0 ||
2964 cfil_info->cfi_so_attach_laddr.sa.sa_len > 0) {
2965 fill_cfil_hash_entry_from_address(&hash_entry, TRUE, &cfil_info->cfi_so_attach_laddr.sa);
2966 fill_cfil_hash_entry_from_address(&hash_entry, FALSE, &cfil_info->cfi_so_attach_faddr.sa);
2967 hash_entry_ptr = &hash_entry;
2968 }
2969 if (hash_entry_ptr != NULL) {
2970 cfil_fill_event_msg_addresses(hash_entry_ptr, inp,
2971 &msg_attached.cfs_src, &msg_attached.cfs_dst,
2972 inp->inp_vflag & INP_IPV4, conn_dir == CFS_CONNECTION_DIR_OUT);
2973 }
2974 msg_attached.cfs_conn_dir = conn_dir;
2975
2976 if (msg_attached.cfs_e_pid != 0) {
2977 p = proc_find(msg_attached.cfs_e_pid);
2978 if (p != PROC_NULL) {
2979 t = proc_task(p);
2980 if (t != TASK_NULL) {
2981 audit_token_t audit_token;
2982 mach_msg_type_number_t count = TASK_AUDIT_TOKEN_COUNT;
2983 if (task_info(t, TASK_AUDIT_TOKEN, (task_info_t)&audit_token, &count) == KERN_SUCCESS) {
2984 memcpy(&msg_attached.cfs_audit_token, &audit_token, sizeof(msg_attached.cfs_audit_token));
2985 } else {
2986 CFIL_LOG(LOG_ERR, "CFIL: Failed to get process audit token <sockID %llu> ",
2987 entry->cfe_cfil_info->cfi_sock_id);
2988 }
2989 }
2990 proc_rele(p);
2991 }
2992 }
2993
2994 cfil_dispatch_attach_event_sign(entry->cfe_filter->cf_crypto_state, cfil_info, &msg_attached);
2995
2996 #if LIFECYCLE_DEBUG
2997 CFIL_LOG(LOG_DEBUG, "CFIL: LIFECYCLE: SENDING ATTACH UP <sockID %llu> ",
2998 entry->cfe_cfil_info->cfi_sock_id);
2999 #endif
3000
3001 error = ctl_enqueuedata(entry->cfe_filter->cf_kcref,
3002 entry->cfe_filter->cf_kcunit,
3003 &msg_attached,
3004 sizeof(struct cfil_msg_sock_attached),
3005 CTL_DATA_EOR);
3006 if (error != 0) {
3007 CFIL_LOG(LOG_ERR, "ctl_enqueuedata() failed: %d", error);
3008 goto done;
3009 }
3010 microuptime(&entry->cfe_last_event);
3011 cfil_info->cfi_first_event.tv_sec = entry->cfe_last_event.tv_sec;
3012 cfil_info->cfi_first_event.tv_usec = entry->cfe_last_event.tv_usec;
3013
3014 entry->cfe_flags |= CFEF_SENT_SOCK_ATTACHED;
3015 OSIncrementAtomic(&cfil_stats.cfs_attach_event_ok);
3016 done:
3017
3018 /* We can recover from flow control */
3019 if (error == ENOBUFS) {
3020 entry->cfe_flags |= CFEF_FLOW_CONTROLLED;
3021 OSIncrementAtomic(&cfil_stats.cfs_attach_event_flow_control);
3022
3023 if (!cfil_rw_lock_shared_to_exclusive(&cfil_lck_rw)) {
3024 cfil_rw_lock_exclusive(&cfil_lck_rw);
3025 }
3026
3027 cfc->cf_flags |= CFF_FLOW_CONTROLLED;
3028
3029 cfil_rw_unlock_exclusive(&cfil_lck_rw);
3030 } else {
3031 if (error != 0) {
3032 OSIncrementAtomic(&cfil_stats.cfs_attach_event_fail);
3033 }
3034
3035 cfil_rw_unlock_shared(&cfil_lck_rw);
3036 }
3037 return error;
3038 }
3039
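/*
 * cfil_dispatch_disconnect_event
 *
 * Sends a single CFM_OP_DISCONNECT_OUT or CFM_OP_DISCONNECT_IN event per
 * direction to the filter agent. The outgoing event is deferred with EBUSY
 * while data is still queued for delivery to the filter.
 */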
3040 static int
3041 cfil_dispatch_disconnect_event(struct socket *so, struct cfil_info *cfil_info, uint32_t kcunit, int outgoing)
3042 {
3043 errno_t error = 0;
3044 struct mbuf *msg = NULL;
3045 struct cfil_entry *entry;
3046 struct cfe_buf *entrybuf;
3047 struct cfil_msg_hdr msg_disconnected;
3048 struct content_filter *cfc;
3049
3050 socket_lock_assert_owned(so);
3051
3052 cfil_rw_lock_shared(&cfil_lck_rw);
3053
3054 entry = &cfil_info->cfi_entries[kcunit - 1];
3055 if (outgoing) {
3056 entrybuf = &entry->cfe_snd;
3057 } else {
3058 entrybuf = &entry->cfe_rcv;
3059 }
3060
3061 cfc = entry->cfe_filter;
3062 if (cfc == NULL) {
3063 goto done;
3064 }
3065
3066 CFIL_LOG(LOG_INFO, "so %llx kcunit %u outgoing %d",
3067 (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit, outgoing);
3068
3069 /*
3070 * Send the disconnection event once
3071 */
3072 if ((outgoing && (entry->cfe_flags & CFEF_SENT_DISCONNECT_OUT)) ||
3073 (!outgoing && (entry->cfe_flags & CFEF_SENT_DISCONNECT_IN))) {
3074 CFIL_LOG(LOG_INFO, "so %llx disconnect already sent",
3075 (uint64_t)VM_KERNEL_ADDRPERM(so));
3076 goto done;
3077 }
3078
3079 /*
3080 * We're not disconnected as long as some data is waiting
3081 * to be delivered to the filter
3082 */
3083 if (outgoing && cfil_queue_empty(&entrybuf->cfe_ctl_q) == 0) {
3084 CFIL_LOG(LOG_INFO, "so %llx control queue not empty",
3085 (uint64_t)VM_KERNEL_ADDRPERM(so));
3086 error = EBUSY;
3087 goto done;
3088 }
3089 /* Would be wasteful to try when flow controlled */
3090 if (cfc->cf_flags & CFF_FLOW_CONTROLLED) {
3091 error = ENOBUFS;
3092 goto done;
3093 }
3094
3095 #if LIFECYCLE_DEBUG
3096 cfil_info_log(LOG_ERR, cfil_info, outgoing ?
3097 "CFIL: LIFECYCLE: OUT - SENDING DISCONNECT UP":
3098 "CFIL: LIFECYCLE: IN - SENDING DISCONNECT UP");
3099 #endif
3100
3101 bzero(&msg_disconnected, sizeof(struct cfil_msg_hdr));
3102 msg_disconnected.cfm_len = sizeof(struct cfil_msg_hdr);
3103 msg_disconnected.cfm_version = CFM_VERSION_CURRENT;
3104 msg_disconnected.cfm_type = CFM_TYPE_EVENT;
3105 msg_disconnected.cfm_op = outgoing ? CFM_OP_DISCONNECT_OUT :
3106 CFM_OP_DISCONNECT_IN;
3107 msg_disconnected.cfm_sock_id = entry->cfe_cfil_info->cfi_sock_id;
3108 error = ctl_enqueuedata(entry->cfe_filter->cf_kcref,
3109 entry->cfe_filter->cf_kcunit,
3110 &msg_disconnected,
3111 sizeof(struct cfil_msg_hdr),
3112 CTL_DATA_EOR);
3113 if (error != 0) {
3114 CFIL_LOG(LOG_ERR, "ctl_enqueuembuf() failed: %d", error);
3115 mbuf_freem(msg);
3116 goto done;
3117 }
3118 microuptime(&entry->cfe_last_event);
3119 CFI_ADD_TIME_LOG(cfil_info, &entry->cfe_last_event, &cfil_info->cfi_first_event, msg_disconnected.cfm_op);
3120
3121 /* Remember we have sent the disconnection message */
3122 if (outgoing) {
3123 entry->cfe_flags |= CFEF_SENT_DISCONNECT_OUT;
3124 OSIncrementAtomic(&cfil_stats.cfs_disconnect_out_event_ok);
3125 } else {
3126 entry->cfe_flags |= CFEF_SENT_DISCONNECT_IN;
3127 OSIncrementAtomic(&cfil_stats.cfs_disconnect_in_event_ok);
3128 }
3129 done:
3130 if (error == ENOBUFS) {
3131 entry->cfe_flags |= CFEF_FLOW_CONTROLLED;
3132 OSIncrementAtomic(
3133 &cfil_stats.cfs_disconnect_event_flow_control);
3134
3135 if (!cfil_rw_lock_shared_to_exclusive(&cfil_lck_rw)) {
3136 cfil_rw_lock_exclusive(&cfil_lck_rw);
3137 }
3138
3139 cfc->cf_flags |= CFF_FLOW_CONTROLLED;
3140
3141 cfil_rw_unlock_exclusive(&cfil_lck_rw);
3142 } else {
3143 if (error != 0) {
3144 OSIncrementAtomic(
3145 &cfil_stats.cfs_disconnect_event_fail);
3146 }
3147
3148 cfil_rw_unlock_shared(&cfil_lck_rw);
3149 }
3150 return error;
3151 }
3152
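/*
 * cfil_dispatch_closed_event
 *
 * Sends a single CFM_OP_SOCKET_CLOSED event per filter, carrying the
 * per-flow byte counts, the operation time log and a signature, once the
 * attach event has been sent.
 */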
3153 int
3154 cfil_dispatch_closed_event(struct socket *so, struct cfil_info *cfil_info, int kcunit)
3155 {
3156 struct cfil_entry *entry;
3157 struct cfil_msg_sock_closed msg_closed;
3158 errno_t error = 0;
3159 struct content_filter *cfc;
3160
3161 socket_lock_assert_owned(so);
3162
3163 cfil_rw_lock_shared(&cfil_lck_rw);
3164
3165 entry = &cfil_info->cfi_entries[kcunit - 1];
3166 cfc = entry->cfe_filter;
3167 if (cfc == NULL) {
3168 goto done;
3169 }
3170
3171 CFIL_LOG(LOG_INFO, "so %llx kcunit %d",
3172 (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit);
3173
3174 /* Would be wasteful to try when flow controlled */
3175 if (cfc->cf_flags & CFF_FLOW_CONTROLLED) {
3176 error = ENOBUFS;
3177 goto done;
3178 }
3179 /*
3180 * Send a single closed message per filter
3181 */
3182 if ((entry->cfe_flags & CFEF_SENT_SOCK_CLOSED) != 0) {
3183 goto done;
3184 }
3185 if ((entry->cfe_flags & CFEF_SENT_SOCK_ATTACHED) == 0) {
3186 goto done;
3187 }
3188
3189 microuptime(&entry->cfe_last_event);
3190 CFI_ADD_TIME_LOG(cfil_info, &entry->cfe_last_event, &cfil_info->cfi_first_event, CFM_OP_SOCKET_CLOSED);
3191
3192 bzero(&msg_closed, sizeof(struct cfil_msg_sock_closed));
3193 msg_closed.cfc_msghdr.cfm_len = sizeof(struct cfil_msg_sock_closed);
3194 msg_closed.cfc_msghdr.cfm_version = CFM_VERSION_CURRENT;
3195 msg_closed.cfc_msghdr.cfm_type = CFM_TYPE_EVENT;
3196 msg_closed.cfc_msghdr.cfm_op = CFM_OP_SOCKET_CLOSED;
3197 msg_closed.cfc_msghdr.cfm_sock_id = entry->cfe_cfil_info->cfi_sock_id;
3198 msg_closed.cfc_first_event.tv_sec = cfil_info->cfi_first_event.tv_sec;
3199 msg_closed.cfc_first_event.tv_usec = cfil_info->cfi_first_event.tv_usec;
3200 memcpy(msg_closed.cfc_op_time, cfil_info->cfi_op_time, sizeof(uint32_t) * CFI_MAX_TIME_LOG_ENTRY);
3201 memcpy(msg_closed.cfc_op_list, cfil_info->cfi_op_list, sizeof(unsigned char) * CFI_MAX_TIME_LOG_ENTRY);
3202 msg_closed.cfc_op_list_ctr = cfil_info->cfi_op_list_ctr;
3203 msg_closed.cfc_byte_inbound_count = cfil_info->cfi_byte_inbound_count;
3204 msg_closed.cfc_byte_outbound_count = cfil_info->cfi_byte_outbound_count;
3205
3206 cfil_dispatch_closed_event_sign(entry->cfe_filter->cf_crypto_state, so, cfil_info, &msg_closed);
3207
3208 #if LIFECYCLE_DEBUG
3209 CFIL_LOG(LOG_ERR, "CFIL: LIFECYCLE: SENDING CLOSED UP: <sock id %llu> op ctr %d, start time %llu.%llu", msg_closed.cfc_msghdr.cfm_sock_id, cfil_info->cfi_op_list_ctr, cfil_info->cfi_first_event.tv_sec, cfil_info->cfi_first_event.tv_usec);
3210 #endif
3211 /* for debugging
3212 * if (msg_closed.cfc_op_list_ctr > CFI_MAX_TIME_LOG_ENTRY) {
3213 * msg_closed.cfc_op_list_ctr = CFI_MAX_TIME_LOG_ENTRY; // just in case
3214 * }
3215 * for (unsigned int i = 0; i < msg_closed.cfc_op_list_ctr ; i++) {
3216 * CFIL_LOG(LOG_ERR, "MD: socket %llu event %2u, time + %u msec", msg_closed.cfc_msghdr.cfm_sock_id, (unsigned short)msg_closed.cfc_op_list[i], msg_closed.cfc_op_time[i]);
3217 * }
3218 */
3219
3220 error = ctl_enqueuedata(entry->cfe_filter->cf_kcref,
3221 entry->cfe_filter->cf_kcunit,
3222 &msg_closed,
3223 sizeof(struct cfil_msg_sock_closed),
3224 CTL_DATA_EOR);
3225 if (error != 0) {
3226 CFIL_LOG(LOG_ERR, "ctl_enqueuedata() failed: %d",
3227 error);
3228 goto done;
3229 }
3230
3231 entry->cfe_flags |= CFEF_SENT_SOCK_CLOSED;
3232 OSIncrementAtomic(&cfil_stats.cfs_closed_event_ok);
3233 done:
3234 /* We can recover from flow control */
3235 if (error == ENOBUFS) {
3236 entry->cfe_flags |= CFEF_FLOW_CONTROLLED;
3237 OSIncrementAtomic(&cfil_stats.cfs_closed_event_flow_control);
3238
3239 if (!cfil_rw_lock_shared_to_exclusive(&cfil_lck_rw)) {
3240 cfil_rw_lock_exclusive(&cfil_lck_rw);
3241 }
3242
3243 cfc->cf_flags |= CFF_FLOW_CONTROLLED;
3244
3245 cfil_rw_unlock_exclusive(&cfil_lck_rw);
3246 } else {
3247 if (error != 0) {
3248 OSIncrementAtomic(&cfil_stats.cfs_closed_event_fail);
3249 }
3250
3251 cfil_rw_unlock_shared(&cfil_lck_rw);
3252 }
3253
3254 return error;
3255 }
3256
3257 static void
3258 fill_ip6_sockaddr_4_6(union sockaddr_in_4_6 *sin46,
3259 struct in6_addr *ip6, u_int16_t port)
3260 {
3261 struct sockaddr_in6 *sin6 = &sin46->sin6;
3262
3263 sin6->sin6_family = AF_INET6;
3264 sin6->sin6_len = sizeof(*sin6);
3265 sin6->sin6_port = port;
3266 sin6->sin6_addr = *ip6;
3267 if (IN6_IS_SCOPE_EMBED(&sin6->sin6_addr)) {
3268 sin6->sin6_scope_id = ntohs(sin6->sin6_addr.s6_addr16[1]);
3269 sin6->sin6_addr.s6_addr16[1] = 0;
3270 }
3271 }
3272
3273 static void
3274 fill_ip_sockaddr_4_6(union sockaddr_in_4_6 *sin46,
3275 struct in_addr ip, u_int16_t port)
3276 {
3277 struct sockaddr_in *sin = &sin46->sin;
3278
3279 sin->sin_family = AF_INET;
3280 sin->sin_len = sizeof(*sin);
3281 sin->sin_port = port;
3282 sin->sin_addr.s_addr = ip.s_addr;
3283 }
3284
3285 static void
3286 cfil_get_flow_address_v6(struct cfil_hash_entry *entry, struct inpcb *inp,
3287 struct in6_addr **laddr, struct in6_addr **faddr,
3288 u_int16_t *lport, u_int16_t *fport)
3289 {
3290 if (entry != NULL) {
3291 *laddr = &entry->cfentry_laddr.addr6;
3292 *faddr = &entry->cfentry_faddr.addr6;
3293 *lport = entry->cfentry_lport;
3294 *fport = entry->cfentry_fport;
3295 } else {
3296 *laddr = &inp->in6p_laddr;
3297 *faddr = &inp->in6p_faddr;
3298 *lport = inp->inp_lport;
3299 *fport = inp->inp_fport;
3300 }
3301 }
3302
3303 static void
3304 cfil_get_flow_address(struct cfil_hash_entry *entry, struct inpcb *inp,
3305 struct in_addr *laddr, struct in_addr *faddr,
3306 u_int16_t *lport, u_int16_t *fport)
3307 {
3308 if (entry != NULL) {
3309 *laddr = entry->cfentry_laddr.addr46.ia46_addr4;
3310 *faddr = entry->cfentry_faddr.addr46.ia46_addr4;
3311 *lport = entry->cfentry_lport;
3312 *fport = entry->cfentry_fport;
3313 } else {
3314 *laddr = inp->inp_laddr;
3315 *faddr = inp->inp_faddr;
3316 *lport = inp->inp_lport;
3317 *fport = inp->inp_fport;
3318 }
3319 }
3320
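/*
 * cfil_dispatch_data_event
 *
 * Copies [copyoffset, copyoffset + copylen) of the mbuf chain, prepends a
 * CFM_OP_DATA_OUT/CFM_OP_DATA_IN header with the peek offsets and address
 * info, and enqueues the message on the filter agent's kernel control
 * socket. ENOBUFS puts the filter in flow-controlled state.
 */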
3321 static int
3322 cfil_dispatch_data_event(struct socket *so, struct cfil_info *cfil_info, uint32_t kcunit, int outgoing,
3323 struct mbuf *data, unsigned int copyoffset, unsigned int copylen)
3324 {
3325 errno_t error = 0;
3326 struct mbuf *copy = NULL;
3327 struct mbuf *msg = NULL;
3328 unsigned int one = 1;
3329 struct cfil_msg_data_event *data_req;
3330 size_t hdrsize;
3331 struct inpcb *inp = (struct inpcb *)so->so_pcb;
3332 struct cfil_entry *entry;
3333 struct cfe_buf *entrybuf;
3334 struct content_filter *cfc;
3335 struct timeval tv;
3336
3337 cfil_rw_lock_shared(&cfil_lck_rw);
3338
3339 entry = &cfil_info->cfi_entries[kcunit - 1];
3340 if (outgoing) {
3341 entrybuf = &entry->cfe_snd;
3342 } else {
3343 entrybuf = &entry->cfe_rcv;
3344 }
3345
3346 cfc = entry->cfe_filter;
3347 if (cfc == NULL) {
3348 goto done;
3349 }
3350
3351 data = cfil_data_start(data);
3352 if (data == NULL || (data->m_flags & M_PKTHDR) == 0) {
3353 CFIL_LOG(LOG_ERR, "NOT PKTHDR");
3354 goto done;
3355 }
3356
3357 CFIL_LOG(LOG_INFO, "so %llx kcunit %u outgoing %d",
3358 (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit, outgoing);
3359
3360 socket_lock_assert_owned(so);
3361
3365 3362 /* Would be wasteful to try when flow controlled */
3363 if (cfc->cf_flags & CFF_FLOW_CONTROLLED) {
3364 error = ENOBUFS;
3365 goto done;
3366 }
3367
3368 /* Make a copy of the data to pass to kernel control socket */
3369 copy = m_copym_mode(data, copyoffset, copylen, M_DONTWAIT,
3370 M_COPYM_NOOP_HDR);
3371 if (copy == NULL) {
3372 CFIL_LOG(LOG_ERR, "m_copym_mode() failed");
3373 error = ENOMEM;
3374 goto done;
3375 }
3376
3377 /* We need an mbuf packet for the message header */
3378 hdrsize = sizeof(struct cfil_msg_data_event);
3379 error = mbuf_allocpacket(MBUF_DONTWAIT, hdrsize, &one, &msg);
3380 if (error != 0) {
3381 CFIL_LOG(LOG_ERR, "mbuf_allocpacket() failed");
3382 m_freem(copy);
3383 /*
3387 3384 * ENOBUFS is reserved to indicate flow control, so return ENOMEM here
3385 */
3386 error = ENOMEM;
3387 goto done;
3388 }
3389 mbuf_setlen(msg, hdrsize);
3390 mbuf_pkthdr_setlen(msg, hdrsize + copylen);
3391 msg->m_next = copy;
3392 data_req = (struct cfil_msg_data_event *)mbuf_data(msg);
3393 bzero(data_req, hdrsize);
3394 data_req->cfd_msghdr.cfm_len = hdrsize + copylen;
3395 data_req->cfd_msghdr.cfm_version = 1;
3396 data_req->cfd_msghdr.cfm_type = CFM_TYPE_EVENT;
3397 data_req->cfd_msghdr.cfm_op =
3398 outgoing ? CFM_OP_DATA_OUT : CFM_OP_DATA_IN;
3399 data_req->cfd_msghdr.cfm_sock_id =
3400 entry->cfe_cfil_info->cfi_sock_id;
3401 data_req->cfd_start_offset = entrybuf->cfe_peeked;
3402 data_req->cfd_end_offset = entrybuf->cfe_peeked + copylen;
3403
3404 /*
3405 * Copy address/port into event msg.
3409 3406 * For non-connected sockets the addresses need to be copied from the
3410 3407 * passed parameters.
3408 */
3409 cfil_fill_event_msg_addresses(cfil_info->cfi_hash_entry, inp,
3410 &data_req->cfc_src, &data_req->cfc_dst,
3411 inp->inp_vflag & INP_IPV4, outgoing);
3412
3413 if (cfil_info->cfi_isSignatureLatest == false) {
3414 cfil_dispatch_data_event_sign(entry->cfe_filter->cf_crypto_state, so, cfil_info, data_req);
3415 }
3416
3417 microuptime(&tv);
3418 CFI_ADD_TIME_LOG(cfil_info, &tv, &cfil_info->cfi_first_event, data_req->cfd_msghdr.cfm_op);
3419
3420 /* Pass the message to the content filter */
3421 error = ctl_enqueuembuf(entry->cfe_filter->cf_kcref,
3422 entry->cfe_filter->cf_kcunit,
3423 msg, CTL_DATA_EOR);
3424 if (error != 0) {
3425 CFIL_LOG(LOG_ERR, "ctl_enqueuembuf() failed: %d", error);
3426 mbuf_freem(msg);
3427 goto done;
3428 }
3429 entry->cfe_flags &= ~CFEF_FLOW_CONTROLLED;
3430 OSIncrementAtomic(&cfil_stats.cfs_data_event_ok);
3431
3432 #if VERDICT_DEBUG
3433 CFIL_LOG(LOG_ERR, "CFIL: VERDICT ACTION: so %llx sockID %llu outgoing %d: mbuf %llx copyoffset %u copylen %u",
3434 (uint64_t)VM_KERNEL_ADDRPERM(so), cfil_info->cfi_sock_id, outgoing, (uint64_t)VM_KERNEL_ADDRPERM(data), copyoffset, copylen);
3435 #endif
3436
3437 done:
3438 if (error == ENOBUFS) {
3439 entry->cfe_flags |= CFEF_FLOW_CONTROLLED;
3440 OSIncrementAtomic(
3441 &cfil_stats.cfs_data_event_flow_control);
3442
3443 if (!cfil_rw_lock_shared_to_exclusive(&cfil_lck_rw)) {
3444 cfil_rw_lock_exclusive(&cfil_lck_rw);
3445 }
3446
3447 cfc->cf_flags |= CFF_FLOW_CONTROLLED;
3448
3449 cfil_rw_unlock_exclusive(&cfil_lck_rw);
3450 } else {
3451 if (error != 0) {
3452 OSIncrementAtomic(&cfil_stats.cfs_data_event_fail);
3453 }
3454
3455 cfil_rw_unlock_shared(&cfil_lck_rw);
3456 }
3457 return error;
3458 }
3459
3460 /*
3461 * Process the queue of data waiting to be delivered to content filter
3462 */
3463 static int
3464 cfil_data_service_ctl_q(struct socket *so, struct cfil_info *cfil_info, uint32_t kcunit, int outgoing)
3465 {
3466 errno_t error = 0;
3467 struct mbuf *data, *tmp = NULL;
3468 unsigned int datalen = 0, copylen = 0, copyoffset = 0;
3469 struct cfil_entry *entry;
3470 struct cfe_buf *entrybuf;
3471 uint64_t currentoffset = 0;
3472
3473 if (cfil_info == NULL) {
3474 return 0;
3475 }
3476
3477 CFIL_LOG(LOG_INFO, "so %llx kcunit %u outgoing %d",
3478 (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit, outgoing);
3479
3480 socket_lock_assert_owned(so);
3481
3482 entry = &cfil_info->cfi_entries[kcunit - 1];
3483 if (outgoing) {
3484 entrybuf = &entry->cfe_snd;
3485 } else {
3486 entrybuf = &entry->cfe_rcv;
3487 }
3488
3489 /* Send the attach event if not yet done */
3490 if ((entry->cfe_flags & CFEF_SENT_SOCK_ATTACHED) == 0) {
3491 error = cfil_dispatch_attach_event(so, cfil_info, CFI_ENTRY_KCUNIT(cfil_info, entry),
3492 outgoing ? CFS_CONNECTION_DIR_OUT : CFS_CONNECTION_DIR_IN);
3493 if (error != 0) {
3494 /* We can recover from flow control */
3495 if (error == ENOBUFS || error == ENOMEM) {
3496 error = 0;
3497 }
3498 goto done;
3499 }
3500 } else if ((entry->cfe_flags & CFEF_DATA_START) == 0) {
3501 OSIncrementAtomic(&cfil_stats.cfs_ctl_q_not_started);
3502 goto done;
3503 }
3504
3505 #if DATA_DEBUG
3506 CFIL_LOG(LOG_DEBUG, "CFIL: SERVICE CTL-Q: pass_offset %llu peeked %llu peek_offset %llu",
3507 entrybuf->cfe_pass_offset,
3508 entrybuf->cfe_peeked,
3509 entrybuf->cfe_peek_offset);
3510 #endif
3511
3512 /* Move all data that can pass */
3513 while ((data = cfil_queue_first(&entrybuf->cfe_ctl_q)) != NULL &&
3514 entrybuf->cfe_ctl_q.q_start < entrybuf->cfe_pass_offset) {
3515 datalen = cfil_data_length(data, NULL, NULL);
3516 tmp = data;
3517
3518 if (entrybuf->cfe_ctl_q.q_start + datalen <=
3519 entrybuf->cfe_pass_offset) {
3520 /*
3521 * The first mbuf can fully pass
3522 */
3523 copylen = datalen;
3524 } else {
3525 /*
3526 * The first mbuf can partially pass
3527 */
3528 copylen = entrybuf->cfe_pass_offset -
3529 entrybuf->cfe_ctl_q.q_start;
3530 }
3531 VERIFY(copylen <= datalen);
3532
3533 #if DATA_DEBUG
3534 CFIL_LOG(LOG_DEBUG,
3535 "CFIL: SERVICE CTL-Q PASSING: %llx first %llu peeked %llu pass %llu peek %llu"
3536 "datalen %u copylen %u",
3537 (uint64_t)VM_KERNEL_ADDRPERM(tmp),
3538 entrybuf->cfe_ctl_q.q_start,
3539 entrybuf->cfe_peeked,
3540 entrybuf->cfe_pass_offset,
3541 entrybuf->cfe_peek_offset,
3542 datalen, copylen);
3543 #endif
3544
3545 /*
3546 * Data that passes has been peeked at explicitly or
3547 * implicitly
3548 */
3549 if (entrybuf->cfe_ctl_q.q_start + copylen >
3550 entrybuf->cfe_peeked) {
3551 entrybuf->cfe_peeked =
3552 entrybuf->cfe_ctl_q.q_start + copylen;
3553 }
3554 /*
3555 * Stop on partial pass
3556 */
3557 if (copylen < datalen) {
3558 break;
3559 }
3560
3561 /* All good, move full data from ctl queue to pending queue */
3562 cfil_queue_remove(&entrybuf->cfe_ctl_q, data, datalen);
3563
3564 cfil_queue_enqueue(&entrybuf->cfe_pending_q, data, datalen);
3565 if (outgoing) {
3566 OSAddAtomic64(datalen,
3567 &cfil_stats.cfs_pending_q_out_enqueued);
3568 } else {
3569 OSAddAtomic64(datalen,
3570 &cfil_stats.cfs_pending_q_in_enqueued);
3571 }
3572 }
3573 CFIL_INFO_VERIFY(cfil_info);
3574 if (tmp != NULL) {
3575 CFIL_LOG(LOG_DEBUG,
3576 "%llx first %llu peeked %llu pass %llu peek %llu"
3577 "datalen %u copylen %u",
3578 (uint64_t)VM_KERNEL_ADDRPERM(tmp),
3579 entrybuf->cfe_ctl_q.q_start,
3580 entrybuf->cfe_peeked,
3581 entrybuf->cfe_pass_offset,
3582 entrybuf->cfe_peek_offset,
3583 datalen, copylen);
3584 }
3585 tmp = NULL;
3586
3587 /* Now deal with remaining data the filter wants to peek at */
3588 for (data = cfil_queue_first(&entrybuf->cfe_ctl_q),
3589 currentoffset = entrybuf->cfe_ctl_q.q_start;
3590 data != NULL && currentoffset < entrybuf->cfe_peek_offset;
3591 data = cfil_queue_next(&entrybuf->cfe_ctl_q, data),
3592 currentoffset += datalen) {
3593 datalen = cfil_data_length(data, NULL, NULL);
3594 tmp = data;
3595
3596 /* We've already peeked at this mbuf */
3597 if (currentoffset + datalen <= entrybuf->cfe_peeked) {
3598 continue;
3599 }
3600 /*
3601 * The data in the first mbuf may have been
3602 * partially peeked at
3603 */
3604 copyoffset = entrybuf->cfe_peeked - currentoffset;
3605 VERIFY(copyoffset < datalen);
3606 copylen = datalen - copyoffset;
3607 VERIFY(copylen <= datalen);
3608 /*
3609 * Do not copy more than needed
3610 */
3611 if (currentoffset + copyoffset + copylen >
3612 entrybuf->cfe_peek_offset) {
3613 copylen = entrybuf->cfe_peek_offset -
3614 (currentoffset + copyoffset);
3615 }
3616
3617 #if DATA_DEBUG
3618 CFIL_LOG(LOG_DEBUG,
3619 "CFIL: SERVICE CTL-Q PEEKING: %llx current %llu peeked %llu pass %llu peek %llu "
3620 "datalen %u copylen %u copyoffset %u",
3621 (uint64_t)VM_KERNEL_ADDRPERM(tmp),
3622 currentoffset,
3623 entrybuf->cfe_peeked,
3624 entrybuf->cfe_pass_offset,
3625 entrybuf->cfe_peek_offset,
3626 datalen, copylen, copyoffset);
3627 #endif
3628
3629 /*
3630 * Stop if there is nothing more to peek at
3631 */
3632 if (copylen == 0) {
3633 break;
3634 }
3635 /*
3636 * Let the filter get a peek at this span of data
3637 */
3638 error = cfil_dispatch_data_event(so, cfil_info, kcunit,
3639 outgoing, data, copyoffset, copylen);
3640 if (error != 0) {
3641 /* On error, leave data in ctl_q */
3642 break;
3643 }
3644 entrybuf->cfe_peeked += copylen;
3645 if (outgoing) {
3646 OSAddAtomic64(copylen,
3647 &cfil_stats.cfs_ctl_q_out_peeked);
3648 } else {
3649 OSAddAtomic64(copylen,
3650 &cfil_stats.cfs_ctl_q_in_peeked);
3651 }
3652
3653 /* Stop when data could not be fully peeked at */
3654 if (copylen + copyoffset < datalen) {
3655 break;
3656 }
3657 }
3658 CFIL_INFO_VERIFY(cfil_info);
3659 if (tmp != NULL) {
3660 CFIL_LOG(LOG_DEBUG,
3661 "%llx first %llu peeked %llu pass %llu peek %llu"
3662 "datalen %u copylen %u copyoffset %u",
3663 (uint64_t)VM_KERNEL_ADDRPERM(tmp),
3664 currentoffset,
3665 entrybuf->cfe_peeked,
3666 entrybuf->cfe_pass_offset,
3667 entrybuf->cfe_peek_offset,
3668 datalen, copylen, copyoffset);
3669 }
3670
3671 /*
3672 * Process data that has passed the filter
3673 */
3674 error = cfil_service_pending_queue(so, cfil_info, kcunit, outgoing);
3675 if (error != 0) {
3676 CFIL_LOG(LOG_ERR, "cfil_service_pending_queue() error %d",
3677 error);
3678 goto done;
3679 }
3680
3681 /*
3682 * Dispatch disconnect events that could not be sent
3683 */
3684 if (cfil_info == NULL) {
3685 goto done;
3686 } else if (outgoing) {
3687 if ((cfil_info->cfi_flags & CFIF_SHUT_WR) &&
3688 !(entry->cfe_flags & CFEF_SENT_DISCONNECT_OUT)) {
3689 cfil_dispatch_disconnect_event(so, cfil_info, kcunit, 1);
3690 }
3691 } else {
3692 if ((cfil_info->cfi_flags & CFIF_SHUT_RD) &&
3693 !(entry->cfe_flags & CFEF_SENT_DISCONNECT_IN)) {
3694 cfil_dispatch_disconnect_event(so, cfil_info, kcunit, 0);
3695 }
3696 }
3697
3698 done:
3699 CFIL_LOG(LOG_DEBUG,
3700 "first %llu peeked %llu pass %llu peek %llu",
3701 entrybuf->cfe_ctl_q.q_start,
3702 entrybuf->cfe_peeked,
3703 entrybuf->cfe_pass_offset,
3704 entrybuf->cfe_peek_offset);
3705
3706 CFIL_INFO_VERIFY(cfil_info);
3707 return error;
3708 }
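/*
 * Worked example of the offset handling above, with illustrative numbers:
 * assume cfe_ctl_q.q_start = 0, cfe_pass_offset = 100,
 * cfe_peek_offset = 300 and a single 250-byte mbuf chain on the control
 * queue.
 *	- Bytes [0, 100) fall under the pass offset: cfe_peeked advances to
 *	  100, but since only part of the chain may pass, the first loop
 *	  stops and the chain stays on cfe_ctl_q.
 *	- Bytes [100, 250) fall inside the peek window: the second loop
 *	  sends them to the filter with cfil_dispatch_data_event() and
 *	  cfe_peeked becomes 250.
 *	- Nothing moves to cfe_pending_q until the agent raises
 *	  cfe_pass_offset to at least 250 via cfil_update_data_offsets().
 */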
3709
3710 /*
3711 * cfil_data_filter()
3712 *
3713 * Process data for a content filter installed on a socket
3714 */
3715 int
3716 cfil_data_filter(struct socket *so, struct cfil_info *cfil_info, uint32_t kcunit, int outgoing,
3717 struct mbuf *data, uint64_t datalen)
3718 {
3719 errno_t error = 0;
3720 struct cfil_entry *entry;
3721 struct cfe_buf *entrybuf;
3722
3723 CFIL_LOG(LOG_INFO, "so %llx kcunit %u outgoing %d",
3724 (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit, outgoing);
3725
3726 socket_lock_assert_owned(so);
3727
3728 entry = &cfil_info->cfi_entries[kcunit - 1];
3729 if (outgoing) {
3730 entrybuf = &entry->cfe_snd;
3731 } else {
3732 entrybuf = &entry->cfe_rcv;
3733 }
3734
3735 /* Are we attached to the filter? */
3736 if (entry->cfe_filter == NULL) {
3737 error = 0;
3738 goto done;
3739 }
3740
3741 /* Dispatch to filters */
3742 cfil_queue_enqueue(&entrybuf->cfe_ctl_q, data, datalen);
3743 if (outgoing) {
3744 OSAddAtomic64(datalen,
3745 &cfil_stats.cfs_ctl_q_out_enqueued);
3746 } else {
3747 OSAddAtomic64(datalen,
3748 &cfil_stats.cfs_ctl_q_in_enqueued);
3749 }
3750
3751 error = cfil_data_service_ctl_q(so, cfil_info, kcunit, outgoing);
3752 if (error != 0) {
3753 CFIL_LOG(LOG_ERR, "cfil_data_service_ctl_q() error %d",
3754 error);
3755 }
3756 /*
3757 * We have to return EJUSTRETURN in all cases to avoid a double free
3758 * by the socket layer
3759 */
3760 error = EJUSTRETURN;
3761 done:
3762 CFIL_INFO_VERIFY(cfil_info);
3763
3764 CFIL_LOG(LOG_INFO, "return %d", error);
3765 return error;
3766 }
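/*
 * Note on the EJUSTRETURN convention above: once the mbuf chain is on
 * cfe_ctl_q the content filter owns it, so the socket layer must not
 * free it or append it a second time. A caller of the cfil data hooks
 * therefore behaves roughly like this (sketch):
 *
 *	error = cfil_sock_data_out(so, to, data, control, flags);
 *	if (error == EJUSTRETURN)
 *		error = 0;	// mbufs are now held by the filter queues;
 *				// do not free or transmit them here
 */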
3767
3768 /*
3769 * cfil_service_inject_queue() re-injects data that passed the
3770 * content filters
3771 */
3772 static int
3773 cfil_service_inject_queue(struct socket *so, struct cfil_info *cfil_info, int outgoing)
3774 {
3775 mbuf_t data;
3776 unsigned int datalen;
3777 int mbcnt = 0;
3778 int mbnum = 0;
3779 errno_t error = 0;
3780 struct cfi_buf *cfi_buf;
3781 struct cfil_queue *inject_q;
3782 int need_rwakeup = 0;
3783 int count = 0;
3784
3785 if (cfil_info == NULL) {
3786 return 0;
3787 }
3788
3789 socket_lock_assert_owned(so);
3790
3791 if (outgoing) {
3792 cfi_buf = &cfil_info->cfi_snd;
3793 cfil_info->cfi_flags &= ~CFIF_RETRY_INJECT_OUT;
3794 } else {
3795 cfi_buf = &cfil_info->cfi_rcv;
3796 cfil_info->cfi_flags &= ~CFIF_RETRY_INJECT_IN;
3797 }
3798 inject_q = &cfi_buf->cfi_inject_q;
3799
3800 if (cfil_queue_empty(inject_q)) {
3801 return 0;
3802 }
3803
3804 #if DATA_DEBUG | VERDICT_DEBUG
3805 CFIL_LOG(LOG_ERR, "CFIL: SERVICE INJECT-Q: <so %llx> outgoing %d queue len %llu",
3806 (uint64_t)VM_KERNEL_ADDRPERM(so), outgoing, cfil_queue_len(inject_q));
3807 #endif
3808
3809 while ((data = cfil_queue_first(inject_q)) != NULL) {
3810 datalen = cfil_data_length(data, &mbcnt, &mbnum);
3811
3812 #if DATA_DEBUG
3813 CFIL_LOG(LOG_DEBUG, "CFIL: SERVICE INJECT-Q: <%s>: <so %llx> data %llx datalen %u (mbcnt %u)",
3814 remote_addr_ptr ? "UNCONNECTED" : "CONNECTED",
3815 (uint64_t)VM_KERNEL_ADDRPERM(so), (uint64_t)VM_KERNEL_ADDRPERM(data), datalen, mbcnt);
3816 #endif
3817
3818 /* Remove data from queue and adjust stats */
3819 cfil_queue_remove(inject_q, data, datalen);
3820 cfi_buf->cfi_pending_first += datalen;
3821 cfi_buf->cfi_pending_mbcnt -= mbcnt;
3822 cfi_buf->cfi_pending_mbnum -= mbnum;
3823 cfil_info_buf_verify(cfi_buf);
3824
3825 if (outgoing) {
3826 error = sosend_reinject(so, NULL, data, NULL, 0);
3827 if (error != 0) {
3828 #if DATA_DEBUG
3829 cfil_info_log(LOG_ERR, cfil_info, "CFIL: Error: sosend_reinject() failed");
3830 CFIL_LOG(LOG_ERR, "### sosend() failed %d", error);
3831 #endif
3832 break;
3833 }
3834 // At least one injection succeeded, need to wake up pending threads.
3835 need_rwakeup = 1;
3836 } else {
3837 data->m_flags |= M_SKIPCFIL;
3838
3839 /*
3840 * NOTE: We currently only support TCP and UDP.
3841 * For RAWIP, MPTCP and message TCP we'll
3842 * need to call the appropriate sbappendxxx()
3843 * or fix sock_inject_data_in()
3844 */
3845 if (IS_UDP(so) == TRUE) {
3846 if (sbappendchain(&so->so_rcv, data, 0)) {
3847 need_rwakeup = 1;
3848 }
3849 } else {
3850 if (sbappendstream(&so->so_rcv, data)) {
3851 need_rwakeup = 1;
3852 }
3853 }
3854 }
3855
3856 if (outgoing) {
3857 OSAddAtomic64(datalen,
3858 &cfil_stats.cfs_inject_q_out_passed);
3859 } else {
3860 OSAddAtomic64(datalen,
3861 &cfil_stats.cfs_inject_q_in_passed);
3862 }
3863
3864 count++;
3865 }
3866
3867 #if DATA_DEBUG | VERDICT_DEBUG
3868 CFIL_LOG(LOG_ERR, "CFIL: SERVICE INJECT-Q: <so %llx> injected %d",
3869 (uint64_t)VM_KERNEL_ADDRPERM(so), count);
3870 #endif
3871
3872 /* A single wakeup for several packets is more efficient */
3873 if (need_rwakeup) {
3874 if (outgoing == TRUE) {
3875 sowwakeup(so);
3876 } else {
3877 sorwakeup(so);
3878 }
3879 }
3880
3881 if (error != 0 && cfil_info) {
3882 if (error == ENOBUFS) {
3883 OSIncrementAtomic(&cfil_stats.cfs_inject_q_nobufs);
3884 }
3885 if (error == ENOMEM) {
3886 OSIncrementAtomic(&cfil_stats.cfs_inject_q_nomem);
3887 }
3888
3889 if (outgoing) {
3890 cfil_info->cfi_flags |= CFIF_RETRY_INJECT_OUT;
3891 OSIncrementAtomic(&cfil_stats.cfs_inject_q_out_fail);
3892 } else {
3893 cfil_info->cfi_flags |= CFIF_RETRY_INJECT_IN;
3894 OSIncrementAtomic(&cfil_stats.cfs_inject_q_in_fail);
3895 }
3896 }
3897
3898 /*
3899 * Notify
3900 */
3901 if (cfil_info && (cfil_info->cfi_flags & CFIF_SHUT_WR)) {
3902 cfil_sock_notify_shutdown(so, SHUT_WR);
3903 if (cfil_sock_data_pending(&so->so_snd) == 0) {
3904 soshutdownlock_final(so, SHUT_WR);
3905 }
3906 }
3907 if (cfil_info && (cfil_info->cfi_flags & CFIF_CLOSE_WAIT)) {
3908 if (cfil_filters_attached(so) == 0) {
3909 CFIL_LOG(LOG_INFO, "so %llx waking",
3910 (uint64_t)VM_KERNEL_ADDRPERM(so));
3911 wakeup((caddr_t)cfil_info);
3912 }
3913 }
3914
3915 CFIL_INFO_VERIFY(cfil_info);
3916
3917 return error;
3918 }
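/*
 * The two re-injection paths above, in short:
 *	- outgoing data passed by all filters goes back down the stack with
 *	  sosend_reinject(), continuing toward the network as if it had
 *	  never been held;
 *	- incoming data is appended to so_rcv (sbappendchain() for UDP,
 *	  sbappendstream() for TCP) with M_SKIPCFIL set so the append path
 *	  does not hand the same bytes to the content filter a second time.
 */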
3919
3920 static int
3921 cfil_service_pending_queue(struct socket *so, struct cfil_info *cfil_info, uint32_t kcunit, int outgoing)
3922 {
3923 uint64_t passlen, curlen;
3924 mbuf_t data;
3925 unsigned int datalen;
3926 errno_t error = 0;
3927 struct cfil_entry *entry;
3928 struct cfe_buf *entrybuf;
3929 struct cfil_queue *pending_q;
3930
3931 CFIL_LOG(LOG_INFO, "so %llx kcunit %u outgoing %d",
3932 (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit, outgoing);
3933
3934 socket_lock_assert_owned(so);
3935
3936 entry = &cfil_info->cfi_entries[kcunit - 1];
3937 if (outgoing) {
3938 entrybuf = &entry->cfe_snd;
3939 } else {
3940 entrybuf = &entry->cfe_rcv;
3941 }
3942
3943 pending_q = &entrybuf->cfe_pending_q;
3944
3945 passlen = entrybuf->cfe_pass_offset - pending_q->q_start;
3946
3947 /*
3948 * Locate the chunks of data that we can pass to the next filter
3949 * A data chunk must be on mbuf boundaries
3950 */
3951 curlen = 0;
3952 while ((data = cfil_queue_first(pending_q)) != NULL) {
3953 struct cfil_entry *iter_entry;
3954 datalen = cfil_data_length(data, NULL, NULL);
3955
3956 #if DATA_DEBUG
3957 CFIL_LOG(LOG_DEBUG,
3958 "CFIL: SERVICE PENDING-Q: data %llx datalen %u passlen %llu curlen %llu",
3959 (uint64_t)VM_KERNEL_ADDRPERM(data), datalen,
3960 passlen, curlen);
3961 #endif
3962
3963 if (curlen + datalen > passlen) {
3964 break;
3965 }
3966
3967 cfil_queue_remove(pending_q, data, datalen);
3968
3969 curlen += datalen;
3970
3971 for (iter_entry = SLIST_NEXT(entry, cfe_order_link);
3972 iter_entry != NULL;
3973 iter_entry = SLIST_NEXT(iter_entry, cfe_order_link)) {
3974 error = cfil_data_filter(so, cfil_info, CFI_ENTRY_KCUNIT(cfil_info, iter_entry), outgoing,
3975 data, datalen);
3976 /* 0 means passed so we can continue */
3977 if (error != 0) {
3978 break;
3979 }
3980 }
3981 /* When data has passed all filters, re-inject */
3982 if (error == 0) {
3983 if (outgoing) {
3984 cfil_queue_enqueue(
3985 &cfil_info->cfi_snd.cfi_inject_q,
3986 data, datalen);
3987 OSAddAtomic64(datalen,
3988 &cfil_stats.cfs_inject_q_out_enqueued);
3989 } else {
3990 cfil_queue_enqueue(
3991 &cfil_info->cfi_rcv.cfi_inject_q,
3992 data, datalen);
3993 OSAddAtomic64(datalen,
3994 &cfil_stats.cfs_inject_q_in_enqueued);
3995 }
3996 }
3997 }
3998
3999 CFIL_INFO_VERIFY(cfil_info);
4000
4001 return error;
4002 }
4003
4004 int
4005 cfil_update_data_offsets(struct socket *so, struct cfil_info *cfil_info, uint32_t kcunit, int outgoing,
4006 uint64_t pass_offset, uint64_t peek_offset)
4007 {
4008 errno_t error = 0;
4009 struct cfil_entry *entry = NULL;
4010 struct cfe_buf *entrybuf;
4011 int updated = 0;
4012
4013 CFIL_LOG(LOG_INFO, "pass %llu peek %llu", pass_offset, peek_offset);
4014
4015 socket_lock_assert_owned(so);
4016
4017 if (cfil_info == NULL) {
4018 CFIL_LOG(LOG_ERR, "so %llx cfil detached",
4019 (uint64_t)VM_KERNEL_ADDRPERM(so));
4020 error = 0;
4021 goto done;
4022 } else if (cfil_info->cfi_flags & CFIF_DROP) {
4023 CFIL_LOG(LOG_ERR, "so %llx drop set",
4024 (uint64_t)VM_KERNEL_ADDRPERM(so));
4025 error = EPIPE;
4026 goto done;
4027 }
4028
4029 entry = &cfil_info->cfi_entries[kcunit - 1];
4030 if (outgoing) {
4031 entrybuf = &entry->cfe_snd;
4032 } else {
4033 entrybuf = &entry->cfe_rcv;
4034 }
4035
4036 /* Record updated offsets for this content filter */
4037 if (pass_offset > entrybuf->cfe_pass_offset) {
4038 entrybuf->cfe_pass_offset = pass_offset;
4039
4040 if (entrybuf->cfe_peek_offset < entrybuf->cfe_pass_offset) {
4041 entrybuf->cfe_peek_offset = entrybuf->cfe_pass_offset;
4042 }
4043 updated = 1;
4044 } else {
4045 CFIL_LOG(LOG_INFO, "pass_offset %llu <= cfe_pass_offset %llu",
4046 pass_offset, entrybuf->cfe_pass_offset);
4047 }
4048 /* Filter does not want or need to see data that's allowed to pass */
4049 if (peek_offset > entrybuf->cfe_pass_offset &&
4050 peek_offset > entrybuf->cfe_peek_offset) {
4051 entrybuf->cfe_peek_offset = peek_offset;
4052 updated = 1;
4053 }
4054 /* Nothing to do */
4055 if (updated == 0) {
4056 goto done;
4057 }
4058
4059 /* Move data held in control queue to pending queue if needed */
4060 error = cfil_data_service_ctl_q(so, cfil_info, kcunit, outgoing);
4061 if (error != 0) {
4062 CFIL_LOG(LOG_ERR, "cfil_data_service_ctl_q() error %d",
4063 error);
4064 goto done;
4065 }
4066 error = EJUSTRETURN;
4067
4068 done:
4069 /*
4070 * The filter is effectively detached when told to pass all from both sides
4071 * or when the socket is closed and no more data is waiting
4072 * to be delivered to the filter
4073 */
4074 if (entry != NULL &&
4075 ((entry->cfe_snd.cfe_pass_offset == CFM_MAX_OFFSET &&
4076 entry->cfe_rcv.cfe_pass_offset == CFM_MAX_OFFSET) ||
4077 ((cfil_info->cfi_flags & CFIF_CLOSE_WAIT) &&
4078 cfil_queue_empty(&entry->cfe_snd.cfe_ctl_q) &&
4079 cfil_queue_empty(&entry->cfe_rcv.cfe_ctl_q)))) {
4080 entry->cfe_flags |= CFEF_CFIL_DETACHED;
4081 #if LIFECYCLE_DEBUG
4082 cfil_info_log(LOG_ERR, cfil_info, outgoing ?
4083 "CFIL: LIFECYCLE: OUT - PASSED ALL - DETACH":
4084 "CFIL: LIFECYCLE: IN - PASSED ALL - DETACH");
4085 #endif
4086 CFIL_LOG(LOG_INFO, "so %llx detached %u",
4087 (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit);
4088 if ((cfil_info->cfi_flags & CFIF_CLOSE_WAIT) &&
4089 cfil_filters_attached(so) == 0) {
4090 #if LIFECYCLE_DEBUG
4091 cfil_info_log(LOG_ERR, cfil_info, "CFIL: LIFECYCLE: WAKING");
4092 #endif
4093 CFIL_LOG(LOG_INFO, "so %llx waking",
4094 (uint64_t)VM_KERNEL_ADDRPERM(so));
4095 wakeup((caddr_t)cfil_info);
4096 }
4097 }
4098 CFIL_INFO_VERIFY(cfil_info);
4099 CFIL_LOG(LOG_INFO, "return %d", error);
4100 return error;
4101 }
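/*
 * The pass/peek offsets above normally come from a CFM_OP_DATA_UPDATE
 * action sent by the user space agent. A minimal sketch of such a
 * message, assuming the cfil_msg_action layout from the content filter
 * header (offset values are illustrative):
 *
 *	struct cfil_msg_action action = {
 *		.cfa_msghdr = {
 *			.cfm_len     = sizeof(struct cfil_msg_action),
 *			.cfm_version = 1,
 *			.cfm_type    = CFM_TYPE_ACTION,
 *			.cfm_op      = CFM_OP_DATA_UPDATE,
 *			.cfm_sock_id = sock_id,	// from the attach event
 *		},
 *		.cfa_out_pass_offset = 100,	// let the first 100 outgoing bytes go
 *		.cfa_out_peek_offset = 300,	// keep peeking up to offset 300
 *	};
 *	// written to the agent's kernel control socket with send(2)
 */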
4102
4103 /*
4104 * Update pass offset for socket when no data is pending
4105 */
4106 static int
4107 cfil_set_socket_pass_offset(struct socket *so, struct cfil_info *cfil_info, int outgoing)
4108 {
4109 struct cfi_buf *cfi_buf;
4110 struct cfil_entry *entry;
4111 struct cfe_buf *entrybuf;
4112 uint32_t kcunit;
4113 uint64_t pass_offset = 0;
4114
4115 if (cfil_info == NULL) {
4116 return 0;
4117 }
4118
4119 CFIL_LOG(LOG_INFO, "so %llx outgoing %d",
4120 (uint64_t)VM_KERNEL_ADDRPERM(so), outgoing);
4121
4122 socket_lock_assert_owned(so);
4123
4124 if (outgoing) {
4125 cfi_buf = &cfil_info->cfi_snd;
4126 } else {
4127 cfi_buf = &cfil_info->cfi_rcv;
4128 }
4129
4130 CFIL_LOG(LOG_DEBUG, "CFIL: <so %llx, sockID %llu> outgoing %d cfi_pending_first %llu cfi_pending_last %llu",
4131 (uint64_t)VM_KERNEL_ADDRPERM(so), cfil_info->cfi_sock_id, outgoing,
4132 cfi_buf->cfi_pending_first, cfi_buf->cfi_pending_last);
4133
4134 if (cfi_buf->cfi_pending_last - cfi_buf->cfi_pending_first == 0) {
4135 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
4136 entry = &cfil_info->cfi_entries[kcunit - 1];
4137
4138 /* Are we attached to a filter? */
4139 if (entry->cfe_filter == NULL) {
4140 continue;
4141 }
4142
4143 if (outgoing) {
4144 entrybuf = &entry->cfe_snd;
4145 } else {
4146 entrybuf = &entry->cfe_rcv;
4147 }
4148
4149 if (pass_offset == 0 ||
4150 entrybuf->cfe_pass_offset < pass_offset) {
4151 pass_offset = entrybuf->cfe_pass_offset;
4152 }
4153 }
4154 cfi_buf->cfi_pass_offset = pass_offset;
4155 }
4156
4157 CFIL_LOG(LOG_DEBUG, "CFIL: <so %llx, sockID %llu>, cfi_pass_offset %llu",
4158 (uint64_t)VM_KERNEL_ADDRPERM(so), cfil_info->cfi_sock_id, cfi_buf->cfi_pass_offset);
4159
4160 return 0;
4161 }
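/*
 * Example with two attached filters (illustrative numbers): if filter 1
 * reports cfe_pass_offset = 500 and filter 2 reports
 * cfe_pass_offset = 200, cfi_pass_offset becomes 200. The most
 * restrictive filter therefore governs how far the fast path in
 * cfil_data_common() may let data through without re-queueing it.
 */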
4162
4163 int
4164 cfil_action_data_pass(struct socket *so, struct cfil_info *cfil_info, uint32_t kcunit, int outgoing,
4165 uint64_t pass_offset, uint64_t peek_offset)
4166 {
4167 errno_t error = 0;
4168
4169 CFIL_LOG(LOG_INFO, "");
4170
4171 socket_lock_assert_owned(so);
4172
4173 error = cfil_acquire_sockbuf(so, cfil_info, outgoing);
4174 if (error != 0) {
4175 CFIL_LOG(LOG_INFO, "so %llx %s dropped",
4176 (uint64_t)VM_KERNEL_ADDRPERM(so),
4177 outgoing ? "out" : "in");
4178 goto release;
4179 }
4180
4181 error = cfil_update_data_offsets(so, cfil_info, kcunit, outgoing,
4182 pass_offset, peek_offset);
4183
4184 cfil_service_inject_queue(so, cfil_info, outgoing);
4185
4186 cfil_set_socket_pass_offset(so, cfil_info, outgoing);
4187 release:
4188 CFIL_INFO_VERIFY(cfil_info);
4189 cfil_release_sockbuf(so, outgoing);
4190
4191 return error;
4192 }
4193
4194
4195 static void
4196 cfil_flush_queues(struct socket *so, struct cfil_info *cfil_info)
4197 {
4198 struct cfil_entry *entry;
4199 int kcunit;
4200 uint64_t drained;
4201
4202 if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || cfil_info == NULL) {
4203 goto done;
4204 }
4205
4206 socket_lock_assert_owned(so);
4207
4208 /*
4209 * Flush the output queues and ignore errors as long as
4210 * we are attached
4211 */
4212 (void) cfil_acquire_sockbuf(so, cfil_info, 1);
4213 if (cfil_info != NULL) {
4214 drained = 0;
4215 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
4216 entry = &cfil_info->cfi_entries[kcunit - 1];
4217
4218 drained += cfil_queue_drain(&entry->cfe_snd.cfe_ctl_q);
4219 drained += cfil_queue_drain(&entry->cfe_snd.cfe_pending_q);
4220 }
4221 drained += cfil_queue_drain(&cfil_info->cfi_snd.cfi_inject_q);
4222
4223 if (drained) {
4224 if (cfil_info->cfi_flags & CFIF_DROP) {
4225 OSIncrementAtomic(
4226 &cfil_stats.cfs_flush_out_drop);
4227 } else {
4228 OSIncrementAtomic(
4229 &cfil_stats.cfs_flush_out_close);
4230 }
4231 }
4232 }
4233 cfil_release_sockbuf(so, 1);
4234
4235 /*
4236 * Flush the input queues
4237 */
4238 (void) cfil_acquire_sockbuf(so, cfil_info, 0);
4239 if (cfil_info != NULL) {
4240 drained = 0;
4241 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
4242 entry = &cfil_info->cfi_entries[kcunit - 1];
4243
4244 drained += cfil_queue_drain(
4245 &entry->cfe_rcv.cfe_ctl_q);
4246 drained += cfil_queue_drain(
4247 &entry->cfe_rcv.cfe_pending_q);
4248 }
4249 drained += cfil_queue_drain(&cfil_info->cfi_rcv.cfi_inject_q);
4250
4251 if (drained) {
4252 if (cfil_info->cfi_flags & CFIF_DROP) {
4253 OSIncrementAtomic(
4254 &cfil_stats.cfs_flush_in_drop);
4255 } else {
4256 OSIncrementAtomic(
4257 &cfil_stats.cfs_flush_in_close);
4258 }
4259 }
4260 }
4261 cfil_release_sockbuf(so, 0);
4262 done:
4263 CFIL_INFO_VERIFY(cfil_info);
4264 }
4265
4266 int
4267 cfil_action_drop(struct socket *so, struct cfil_info *cfil_info, uint32_t kcunit)
4268 {
4269 errno_t error = 0;
4270 struct cfil_entry *entry;
4271 struct proc *p;
4272
4273 if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || cfil_info == NULL) {
4274 goto done;
4275 }
4276
4277 socket_lock_assert_owned(so);
4278
4279 entry = &cfil_info->cfi_entries[kcunit - 1];
4280
4281 /* Are we attached to the filter? */
4282 if (entry->cfe_filter == NULL) {
4283 goto done;
4284 }
4285
4286 cfil_info->cfi_flags |= CFIF_DROP;
4287
4288 p = current_proc();
4289
4290 /*
4291 * Force the socket to be marked defunct
4292 * (forcing fixed along with rdar://19391339)
4293 */
4294 if (so->so_cfil_db == NULL) {
4295 error = sosetdefunct(p, so,
4296 SHUTDOWN_SOCKET_LEVEL_CONTENT_FILTER | SHUTDOWN_SOCKET_LEVEL_DISCONNECT_ALL,
4297 FALSE);
4298
4299 /* Flush the socket buffer and disconnect */
4300 if (error == 0) {
4301 error = sodefunct(p, so,
4302 SHUTDOWN_SOCKET_LEVEL_CONTENT_FILTER | SHUTDOWN_SOCKET_LEVEL_DISCONNECT_ALL);
4303 }
4304 }
4305
4306 /* The filter is done, mark as detached */
4307 entry->cfe_flags |= CFEF_CFIL_DETACHED;
4308 #if LIFECYCLE_DEBUG
4309 cfil_info_log(LOG_ERR, cfil_info, "CFIL: LIFECYCLE: DROP - DETACH");
4310 #endif
4311 CFIL_LOG(LOG_INFO, "so %llx detached %u",
4312 (uint64_t)VM_KERNEL_ADDRPERM(so), kcunit);
4313
4314 /* Pending data needs to go */
4315 cfil_flush_queues(so, cfil_info);
4316
4317 if (cfil_info && (cfil_info->cfi_flags & CFIF_CLOSE_WAIT)) {
4318 if (cfil_filters_attached(so) == 0) {
4319 CFIL_LOG(LOG_INFO, "so %llx waking",
4320 (uint64_t)VM_KERNEL_ADDRPERM(so));
4321 wakeup((caddr_t)cfil_info);
4322 }
4323 }
4324 done:
4325 return error;
4326 }
4327
4328 int
4329 cfil_action_bless_client(uint32_t kcunit, struct cfil_msg_hdr *msghdr)
4330 {
4331 errno_t error = 0;
4332 struct cfil_info *cfil_info = NULL;
4333
4334 bool cfil_attached = false;
4335 struct cfil_msg_bless_client *blessmsg = (struct cfil_msg_bless_client *)msghdr;
4336
4337 // Search and lock socket
4338 struct socket *so = cfil_socket_from_client_uuid(blessmsg->cfb_client_uuid, &cfil_attached);
4339 if (so == NULL) {
4340 error = ENOENT;
4341 } else {
4342 // The client gets a pass automatically
4343 cfil_info = (so->so_cfil_db != NULL) ?
4344 cfil_db_get_cfil_info(so->so_cfil_db, msghdr->cfm_sock_id) : so->so_cfil;
4345
4346 if (cfil_attached) {
4347 #if VERDICT_DEBUG
4348 if (cfil_info != NULL) {
4349 CFIL_LOG(LOG_ERR, "CFIL: VERDICT RECEIVED: BLESS %s <so %llx sockID %llu>",
4350 cfil_info->cfi_hash_entry ? "UDP" : "TCP",
4351 (uint64_t)VM_KERNEL_ADDRPERM(so),
4352 cfil_info->cfi_sock_id);
4353 }
4354 #endif
4355 cfil_sock_received_verdict(so);
4356 (void)cfil_action_data_pass(so, cfil_info, kcunit, 1, CFM_MAX_OFFSET, CFM_MAX_OFFSET);
4357 (void)cfil_action_data_pass(so, cfil_info, kcunit, 0, CFM_MAX_OFFSET, CFM_MAX_OFFSET);
4358 } else {
4359 so->so_flags1 |= SOF1_CONTENT_FILTER_SKIP;
4360 }
4361 socket_unlock(so, 1);
4362 }
4363
4364 return error;
4365 }
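/*
 * Blessing lets an agent exempt one of its own connections, looked up by
 * client UUID rather than by NECP rule match: if a filter is already
 * attached, passing CFM_MAX_OFFSET in both directions amounts to a
 * permanent pass verdict; otherwise SOF1_CONTENT_FILTER_SKIP marks the
 * socket so content filtering is skipped for it altogether.
 */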
4366
4367 int
4368 cfil_action_set_crypto_key(uint32_t kcunit, struct cfil_msg_hdr *msghdr)
4369 {
4370 struct content_filter *cfc = NULL;
4371 cfil_crypto_state_t crypto_state = NULL;
4372 struct cfil_msg_set_crypto_key *keymsg = (struct cfil_msg_set_crypto_key *)msghdr;
4373
4374 CFIL_LOG(LOG_NOTICE, "");
4375
4376 if (content_filters == NULL) {
4377 CFIL_LOG(LOG_ERR, "no content filter");
4378 return EINVAL;
4379 }
4380 if (kcunit > MAX_CONTENT_FILTER) {
4381 CFIL_LOG(LOG_ERR, "kcunit %u > MAX_CONTENT_FILTER (%d)",
4382 kcunit, MAX_CONTENT_FILTER);
4383 return EINVAL;
4384 }
4385 crypto_state = cfil_crypto_init_client((uint8_t *)keymsg->crypto_key);
4386 if (crypto_state == NULL) {
4387 CFIL_LOG(LOG_ERR, "failed to initialize crypto state for unit %u)",
4388 kcunit);
4389 return EINVAL;
4390 }
4391
4392 cfil_rw_lock_exclusive(&cfil_lck_rw);
4393
4394 cfc = content_filters[kcunit - 1];
4395 if (cfc == NULL || cfc->cf_kcunit != kcunit) {
4396 CFIL_LOG(LOG_ERR, "bad unit info %u)",
4397 kcunit);
4398 cfil_rw_unlock_exclusive(&cfil_lck_rw);
4399 cfil_crypto_cleanup_state(crypto_state);
4400 return EINVAL;
4401 }
4402 if (cfc->cf_crypto_state != NULL) {
4403 cfil_crypto_cleanup_state(cfc->cf_crypto_state);
4404 cfc->cf_crypto_state = NULL;
4405 }
4406 cfc->cf_crypto_state = crypto_state;
4407
4408 cfil_rw_unlock_exclusive(&cfil_lck_rw);
4409 return 0;
4410 }
4411
4412 static int
4413 cfil_update_entry_offsets(struct socket *so, struct cfil_info *cfil_info, int outgoing, unsigned int datalen)
4414 {
4415 struct cfil_entry *entry;
4416 struct cfe_buf *entrybuf;
4417 uint32_t kcunit;
4418
4419 CFIL_LOG(LOG_INFO, "so %llx outgoing %d datalen %u",
4420 (uint64_t)VM_KERNEL_ADDRPERM(so), outgoing, datalen);
4421
4422 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
4423 entry = &cfil_info->cfi_entries[kcunit - 1];
4424
4425 /* Are we attached to the filter? */
4426 if (entry->cfe_filter == NULL) {
4427 continue;
4428 }
4429
4430 if (outgoing) {
4431 entrybuf = &entry->cfe_snd;
4432 } else {
4433 entrybuf = &entry->cfe_rcv;
4434 }
4435
4436 entrybuf->cfe_ctl_q.q_start += datalen;
4437 entrybuf->cfe_pass_offset = entrybuf->cfe_ctl_q.q_start;
4438 entrybuf->cfe_peeked = entrybuf->cfe_ctl_q.q_start;
4439 if (entrybuf->cfe_peek_offset < entrybuf->cfe_pass_offset) {
4440 entrybuf->cfe_peek_offset = entrybuf->cfe_pass_offset;
4441 }
4442
4443 entrybuf->cfe_ctl_q.q_end += datalen;
4444
4445 entrybuf->cfe_pending_q.q_start += datalen;
4446 entrybuf->cfe_pending_q.q_end += datalen;
4447 }
4448 CFIL_INFO_VERIFY(cfil_info);
4449 return 0;
4450 }
4451
4452 int
4453 cfil_data_common(struct socket *so, struct cfil_info *cfil_info, int outgoing, struct sockaddr *to,
4454 struct mbuf *data, struct mbuf *control, uint32_t flags)
4455 {
4456 #pragma unused(to, control, flags)
4457 errno_t error = 0;
4458 unsigned int datalen;
4459 int mbcnt = 0;
4460 int mbnum = 0;
4461 int kcunit;
4462 struct cfi_buf *cfi_buf;
4463 struct mbuf *chain = NULL;
4464
4465 if (cfil_info == NULL) {
4466 CFIL_LOG(LOG_ERR, "so %llx cfil detached",
4467 (uint64_t)VM_KERNEL_ADDRPERM(so));
4468 error = 0;
4469 goto done;
4470 } else if (cfil_info->cfi_flags & CFIF_DROP) {
4471 CFIL_LOG(LOG_ERR, "so %llx drop set",
4472 (uint64_t)VM_KERNEL_ADDRPERM(so));
4473 error = EPIPE;
4474 goto done;
4475 }
4476
4477 datalen = cfil_data_length(data, &mbcnt, &mbnum);
4478
4479 if (outgoing) {
4480 cfi_buf = &cfil_info->cfi_snd;
4481 cfil_info->cfi_byte_outbound_count += datalen;
4482 } else {
4483 cfi_buf = &cfil_info->cfi_rcv;
4484 cfil_info->cfi_byte_inbound_count += datalen;
4485 }
4486
4487 cfi_buf->cfi_pending_last += datalen;
4488 cfi_buf->cfi_pending_mbcnt += mbcnt;
4489 cfi_buf->cfi_pending_mbnum += mbnum;
4490
4491 if (IS_UDP(so)) {
4492 if (cfi_buf->cfi_pending_mbnum > cfil_udp_gc_mbuf_num_max ||
4493 cfi_buf->cfi_pending_mbcnt > cfil_udp_gc_mbuf_cnt_max) {
4494 cfi_buf->cfi_tail_drop_cnt++;
4495 cfi_buf->cfi_pending_mbcnt -= mbcnt;
4496 cfi_buf->cfi_pending_mbnum -= mbnum;
4497 return EPIPE;
4498 }
4499 }
4500
4501 cfil_info_buf_verify(cfi_buf);
4502
4503 #if DATA_DEBUG
4504 CFIL_LOG(LOG_DEBUG, "CFIL: QUEUEING DATA: <so %llx> %s: data %llx len %u flags 0x%x nextpkt %llx - cfi_pending_last %llu cfi_pending_mbcnt %u cfi_pass_offset %llu",
4505 (uint64_t)VM_KERNEL_ADDRPERM(so),
4506 outgoing ? "OUT" : "IN",
4507 (uint64_t)VM_KERNEL_ADDRPERM(data), datalen, data->m_flags,
4508 (uint64_t)VM_KERNEL_ADDRPERM(data->m_nextpkt),
4509 cfi_buf->cfi_pending_last,
4510 cfi_buf->cfi_pending_mbcnt,
4511 cfi_buf->cfi_pass_offset);
4512 #endif
4513
4514 /* Fast path when below pass offset */
4515 if (cfi_buf->cfi_pending_last <= cfi_buf->cfi_pass_offset) {
4516 cfil_update_entry_offsets(so, cfil_info, outgoing, datalen);
4517 #if DATA_DEBUG
4518 CFIL_LOG(LOG_DEBUG, "CFIL: QUEUEING DATA: FAST PATH");
4519 #endif
4520 } else {
4521 struct cfil_entry *iter_entry;
4522 SLIST_FOREACH(iter_entry, &cfil_info->cfi_ordered_entries, cfe_order_link) {
4523 // Is cfil attached to this filter?
4524 kcunit = CFI_ENTRY_KCUNIT(cfil_info, iter_entry);
4525 if (IS_ENTRY_ATTACHED(cfil_info, kcunit)) {
4526 if (IS_UDP(so) && chain == NULL) {
4527 /* UDP only:
4528 * Chain addr (incoming only, TBD), control (optional) and data into one chain.
4529 * This full chain will be reinjected into the socket after receiving the verdict.
4530 */
4531 (void) cfil_udp_save_socket_state(cfil_info, data);
4532 chain = sbconcat_mbufs(NULL, outgoing ? NULL : to, data, control);
4533 if (chain == NULL) {
4534 return ENOBUFS;
4535 }
4536 data = chain;
4537 }
4538 error = cfil_data_filter(so, cfil_info, kcunit, outgoing, data,
4539 datalen);
4540 }
4541 /* 0 means passed so continue with next filter */
4542 if (error != 0) {
4543 break;
4544 }
4545 }
4546 }
4547
4548 /* Move cursor if no filter claimed the data */
4549 if (error == 0) {
4550 cfi_buf->cfi_pending_first += datalen;
4551 cfi_buf->cfi_pending_mbcnt -= mbcnt;
4552 cfi_buf->cfi_pending_mbnum -= mbnum;
4553 cfil_info_buf_verify(cfi_buf);
4554 }
4555 done:
4556 CFIL_INFO_VERIFY(cfil_info);
4557
4558 return error;
4559 }
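/*
 * Control flow of cfil_data_common() in short (sketch):
 *
 *	if (cfi_pending_last <= cfi_pass_offset) {
 *		// fast path: all attached filters already passed this far,
 *		// so only the per-entry offsets are advanced
 *	} else {
 *		// slow path: the data is offered to each attached filter in
 *		// cfi_ordered_entries order; an EJUSTRETURN from
 *		// cfil_data_filter() ends the walk because that filter now
 *		// holds the mbufs on its control queue
 *	}
 */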
4560
4561 /*
4562 * Callback from socket layer sosendxxx()
4563 */
4564 int
4565 cfil_sock_data_out(struct socket *so, struct sockaddr *to,
4566 struct mbuf *data, struct mbuf *control, uint32_t flags)
4567 {
4568 int error = 0;
4569
4570 if (IS_UDP(so)) {
4571 return cfil_sock_udp_handle_data(TRUE, so, NULL, to, data, control, flags);
4572 }
4573
4574 if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL) {
4575 return 0;
4576 }
4577
4578 /*
4579 * Pass initial data for TFO.
4580 */
4581 if (IS_INITIAL_TFO_DATA(so)) {
4582 return 0;
4583 }
4584
4585 socket_lock_assert_owned(so);
4586
4587 if (so->so_cfil->cfi_flags & CFIF_DROP) {
4588 CFIL_LOG(LOG_ERR, "so %llx drop set",
4589 (uint64_t)VM_KERNEL_ADDRPERM(so));
4590 return EPIPE;
4591 }
4592 if (control != NULL) {
4593 CFIL_LOG(LOG_ERR, "so %llx control",
4594 (uint64_t)VM_KERNEL_ADDRPERM(so));
4595 OSIncrementAtomic(&cfil_stats.cfs_data_out_control);
4596 }
4597 if ((flags & MSG_OOB)) {
4598 CFIL_LOG(LOG_ERR, "so %llx MSG_OOB",
4599 (uint64_t)VM_KERNEL_ADDRPERM(so));
4600 OSIncrementAtomic(&cfil_stats.cfs_data_out_oob);
4601 }
4602 if ((so->so_snd.sb_flags & SB_LOCK) == 0) {
4603 panic("so %p SB_LOCK not set", so);
4604 }
4605
4606 if (so->so_snd.sb_cfil_thread != NULL) {
4607 panic("%s sb_cfil_thread %p not NULL", __func__,
4608 so->so_snd.sb_cfil_thread);
4609 }
4610
4611 error = cfil_data_common(so, so->so_cfil, 1, to, data, control, flags);
4612
4613 return error;
4614 }
4615
4616 /*
4617 * Callback from socket layer sbappendxxx()
4618 */
4619 int
4620 cfil_sock_data_in(struct socket *so, struct sockaddr *from,
4621 struct mbuf *data, struct mbuf *control, uint32_t flags)
4622 {
4623 int error = 0;
4624
4625 if (IS_UDP(so)) {
4626 return cfil_sock_udp_handle_data(FALSE, so, NULL, from, data, control, flags);
4627 }
4628
4629 if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL) {
4630 return 0;
4631 }
4632
4633 /*
4634 * Pass initial data for TFO.
4635 */
4636 if (IS_INITIAL_TFO_DATA(so)) {
4637 return 0;
4638 }
4639
4640 socket_lock_assert_owned(so);
4641
4642 if (so->so_cfil->cfi_flags & CFIF_DROP) {
4643 CFIL_LOG(LOG_ERR, "so %llx drop set",
4644 (uint64_t)VM_KERNEL_ADDRPERM(so));
4645 return EPIPE;
4646 }
4647 if (control != NULL) {
4648 CFIL_LOG(LOG_ERR, "so %llx control",
4649 (uint64_t)VM_KERNEL_ADDRPERM(so));
4650 OSIncrementAtomic(&cfil_stats.cfs_data_in_control);
4651 }
4652 if (data->m_type == MT_OOBDATA) {
4653 CFIL_LOG(LOG_ERR, "so %llx MSG_OOB",
4654 (uint64_t)VM_KERNEL_ADDRPERM(so));
4655 OSIncrementAtomic(&cfil_stats.cfs_data_in_oob);
4656 }
4657 error = cfil_data_common(so, so->so_cfil, 0, from, data, control, flags);
4658
4659 return error;
4660 }
4661
4662 /*
4663 * Callback from socket layer soshutdownxxx()
4664 *
4665 * We may delay the shutdown write if outgoing data is still being processed by the filters.
4666 *
4667 * There is no point in delaying the shutdown read because the process
4668 * has indicated that it does not want to read any more data.
4669 */
4670 int
4671 cfil_sock_shutdown(struct socket *so, int *how)
4672 {
4673 int error = 0;
4674
4675 if (IS_UDP(so)) {
4676 return cfil_sock_udp_shutdown(so, how);
4677 }
4678
4679 if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL) {
4680 goto done;
4681 }
4682
4683 socket_lock_assert_owned(so);
4684
4685 CFIL_LOG(LOG_INFO, "so %llx how %d",
4686 (uint64_t)VM_KERNEL_ADDRPERM(so), *how);
4687
4688 /*
4689 * Check the state of the socket before the content filter
4690 */
4691 if (*how != SHUT_WR && (so->so_state & SS_CANTRCVMORE) != 0) {
4692 /* read already shut down */
4693 error = ENOTCONN;
4694 goto done;
4695 }
4696 if (*how != SHUT_RD && (so->so_state & SS_CANTSENDMORE) != 0) {
4697 /* write already shut down */
4698 error = ENOTCONN;
4699 goto done;
4700 }
4701
4702 if ((so->so_cfil->cfi_flags & CFIF_DROP) != 0) {
4703 CFIL_LOG(LOG_ERR, "so %llx drop set",
4704 (uint64_t)VM_KERNEL_ADDRPERM(so));
4705 goto done;
4706 }
4707
4708 /*
4709 * shutdown read: SHUT_RD or SHUT_RDWR
4710 */
4711 if (*how != SHUT_WR) {
4712 if (so->so_cfil->cfi_flags & CFIF_SHUT_RD) {
4713 error = ENOTCONN;
4714 goto done;
4715 }
4716 so->so_cfil->cfi_flags |= CFIF_SHUT_RD;
4717 cfil_sock_notify_shutdown(so, SHUT_RD);
4718 }
4719 /*
4720 * shutdown write: SHUT_WR or SHUT_RDWR
4721 */
4722 if (*how != SHUT_RD) {
4723 if (so->so_cfil->cfi_flags & CFIF_SHUT_WR) {
4724 error = ENOTCONN;
4725 goto done;
4726 }
4727 so->so_cfil->cfi_flags |= CFIF_SHUT_WR;
4728 cfil_sock_notify_shutdown(so, SHUT_WR);
4729 /*
4730 * When outgoing data is pending, we delay the shutdown at the
4731 * protocol level until the content filters give the final
4732 * verdict on the pending data.
4733 */
4734 if (cfil_sock_data_pending(&so->so_snd) != 0) {
4735 /*
4736 * When shutting down the read and write sides at once
4737 * we can proceed to the final shutdown of the read
4738 * side. Otherwise, we just return.
4739 */
4740 if (*how == SHUT_WR) {
4741 error = EJUSTRETURN;
4742 } else if (*how == SHUT_RDWR) {
4743 *how = SHUT_RD;
4744 }
4745 }
4746 }
4747 done:
4748 return error;
4749 }
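/*
 * Example of the downgrade above: for shutdown(s, SHUT_RDWR) while
 * outgoing data still awaits a verdict, *how is rewritten to SHUT_RD so
 * the read side shuts down now; the write side is finished later by
 * cfil_service_inject_queue() calling soshutdownlock_final() once
 * cfil_sock_data_pending() reaches zero. A plain SHUT_WR in the same
 * situation returns EJUSTRETURN, deferring the protocol-level shutdown
 * entirely.
 */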
4750
4751 /*
4752 * This is called when the socket is closed and there is no more
4753 * opportunity for filtering
4754 */
4755 void
4756 cfil_sock_is_closed(struct socket *so)
4757 {
4758 errno_t error = 0;
4759 int kcunit;
4760
4761 if (IS_UDP(so)) {
4762 cfil_sock_udp_is_closed(so);
4763 return;
4764 }
4765
4766 if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL) {
4767 return;
4768 }
4769
4770 CFIL_LOG(LOG_INFO, "so %llx", (uint64_t)VM_KERNEL_ADDRPERM(so));
4771
4772 socket_lock_assert_owned(so);
4773
4774 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
4775 /* Let the filters know of the closing */
4776 error = cfil_dispatch_closed_event(so, so->so_cfil, kcunit);
4777 }
4778
4779 /* Last chance to push passed data out */
4780 error = cfil_acquire_sockbuf(so, so->so_cfil, 1);
4781 if (error == 0) {
4782 cfil_service_inject_queue(so, so->so_cfil, 1);
4783 }
4784 cfil_release_sockbuf(so, 1);
4785
4786 so->so_cfil->cfi_flags |= CFIF_SOCK_CLOSED;
4787
4788 /* Pending data needs to go */
4789 cfil_flush_queues(so, so->so_cfil);
4790
4791 CFIL_INFO_VERIFY(so->so_cfil);
4792 }
4793
4794 /*
4795 * This is called when the socket is disconnected so let the filters
4796 * know about the disconnection and that no more data will come
4797 *
4798 * The how parameter has the same values as soshutdown()
4799 */
4800 void
4801 cfil_sock_notify_shutdown(struct socket *so, int how)
4802 {
4803 errno_t error = 0;
4804 int kcunit;
4805
4806 if (IS_UDP(so)) {
4807 cfil_sock_udp_notify_shutdown(so, how, 0, 0);
4808 return;
4809 }
4810
4811 if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL) {
4812 return;
4813 }
4814
4815 CFIL_LOG(LOG_INFO, "so %llx how %d",
4816 (uint64_t)VM_KERNEL_ADDRPERM(so), how);
4817
4818 socket_lock_assert_owned(so);
4819
4820 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
4821 /* Disconnect incoming side */
4822 if (how != SHUT_WR) {
4823 error = cfil_dispatch_disconnect_event(so, so->so_cfil, kcunit, 0);
4824 }
4825 /* Disconnect outgoing side */
4826 if (how != SHUT_RD) {
4827 error = cfil_dispatch_disconnect_event(so, so->so_cfil, kcunit, 1);
4828 }
4829 }
4830 }
4831
4832 static int
4833 cfil_filters_attached(struct socket *so)
4834 {
4835 struct cfil_entry *entry;
4836 uint32_t kcunit;
4837 int attached = 0;
4838
4839 if (IS_UDP(so)) {
4840 return cfil_filters_udp_attached(so, FALSE);
4841 }
4842
4843 if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL) {
4844 return 0;
4845 }
4846
4847 socket_lock_assert_owned(so);
4848
4849 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
4850 entry = &so->so_cfil->cfi_entries[kcunit - 1];
4851
4852 /* Are we attached to the filter? */
4853 if (entry->cfe_filter == NULL) {
4854 continue;
4855 }
4856 if ((entry->cfe_flags & CFEF_SENT_SOCK_ATTACHED) == 0) {
4857 continue;
4858 }
4859 if ((entry->cfe_flags & CFEF_CFIL_DETACHED) != 0) {
4860 continue;
4861 }
4862 attached = 1;
4863 break;
4864 }
4865
4866 return attached;
4867 }
4868
4869 /*
4870 * This is called when the socket is closed and we are waiting for
4871 * the filters to give the final pass or drop
4872 */
4873 void
4874 cfil_sock_close_wait(struct socket *so)
4875 {
4876 lck_mtx_t *mutex_held;
4877 struct timespec ts;
4878 int error;
4879
4880 if (IS_UDP(so)) {
4881 cfil_sock_udp_close_wait(so);
4882 return;
4883 }
4884
4885 if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL) {
4886 return;
4887 }
4888
4889 CFIL_LOG(LOG_INFO, "so %llx", (uint64_t)VM_KERNEL_ADDRPERM(so));
4890
4891 if (so->so_proto->pr_getlock != NULL) {
4892 mutex_held = (*so->so_proto->pr_getlock)(so, PR_F_WILLUNLOCK);
4893 } else {
4894 mutex_held = so->so_proto->pr_domain->dom_mtx;
4895 }
4896 LCK_MTX_ASSERT(mutex_held, LCK_MTX_ASSERT_OWNED);
4897
4898 while (cfil_filters_attached(so)) {
4899 /*
4900 * Notify the filters we are going away so they can detach
4901 */
4902 cfil_sock_notify_shutdown(so, SHUT_RDWR);
4903
4904 /*
4905 * Check whether we still need to wait after the filters have been
4906 * notified of the disconnection
4907 */
4908 if (cfil_filters_attached(so) == 0) {
4909 break;
4910 }
4911
4912 CFIL_LOG(LOG_INFO, "so %llx waiting",
4913 (uint64_t)VM_KERNEL_ADDRPERM(so));
4914
4915 ts.tv_sec = cfil_close_wait_timeout / 1000;
4916 ts.tv_nsec = (cfil_close_wait_timeout % 1000) *
4917 NSEC_PER_USEC * 1000;
4918
4919 OSIncrementAtomic(&cfil_stats.cfs_close_wait);
4920 so->so_cfil->cfi_flags |= CFIF_CLOSE_WAIT;
4921 error = msleep((caddr_t)so->so_cfil, mutex_held,
4922 PSOCK | PCATCH, "cfil_sock_close_wait", &ts);
4923 so->so_cfil->cfi_flags &= ~CFIF_CLOSE_WAIT;
4924
4925 CFIL_LOG(LOG_NOTICE, "so %llx timed out %d",
4926 (uint64_t)VM_KERNEL_ADDRPERM(so), (error != 0));
4927
4928 /*
4929 * Force close in case of timeout
4930 */
4931 if (error != 0) {
4932 OSIncrementAtomic(&cfil_stats.cfs_close_wait_timeout);
4933 break;
4934 }
4935 }
4936 }
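/*
 * The msleep() timeout above treats cfil_close_wait_timeout as
 * milliseconds: a value of 1000 yields ts = { 1, 0 }, and 1500 yields
 * ts = { 1, 500000000 } since (1500 % 1000) * NSEC_PER_USEC * 1000
 * equals 500 * 1000 * 1000 nanoseconds.
 */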
4937
4938 /*
4939 * Returns the size of the data currently held by the content filters
4940 */
4941 int32_t
4942 cfil_sock_data_pending(struct sockbuf *sb)
4943 {
4944 struct socket *so = sb->sb_so;
4945 uint64_t pending = 0;
4946
4947 if (IS_UDP(so)) {
4948 return cfil_sock_udp_data_pending(sb, FALSE);
4949 }
4950
4951 if ((so->so_flags & SOF_CONTENT_FILTER) != 0 && so->so_cfil != NULL) {
4952 struct cfi_buf *cfi_buf;
4953
4954 socket_lock_assert_owned(so);
4955
4956 if ((sb->sb_flags & SB_RECV) == 0) {
4957 cfi_buf = &so->so_cfil->cfi_snd;
4958 } else {
4959 cfi_buf = &so->so_cfil->cfi_rcv;
4960 }
4961
4962 pending = cfi_buf->cfi_pending_last -
4963 cfi_buf->cfi_pending_first;
4964
4965 /*
4966 * If we are limited by the "chars of mbufs used" roughly
4967 * adjust so we won't overcommit
4968 */
4969 if (pending > (uint64_t)cfi_buf->cfi_pending_mbcnt) {
4970 pending = cfi_buf->cfi_pending_mbcnt;
4971 }
4972 }
4973
4974 VERIFY(pending < INT32_MAX);
4975
4976 return (int32_t)(pending);
4977 }
4978
4979 /*
4980 * Return the socket buffer space used by data being held by content filters
4981 * so processes won't clog the socket buffer
4982 */
4983 int32_t
4984 cfil_sock_data_space(struct sockbuf *sb)
4985 {
4986 struct socket *so = sb->sb_so;
4987 uint64_t pending = 0;
4988
4989 if (IS_UDP(so)) {
4990 return cfil_sock_udp_data_pending(sb, TRUE);
4991 }
4992
4993 if ((so->so_flags & SOF_CONTENT_FILTER) != 0 && so->so_cfil != NULL &&
4994 so->so_snd.sb_cfil_thread != current_thread()) {
4995 struct cfi_buf *cfi_buf;
4996
4997 socket_lock_assert_owned(so);
4998
4999 if ((sb->sb_flags & SB_RECV) == 0) {
5000 cfi_buf = &so->so_cfil->cfi_snd;
5001 } else {
5002 cfi_buf = &so->so_cfil->cfi_rcv;
5003 }
5004
5005 pending = cfi_buf->cfi_pending_last -
5006 cfi_buf->cfi_pending_first;
5007
5008 /*
5009 * If we are limited by the "chars of mbufs used" roughly
5010 * adjust so we won't overcommit
5011 */
5012 if ((uint64_t)cfi_buf->cfi_pending_mbcnt > pending) {
5013 pending = cfi_buf->cfi_pending_mbcnt;
5014 }
5015 }
5016
5017 VERIFY(pending < INT32_MAX);
5018
5019 return (int32_t)(pending);
5020 }
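/*
 * cfil_sock_data_pending() and cfil_sock_data_space() report the same
 * quantity, except that the space variant ignores the thread currently
 * re-injecting on the send buffer (sb_cfil_thread): that thread should
 * not be throttled by the very data it is in the process of pushing
 * back into the socket.
 */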
5021
5022 /*
5023 * A callback from the socket and protocol layer when data becomes
5024 * available in the socket buffer, giving the content filter a chance
5025 * to re-inject data that was held back
5026 */
5027 void
5028 cfil_sock_buf_update(struct sockbuf *sb)
5029 {
5030 int outgoing;
5031 int error;
5032 struct socket *so = sb->sb_so;
5033
5034 if (IS_UDP(so)) {
5035 cfil_sock_udp_buf_update(sb);
5036 return;
5037 }
5038
5039 if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL) {
5040 return;
5041 }
5042
5043 if (!cfil_sbtrim) {
5044 return;
5045 }
5046
5047 socket_lock_assert_owned(so);
5048
5049 if ((sb->sb_flags & SB_RECV) == 0) {
5050 if ((so->so_cfil->cfi_flags & CFIF_RETRY_INJECT_OUT) == 0) {
5051 return;
5052 }
5053 outgoing = 1;
5054 OSIncrementAtomic(&cfil_stats.cfs_inject_q_out_retry);
5055 } else {
5056 if ((so->so_cfil->cfi_flags & CFIF_RETRY_INJECT_IN) == 0) {
5057 return;
5058 }
5059 outgoing = 0;
5060 OSIncrementAtomic(&cfil_stats.cfs_inject_q_in_retry);
5061 }
5062
5063 CFIL_LOG(LOG_NOTICE, "so %llx outgoing %d",
5064 (uint64_t)VM_KERNEL_ADDRPERM(so), outgoing);
5065
5066 error = cfil_acquire_sockbuf(so, so->so_cfil, outgoing);
5067 if (error == 0) {
5068 cfil_service_inject_queue(so, so->so_cfil, outgoing);
5069 }
5070 cfil_release_sockbuf(so, outgoing);
5071 }
5072
5073 int
5074 sysctl_cfil_filter_list(struct sysctl_oid *oidp, void *arg1, int arg2,
5075 struct sysctl_req *req)
5076 {
5077 #pragma unused(oidp, arg1, arg2)
5078 int error = 0;
5079 size_t len = 0;
5080 u_int32_t i;
5081
5082 /* Read only */
5083 if (req->newptr != USER_ADDR_NULL) {
5084 return EPERM;
5085 }
5086
5087 cfil_rw_lock_shared(&cfil_lck_rw);
5088
5089 for (i = 0; content_filters != NULL && i < MAX_CONTENT_FILTER; i++) {
5090 struct cfil_filter_stat filter_stat;
5091 struct content_filter *cfc = content_filters[i];
5092
5093 if (cfc == NULL) {
5094 continue;
5095 }
5096
5097 /* If just asking for the size */
5098 if (req->oldptr == USER_ADDR_NULL) {
5099 len += sizeof(struct cfil_filter_stat);
5100 continue;
5101 }
5102
5103 bzero(&filter_stat, sizeof(struct cfil_filter_stat));
5104 filter_stat.cfs_len = sizeof(struct cfil_filter_stat);
5105 filter_stat.cfs_filter_id = cfc->cf_kcunit;
5106 filter_stat.cfs_flags = cfc->cf_flags;
5107 filter_stat.cfs_sock_count = cfc->cf_sock_count;
5108 filter_stat.cfs_necp_control_unit = cfc->cf_necp_control_unit;
5109
5110 error = SYSCTL_OUT(req, &filter_stat,
5111 sizeof(struct cfil_filter_stat));
5112 if (error != 0) {
5113 break;
5114 }
5115 }
5116 /* If just asking for the size */
5117 if (req->oldptr == USER_ADDR_NULL) {
5118 req->oldidx = len;
5119 }
5120
5121 cfil_rw_unlock_shared(&cfil_lck_rw);
5122
5123 #if SHOW_DEBUG
5124 if (req->oldptr != USER_ADDR_NULL) {
5125 for (i = 1; content_filters != NULL && i <= MAX_CONTENT_FILTER; i++) {
5126 cfil_filter_show(i);
5127 }
5128 }
5129 #endif
5130
5131 return error;
5132 }
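/*
 * A monitoring tool would read this with the usual two-step sysctl
 * dance. Minimal sketch; "oid_name" stands for whatever MIB name the
 * SYSCTL_PROC declaration elsewhere in this file registers for this
 * handler:
 *
 *	size_t len = 0;
 *	sysctlbyname(oid_name, NULL, &len, NULL, 0);	// size probe
 *	struct cfil_filter_stat *stats = malloc(len);
 *	sysctlbyname(oid_name, stats, &len, NULL, 0);	// fetch
 *	// len / sizeof(struct cfil_filter_stat) entries are returned
 */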
5133
5134 static int
5135 sysctl_cfil_sock_list(struct sysctl_oid *oidp, void *arg1, int arg2,
5136 struct sysctl_req *req)
5137 {
5138 #pragma unused(oidp, arg1, arg2)
5139 int error = 0;
5140 u_int32_t i;
5141 struct cfil_info *cfi;
5142
5143 /* Read only */
5144 if (req->newptr != USER_ADDR_NULL) {
5145 return EPERM;
5146 }
5147
5148 cfil_rw_lock_shared(&cfil_lck_rw);
5149
5150 /*
5151 * If just asking for the size
5152 */
5153 if (req->oldptr == USER_ADDR_NULL) {
5154 req->oldidx = cfil_sock_attached_count *
5155 sizeof(struct cfil_sock_stat);
5156 /* Bump the length in case new sockets get attached */
5157 req->oldidx += req->oldidx >> 3;
5158 goto done;
5159 }
5160
5161 TAILQ_FOREACH(cfi, &cfil_sock_head, cfi_link) {
5162 struct cfil_entry *entry;
5163 struct cfil_sock_stat stat;
5164 struct socket *so = cfi->cfi_so;
5165
5166 bzero(&stat, sizeof(struct cfil_sock_stat));
5167 stat.cfs_len = sizeof(struct cfil_sock_stat);
5168 stat.cfs_sock_id = cfi->cfi_sock_id;
5169 stat.cfs_flags = cfi->cfi_flags;
5170
5171 if (so != NULL) {
5172 stat.cfs_pid = so->last_pid;
5173 memcpy(stat.cfs_uuid, so->last_uuid,
5174 sizeof(uuid_t));
5175 if (so->so_flags & SOF_DELEGATED) {
5176 stat.cfs_e_pid = so->e_pid;
5177 memcpy(stat.cfs_e_uuid, so->e_uuid,
5178 sizeof(uuid_t));
5179 } else {
5180 stat.cfs_e_pid = so->last_pid;
5181 memcpy(stat.cfs_e_uuid, so->last_uuid,
5182 sizeof(uuid_t));
5183 }
5184
5185 stat.cfs_sock_family = so->so_proto->pr_domain->dom_family;
5186 stat.cfs_sock_type = so->so_proto->pr_type;
5187 stat.cfs_sock_protocol = so->so_proto->pr_protocol;
5188 }
5189
5190 stat.cfs_snd.cbs_pending_first =
5191 cfi->cfi_snd.cfi_pending_first;
5192 stat.cfs_snd.cbs_pending_last =
5193 cfi->cfi_snd.cfi_pending_last;
5194 stat.cfs_snd.cbs_inject_q_len =
5195 cfil_queue_len(&cfi->cfi_snd.cfi_inject_q);
5196 stat.cfs_snd.cbs_pass_offset =
5197 cfi->cfi_snd.cfi_pass_offset;
5198
5199 stat.cfs_rcv.cbs_pending_first =
5200 cfi->cfi_rcv.cfi_pending_first;
5201 stat.cfs_rcv.cbs_pending_last =
5202 cfi->cfi_rcv.cfi_pending_last;
5203 stat.cfs_rcv.cbs_inject_q_len =
5204 cfil_queue_len(&cfi->cfi_rcv.cfi_inject_q);
5205 stat.cfs_rcv.cbs_pass_offset =
5206 cfi->cfi_rcv.cfi_pass_offset;
5207
5208 for (i = 0; i < MAX_CONTENT_FILTER; i++) {
5209 struct cfil_entry_stat *estat;
5210 struct cfe_buf *ebuf;
5211 struct cfe_buf_stat *sbuf;
5212
5213 entry = &cfi->cfi_entries[i];
5214
5215 estat = &stat.ces_entries[i];
5216
5217 estat->ces_len = sizeof(struct cfil_entry_stat);
5218 estat->ces_filter_id = entry->cfe_filter ?
5219 entry->cfe_filter->cf_kcunit : 0;
5220 estat->ces_flags = entry->cfe_flags;
5221 estat->ces_necp_control_unit =
5222 entry->cfe_necp_control_unit;
5223
5224 estat->ces_last_event.tv_sec =
5225 (int64_t)entry->cfe_last_event.tv_sec;
5226 estat->ces_last_event.tv_usec =
5227 (int64_t)entry->cfe_last_event.tv_usec;
5228
5229 estat->ces_last_action.tv_sec =
5230 (int64_t)entry->cfe_last_action.tv_sec;
5231 estat->ces_last_action.tv_usec =
5232 (int64_t)entry->cfe_last_action.tv_usec;
5233
5234 ebuf = &entry->cfe_snd;
5235 sbuf = &estat->ces_snd;
5236 sbuf->cbs_pending_first =
5237 cfil_queue_offset_first(&ebuf->cfe_pending_q);
5238 sbuf->cbs_pending_last =
5239 cfil_queue_offset_last(&ebuf->cfe_pending_q);
5240 sbuf->cbs_ctl_first =
5241 cfil_queue_offset_first(&ebuf->cfe_ctl_q);
5242 sbuf->cbs_ctl_last =
5243 cfil_queue_offset_last(&ebuf->cfe_ctl_q);
5244 sbuf->cbs_pass_offset = ebuf->cfe_pass_offset;
5245 sbuf->cbs_peek_offset = ebuf->cfe_peek_offset;
5246 sbuf->cbs_peeked = ebuf->cfe_peeked;
5247
5248 ebuf = &entry->cfe_rcv;
5249 sbuf = &estat->ces_rcv;
5250 sbuf->cbs_pending_first =
5251 cfil_queue_offset_first(&ebuf->cfe_pending_q);
5252 sbuf->cbs_pending_last =
5253 cfil_queue_offset_last(&ebuf->cfe_pending_q);
5254 sbuf->cbs_ctl_first =
5255 cfil_queue_offset_first(&ebuf->cfe_ctl_q);
5256 sbuf->cbs_ctl_last =
5257 cfil_queue_offset_last(&ebuf->cfe_ctl_q);
5258 sbuf->cbs_pass_offset = ebuf->cfe_pass_offset;
5259 sbuf->cbs_peek_offset = ebuf->cfe_peek_offset;
5260 sbuf->cbs_peeked = ebuf->cfe_peeked;
5261 }
5262 error = SYSCTL_OUT(req, &stat,
5263 sizeof(struct cfil_sock_stat));
5264 if (error != 0) {
5265 break;
5266 }
5267 }
5268 done:
5269 cfil_rw_unlock_shared(&cfil_lck_rw);
5270
5271 #if SHOW_DEBUG
5272 if (req->oldptr != USER_ADDR_NULL) {
5273 cfil_info_show();
5274 }
5275 #endif
5276
5277 return error;
5278 }
5279
5280 /*
5281 * UDP Socket Support
5282 */
5283 static void
5284 cfil_hash_entry_log(int level, struct socket *so, struct cfil_hash_entry *entry, uint64_t sockId, const char* msg)
5285 {
5286 char local[MAX_IPv6_STR_LEN + 6];
5287 char remote[MAX_IPv6_STR_LEN + 6];
5288 const void *addr;
5289
5290 // No sock or not UDP, no-op
5291 if (so == NULL || entry == NULL) {
5292 return;
5293 }
5294
5295 local[0] = remote[0] = 0x0;
5296
5297 switch (entry->cfentry_family) {
5298 case AF_INET6:
5299 addr = &entry->cfentry_laddr.addr6;
5300 inet_ntop(AF_INET6, addr, local, sizeof(local));
5301 addr = &entry->cfentry_faddr.addr6;
5302 inet_ntop(AF_INET6, addr, remote, sizeof(remote));
5303 break;
5304 case AF_INET:
5305 addr = &entry->cfentry_laddr.addr46.ia46_addr4.s_addr;
5306 inet_ntop(AF_INET, addr, local, sizeof(local));
5307 addr = &entry->cfentry_faddr.addr46.ia46_addr4.s_addr;
5308 inet_ntop(AF_INET, addr, remote, sizeof(remote));
5309 break;
5310 default:
5311 return;
5312 }
5313
5314 CFIL_LOG(level, "<%s>: <UDP so %llx, entry %p, sockID %llu> lport %d fport %d laddr %s faddr %s",
5315 msg,
5316 (uint64_t)VM_KERNEL_ADDRPERM(so), entry, sockId,
5317 ntohs(entry->cfentry_lport), ntohs(entry->cfentry_fport), local, remote);
5318 }
5319
5320 static void
5321 cfil_inp_log(int level, struct socket *so, const char* msg)
5322 {
5323 struct inpcb *inp = NULL;
5324 char local[MAX_IPv6_STR_LEN + 6];
5325 char remote[MAX_IPv6_STR_LEN + 6];
5326 const void *addr;
5327
5328 if (so == NULL) {
5329 return;
5330 }
5331
5332 inp = sotoinpcb(so);
5333 if (inp == NULL) {
5334 return;
5335 }
5336
5337 local[0] = remote[0] = 0x0;
5338
5339 #if INET6
5340 if (inp->inp_vflag & INP_IPV6) {
5341 addr = &inp->in6p_laddr.s6_addr32;
5342 inet_ntop(AF_INET6, addr, local, sizeof(local));
5343 addr = &inp->in6p_faddr.s6_addr32;
5344 inet_ntop(AF_INET6, addr, remote, sizeof(remote));
5345 } else
5346 #endif /* INET6 */
5347 {
5348 addr = &inp->inp_laddr.s_addr;
5349 inet_ntop(AF_INET, addr, local, sizeof(local));
5350 addr = &inp->inp_faddr.s_addr;
5351 inet_ntop(AF_INET, addr, remote, sizeof(remote));
5352 }
5353
5354 if (so->so_cfil != NULL) {
5355 CFIL_LOG(level, "<%s>: <%s so %llx - flags 0x%x 0x%x, sockID %llu> lport %d fport %d laddr %s faddr %s",
5356 msg, IS_UDP(so) ? "UDP" : "TCP",
5357 (uint64_t)VM_KERNEL_ADDRPERM(so), inp->inp_flags, inp->inp_socket->so_flags, so->so_cfil->cfi_sock_id,
5358 ntohs(inp->inp_lport), ntohs(inp->inp_fport), local, remote);
5359 } else {
5360 CFIL_LOG(level, "<%s>: <%s so %llx - flags 0x%x 0x%x> lport %d fport %d laddr %s faddr %s",
5361 msg, IS_UDP(so) ? "UDP" : "TCP",
5362 (uint64_t)VM_KERNEL_ADDRPERM(so), inp->inp_flags, inp->inp_socket->so_flags,
5363 ntohs(inp->inp_lport), ntohs(inp->inp_fport), local, remote);
5364 }
5365 }
5366
5367 static void
5368 cfil_info_log(int level, struct cfil_info *cfil_info, const char* msg)
5369 {
5370 if (cfil_info == NULL) {
5371 return;
5372 }
5373
5374 if (cfil_info->cfi_hash_entry != NULL) {
5375 cfil_hash_entry_log(level, cfil_info->cfi_so, cfil_info->cfi_hash_entry, cfil_info->cfi_sock_id, msg);
5376 } else {
5377 cfil_inp_log(level, cfil_info->cfi_so, msg);
5378 }
5379 }
5380
5381 errno_t
5382 cfil_db_init(struct socket *so)
5383 {
5384 errno_t error = 0;
5385 struct cfil_db *db = NULL;
5386
5387 CFIL_LOG(LOG_INFO, "");
5388
5389 db = zalloc(cfil_db_zone);
5390 if (db == NULL) {
5391 error = ENOMEM;
5392 goto done;
5393 }
5394 bzero(db, sizeof(struct cfil_db));
5395 db->cfdb_so = so;
5396 db->cfdb_hashbase = hashinit(CFILHASHSIZE, M_CFIL, &db->cfdb_hashmask);
5397 if (db->cfdb_hashbase == NULL) {
5398 zfree(cfil_db_zone, db);
5399 db = NULL;
5400 error = ENOMEM;
5401 goto done;
5402 }
5403
5404 so->so_cfil_db = db;
5405
5406 done:
5407 return error;
5408 }
5409
5410 void
5411 cfil_db_free(struct socket *so)
5412 {
5413 struct cfil_hash_entry *entry = NULL;
5414 struct cfil_hash_entry *temp_entry = NULL;
5415 struct cfilhashhead *cfilhash = NULL;
5416 struct cfil_db *db = NULL;
5417
5418 CFIL_LOG(LOG_INFO, "");
5419
5420 if (so == NULL || so->so_cfil_db == NULL) {
5421 return;
5422 }
5423 db = so->so_cfil_db;
5424
5425 #if LIFECYCLE_DEBUG
5426 CFIL_LOG(LOG_ERR, "CFIL: LIFECYCLE: <so %llx, db %p> freeing db (count == %d)",
5427 (uint64_t)VM_KERNEL_ADDRPERM(so), db, db->cfdb_count);
5428 #endif
5429
5430 for (int i = 0; i < CFILHASHSIZE; i++) {
5431 cfilhash = &db->cfdb_hashbase[i];
5432 LIST_FOREACH_SAFE(entry, cfilhash, cfentry_link, temp_entry) {
5433 if (entry->cfentry_cfil != NULL) {
5434 #if LIFECYCLE_DEBUG
5435 cfil_info_log(LOG_ERR, entry->cfentry_cfil, "CFIL: LIFECYCLE: DB FREE CLEAN UP");
5436 #endif
5437 cfil_info_free(entry->cfentry_cfil);
5438 OSIncrementAtomic(&cfil_stats.cfs_sock_detached);
5439 entry->cfentry_cfil = NULL;
5440 }
5441
5442 cfil_db_delete_entry(db, entry);
5443 if (so->so_flags & SOF_CONTENT_FILTER) {
5444 if (db->cfdb_count == 0) {
5445 so->so_flags &= ~SOF_CONTENT_FILTER;
5446 }
5447 VERIFY(so->so_usecount > 0);
5448 so->so_usecount--;
5449 }
5450 }
5451 }
5452
5453 // Make sure all entries are cleaned up!
5454 VERIFY(db->cfdb_count == 0);
5455 #if LIFECYCLE_DEBUG
5456 CFIL_LOG(LOG_ERR, "CFIL: LIFECYCLE: so usecount %d", so->so_usecount);
5457 #endif
5458
5459 FREE(db->cfdb_hashbase, M_CFIL);
5460 zfree(cfil_db_zone, db);
5461 so->so_cfil_db = NULL;
5462 }
5463
5464 static bool
5465 fill_cfil_hash_entry_from_address(struct cfil_hash_entry *entry, bool isLocal, struct sockaddr *addr)
5466 {
5467 struct sockaddr_in *sin = NULL;
5468 struct sockaddr_in6 *sin6 = NULL;
5469
5470 if (entry == NULL || addr == NULL) {
5471 return FALSE;
5472 }
5473
5474 switch (addr->sa_family) {
5475 case AF_INET:
5476 sin = satosin(addr);
5477 if (sin->sin_len != sizeof(*sin)) {
5478 return FALSE;
5479 }
5480 if (isLocal == TRUE) {
5481 entry->cfentry_lport = sin->sin_port;
5482 entry->cfentry_laddr.addr46.ia46_addr4.s_addr = sin->sin_addr.s_addr;
5483 } else {
5484 entry->cfentry_fport = sin->sin_port;
5485 entry->cfentry_faddr.addr46.ia46_addr4.s_addr = sin->sin_addr.s_addr;
5486 }
5487 entry->cfentry_family = AF_INET;
5488 return TRUE;
5489 case AF_INET6:
5490 sin6 = satosin6(addr);
5491 if (sin6->sin6_len != sizeof(*sin6)) {
5492 return FALSE;
5493 }
5494 if (isLocal == TRUE) {
5495 entry->cfentry_lport = sin6->sin6_port;
5496 entry->cfentry_laddr.addr6 = sin6->sin6_addr;
5497 } else {
5498 entry->cfentry_fport = sin6->sin6_port;
5499 entry->cfentry_faddr.addr6 = sin6->sin6_addr;
5500 }
5501 entry->cfentry_family = AF_INET6;
5502 return TRUE;
5503 default:
5504 return FALSE;
5505 }
5506 }
5507
5508 static bool
5509 fill_cfil_hash_entry_from_inp(struct cfil_hash_entry *entry, bool isLocal, struct inpcb *inp)
5510 {
5511 if (entry == NULL || inp == NULL) {
5512 return FALSE;
5513 }
5514
5515 if (inp->inp_vflag & INP_IPV4) {
5516 if (isLocal == TRUE) {
5517 entry->cfentry_lport = inp->inp_lport;
5518 entry->cfentry_laddr.addr46.ia46_addr4.s_addr = inp->inp_laddr.s_addr;
5519 } else {
5520 entry->cfentry_fport = inp->inp_fport;
5521 entry->cfentry_faddr.addr46.ia46_addr4.s_addr = inp->inp_faddr.s_addr;
5522 }
5523 entry->cfentry_family = AF_INET;
5524 return TRUE;
5525 } else if (inp->inp_vflag & INP_IPV6) {
5526 if (isLocal == TRUE) {
5527 entry->cfentry_lport = inp->inp_lport;
5528 entry->cfentry_laddr.addr6 = inp->in6p_laddr;
5529 } else {
5530 entry->cfentry_fport = inp->inp_fport;
5531 entry->cfentry_faddr.addr6 = inp->in6p_faddr;
5532 }
5533 entry->cfentry_family = AF_INET6;
5534 return TRUE;
5535 }
5536 return FALSE;
5537 }
5538
5539 bool
5540 check_port(struct sockaddr *addr, u_short port)
5541 {
5542 struct sockaddr_in *sin = NULL;
5543 struct sockaddr_in6 *sin6 = NULL;
5544
5545 if (addr == NULL || port == 0) {
5546 return FALSE;
5547 }
5548
5549 switch (addr->sa_family) {
5550 case AF_INET:
5551 sin = satosin(addr);
5552 if (sin->sin_len != sizeof(*sin)) {
5553 return FALSE;
5554 }
5555 if (port == ntohs(sin->sin_port)) {
5556 return TRUE;
5557 }
5558 break;
5559 case AF_INET6:
5560 sin6 = satosin6(addr);
5561 if (sin6->sin6_len != sizeof(*sin6)) {
5562 return FALSE;
5563 }
5564 if (port == ntohs(sin6->sin6_port)) {
5565 return TRUE;
5566 }
5567 break;
5568 default:
5569 break;
5570 }
5571 return FALSE;
5572 }
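/*
 * check_port() compares a port given in host byte order against the port stored
 * in a sockaddr (network byte order). An illustrative (hypothetical) use:
 *
 *	if (check_port(remote, 53)) {
 *		// skip filtering for this well-known destination port
 *	}
 */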
5573
5574 struct cfil_hash_entry *
5575 cfil_db_lookup_entry_with_sockid(struct cfil_db *db, u_int64_t sock_id)
5576 {
5577 struct cfilhashhead *cfilhash = NULL;
5578 u_int32_t flowhash = (u_int32_t)(sock_id & 0x0ffffffff);
5579 struct cfil_hash_entry *nextentry;
5580
5581 if (db == NULL || db->cfdb_hashbase == NULL || sock_id == 0) {
5582 return NULL;
5583 }
5584
5585 flowhash &= db->cfdb_hashmask;
5586 cfilhash = &db->cfdb_hashbase[flowhash];
5587
5588 LIST_FOREACH(nextentry, cfilhash, cfentry_link) {
5589 if (nextentry->cfentry_cfil != NULL &&
5590 nextentry->cfentry_cfil->cfi_sock_id == sock_id) {
5591 CFIL_LOG(LOG_DEBUG, "CFIL: UDP <so %llx> matched <id %llu, hash %u>",
5592 (uint64_t)VM_KERNEL_ADDRPERM(db->cfdb_so), nextentry->cfentry_cfil->cfi_sock_id, flowhash);
5593 cfil_hash_entry_log(LOG_DEBUG, db->cfdb_so, nextentry, 0, "CFIL: UDP found entry");
5594 return nextentry;
5595 }
5596 }
5597
5598 CFIL_LOG(LOG_DEBUG, "CFIL: UDP <so %llx> NOT matched <id %llu, hash %u>",
5599 (uint64_t)VM_KERNEL_ADDRPERM(db->cfdb_so), sock_id, flowhash);
5600 return NULL;
5601 }
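/*
 * This lookup relies on how UDP flow sock ids are constructed: the low 32 bits
 * of cfi_sock_id carry the flow hash, so masking the id with cfdb_hashmask
 * selects the bucket directly and only that chain needs to be walked.
 */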
5602
5603 struct cfil_hash_entry *
5604 cfil_db_lookup_entry(struct cfil_db *db, struct sockaddr *local, struct sockaddr *remote)
5605 {
5606 struct cfil_hash_entry matchentry;
5607 struct cfil_hash_entry *nextentry = NULL;
5608 struct inpcb *inp = sotoinpcb(db->cfdb_so);
5609 u_int32_t hashkey_faddr = 0, hashkey_laddr = 0;
5610 int inp_hash_element = 0;
5611 struct cfilhashhead *cfilhash = NULL;
5612
5613 CFIL_LOG(LOG_INFO, "");
5614
5615 if (inp == NULL) {
5616 goto done;
5617 }
5618
5619 if (local != NULL) {
5620 fill_cfil_hash_entry_from_address(&matchentry, TRUE, local);
5621 } else {
5622 fill_cfil_hash_entry_from_inp(&matchentry, TRUE, inp);
5623 }
5624 if (remote != NULL) {
5625 fill_cfil_hash_entry_from_address(&matchentry, FALSE, remote);
5626 } else {
5627 fill_cfil_hash_entry_from_inp(&matchentry, FALSE, inp);
5628 }
5629
5630 #if INET6
5631 if (inp->inp_vflag & INP_IPV6) {
5632 hashkey_faddr = matchentry.cfentry_faddr.addr6.s6_addr32[3];
5633 hashkey_laddr = matchentry.cfentry_laddr.addr6.s6_addr32[3];
5634 } else
5635 #endif /* INET6 */
5636 {
5637 hashkey_faddr = matchentry.cfentry_faddr.addr46.ia46_addr4.s_addr;
5638 hashkey_laddr = matchentry.cfentry_laddr.addr46.ia46_addr4.s_addr;
5639 }
5640
5641 inp_hash_element = CFIL_HASH(hashkey_laddr, hashkey_faddr,
5642 matchentry.cfentry_lport, matchentry.cfentry_fport);
5643 inp_hash_element &= db->cfdb_hashmask;
5644
5645 cfilhash = &db->cfdb_hashbase[inp_hash_element];
5646
5647 LIST_FOREACH(nextentry, cfilhash, cfentry_link) {
5648 #if INET6
5649 if ((inp->inp_vflag & INP_IPV6) &&
5650 nextentry->cfentry_lport == matchentry.cfentry_lport &&
5651 nextentry->cfentry_fport == matchentry.cfentry_fport &&
5652 IN6_ARE_ADDR_EQUAL(&nextentry->cfentry_laddr.addr6, &matchentry.cfentry_laddr.addr6) &&
5653 IN6_ARE_ADDR_EQUAL(&nextentry->cfentry_faddr.addr6, &matchentry.cfentry_faddr.addr6)) {
5654 #if DATA_DEBUG
5655 cfil_hash_entry_log(LOG_DEBUG, db->cfdb_so, &matchentry, 0, "CFIL LOOKUP ENTRY: UDP V6 found entry");
5656 #endif
5657 return nextentry;
5658 } else
5659 #endif /* INET6 */
5660 if (nextentry->cfentry_lport == matchentry.cfentry_lport &&
5661 nextentry->cfentry_fport == matchentry.cfentry_fport &&
5662 nextentry->cfentry_laddr.addr46.ia46_addr4.s_addr == matchentry.cfentry_laddr.addr46.ia46_addr4.s_addr &&
5663 nextentry->cfentry_faddr.addr46.ia46_addr4.s_addr == matchentry.cfentry_faddr.addr46.ia46_addr4.s_addr) {
5664 #if DATA_DEBUG
5665 cfil_hash_entry_log(LOG_DEBUG, db->cfdb_so, &matchentry, 0, "CFIL LOOKUP ENTRY: UDP V4 found entry");
5666 #endif
5667 return nextentry;
5668 }
5669 }
5670
5671 done:
5672 #if DATA_DEBUG
5673 cfil_hash_entry_log(LOG_DEBUG, db->cfdb_so, &matchentry, 0, "CFIL LOOKUP ENTRY: UDP no entry found");
5674 #endif
5675 return NULL;
5676 }
5677
5678 void
5679 cfil_db_delete_entry(struct cfil_db *db, struct cfil_hash_entry *hash_entry)
5680 {
5681 if (hash_entry == NULL) {
5682 return;
5683 }
5684 if (db == NULL || db->cfdb_count == 0) {
5685 return;
5686 }
5687 db->cfdb_count--;
5688 if (db->cfdb_only_entry == hash_entry) {
5689 db->cfdb_only_entry = NULL;
5690 }
5691 LIST_REMOVE(hash_entry, cfentry_link);
5692 zfree(cfil_hash_entry_zone, hash_entry);
5693 }
5694
5695 struct cfil_hash_entry *
5696 cfil_db_add_entry(struct cfil_db *db, struct sockaddr *local, struct sockaddr *remote)
5697 {
5698 struct cfil_hash_entry *entry = NULL;
5699 struct inpcb *inp = sotoinpcb(db->cfdb_so);
5700 u_int32_t hashkey_faddr = 0, hashkey_laddr = 0;
5701 int inp_hash_element = 0;
5702 struct cfilhashhead *cfilhash = NULL;
5703
5704 CFIL_LOG(LOG_INFO, "");
5705
5706 if (inp == NULL) {
5707 goto done;
5708 }
5709
5710 entry = zalloc(cfil_hash_entry_zone);
5711 if (entry == NULL) {
5712 goto done;
5713 }
5714 bzero(entry, sizeof(struct cfil_hash_entry));
5715
5716 if (local != NULL) {
5717 fill_cfil_hash_entry_from_address(entry, TRUE, local);
5718 } else {
5719 fill_cfil_hash_entry_from_inp(entry, TRUE, inp);
5720 }
5721 if (remote != NULL) {
5722 fill_cfil_hash_entry_from_address(entry, FALSE, remote);
5723 } else {
5724 fill_cfil_hash_entry_from_inp(entry, FALSE, inp);
5725 }
5726 entry->cfentry_lastused = net_uptime();
5727
5728 #if INET6
5729 if (inp->inp_vflag & INP_IPV6) {
5730 hashkey_faddr = entry->cfentry_faddr.addr6.s6_addr32[3];
5731 hashkey_laddr = entry->cfentry_laddr.addr6.s6_addr32[3];
5732 } else
5733 #endif /* INET6 */
5734 {
5735 hashkey_faddr = entry->cfentry_faddr.addr46.ia46_addr4.s_addr;
5736 hashkey_laddr = entry->cfentry_laddr.addr46.ia46_addr4.s_addr;
5737 }
5738 entry->cfentry_flowhash = CFIL_HASH(hashkey_laddr, hashkey_faddr,
5739 entry->cfentry_lport, entry->cfentry_fport);
5740 inp_hash_element = entry->cfentry_flowhash & db->cfdb_hashmask;
5741
5742 cfilhash = &db->cfdb_hashbase[inp_hash_element];
5743
5744 LIST_INSERT_HEAD(cfilhash, entry, cfentry_link);
5745 db->cfdb_count++;
5746 db->cfdb_only_entry = entry;
5747 cfil_hash_entry_log(LOG_DEBUG, db->cfdb_so, entry, 0, "CFIL: cfil_db_add_entry: ADDED");
5748
5749 done:
5750 CFIL_LOG(LOG_DEBUG, "CFIL: UDP <so %llx> total count %d", (uint64_t)VM_KERNEL_ADDRPERM(db->cfdb_so), db->cfdb_count);
5751 return entry;
5752 }
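/*
 * cfdb_only_entry caches the most recently added flow so that, when the
 * database holds exactly one flow (the common case for a connected UDP
 * socket), cfil_db_get_cfil_info() can return it without a hash lookup.
 */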
5753
5754 struct cfil_info *
5755 cfil_db_get_cfil_info(struct cfil_db *db, cfil_sock_id_t id)
5756 {
5757 struct cfil_hash_entry *hash_entry = NULL;
5758
5759 CFIL_LOG(LOG_INFO, "");
5760
5761 if (db == NULL || id == 0) {
5762 CFIL_LOG(LOG_DEBUG, "CFIL: UDP <so %llx> NULL DB <id %llu>",
5763 db ? (uint64_t)VM_KERNEL_ADDRPERM(db->cfdb_so) : 0, id);
5764 return NULL;
5765 }
5766
5767 // This is an optimization for a connected UDP socket, which has only one flow:
5768 // no need to do the hash lookup.
5769 if (db->cfdb_count == 1) {
5770 if (db->cfdb_only_entry && db->cfdb_only_entry->cfentry_cfil &&
5771 db->cfdb_only_entry->cfentry_cfil->cfi_sock_id == id) {
5772 return db->cfdb_only_entry->cfentry_cfil;
5773 }
5774 }
5775
5776 hash_entry = cfil_db_lookup_entry_with_sockid(db, id);
5777 return hash_entry != NULL ? hash_entry->cfentry_cfil : NULL;
5778 }
5779
5780 struct cfil_hash_entry *
5781 cfil_sock_udp_get_flow(struct socket *so, uint32_t filter_control_unit, bool outgoing, struct sockaddr *local, struct sockaddr *remote)
5782 {
5783 struct cfil_hash_entry *hash_entry = NULL;
5784
5785 errno_t error = 0;
5786 socket_lock_assert_owned(so);
5787
5788 // If new socket, allocate cfil db
5789 if (so->so_cfil_db == NULL) {
5790 if (cfil_db_init(so) != 0) {
5791 return NULL;
5792 }
5793 }
5794
5795 // See if flow already exists.
5796 hash_entry = cfil_db_lookup_entry(so->so_cfil_db, local, remote);
5797 if (hash_entry != NULL) {
5798 return hash_entry;
5799 }
5800
5801 hash_entry = cfil_db_add_entry(so->so_cfil_db, local, remote);
5802 if (hash_entry == NULL) {
5803 OSIncrementAtomic(&cfil_stats.cfs_sock_attach_no_mem);
5804 CFIL_LOG(LOG_ERR, "CFIL: UDP failed to add entry");
5805 return NULL;
5806 }
5807
5808 if (cfil_info_alloc(so, hash_entry) == NULL ||
5809 hash_entry->cfentry_cfil == NULL) {
5810 cfil_db_delete_entry(so->so_cfil_db, hash_entry);
5811 CFIL_LOG(LOG_ERR, "CFIL: UDP failed to alloc cfil_info");
5812 OSIncrementAtomic(&cfil_stats.cfs_sock_attach_no_mem);
5813 return NULL;
5814 }
5815 hash_entry->cfentry_cfil->cfi_dir = outgoing ? CFS_CONNECTION_DIR_OUT : CFS_CONNECTION_DIR_IN;
5816
5817 #if LIFECYCLE_DEBUG
5818 cfil_info_log(LOG_ERR, hash_entry->cfentry_cfil, "CFIL: LIFECYCLE: ADDED");
5819 #endif
5820
5821 if (cfil_info_attach_unit(so, filter_control_unit, hash_entry->cfentry_cfil) == 0) {
5822 cfil_info_free(hash_entry->cfentry_cfil);
5823 cfil_db_delete_entry(so->so_cfil_db, hash_entry);
5824 CFIL_LOG(LOG_ERR, "CFIL: UDP cfil_info_attach_unit(%u) failed",
5825 filter_control_unit);
5826 OSIncrementAtomic(&cfil_stats.cfs_sock_attach_failed);
5827 return NULL;
5828 }
5829 CFIL_LOG(LOG_DEBUG, "CFIL: UDP <so %llx> filter_control_unit %u sockID %llu attached",
5830 (uint64_t)VM_KERNEL_ADDRPERM(so),
5831 filter_control_unit, hash_entry->cfentry_cfil->cfi_sock_id);
5832
5833 so->so_flags |= SOF_CONTENT_FILTER;
5834 OSIncrementAtomic(&cfil_stats.cfs_sock_attached);
5835
5836 /* Hold a reference on the socket for each flow */
5837 so->so_usecount++;
5838
5839 error = cfil_dispatch_attach_event(so, hash_entry->cfentry_cfil, 0,
5840 outgoing ? CFS_CONNECTION_DIR_OUT : CFS_CONNECTION_DIR_IN);
5841 /* We can recover from flow control or out of memory errors */
5842 if (error != 0 && error != ENOBUFS && error != ENOMEM) {
5843 return NULL;
5844 }
5845
5846 CFIL_INFO_VERIFY(hash_entry->cfentry_cfil);
5847 return hash_entry;
5848 }
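/*
 * Attach path for a UDP flow, in order: look up or create the hash entry for
 * the address tuple, allocate the per-flow cfil_info, attach it to the filter
 * control unit, mark the socket SOF_CONTENT_FILTER, take a socket reference
 * for the flow, then dispatch the attach event to the user space agent.
 * ENOBUFS/ENOMEM from the dispatch are tolerated since those conditions are
 * recoverable.
 */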
5849
5850 errno_t
5851 cfil_sock_udp_handle_data(bool outgoing, struct socket *so,
5852 struct sockaddr *local, struct sockaddr *remote,
5853 struct mbuf *data, struct mbuf *control, uint32_t flags)
5854 {
5855 #pragma unused(outgoing, so, local, remote, data, control, flags)
5856 errno_t error = 0;
5857 uint32_t filter_control_unit;
5858 struct cfil_hash_entry *hash_entry = NULL;
5859 struct cfil_info *cfil_info = NULL;
5860
5861 socket_lock_assert_owned(so);
5862
5863 if (cfil_active_count == 0) {
5864 CFIL_LOG(LOG_DEBUG, "CFIL: UDP no active filter");
5865 OSIncrementAtomic(&cfil_stats.cfs_sock_attach_in_vain);
5866 return error;
5867 }
5868
5869 // Socket has been blessed to skip content filtering
5870 if ((so->so_flags1 & SOF1_CONTENT_FILTER_SKIP) != 0) {
5871 return error;
5872 }
5873
5874 filter_control_unit = necp_socket_get_content_filter_control_unit(so);
5875 if (filter_control_unit == 0) {
5876 CFIL_LOG(LOG_DEBUG, "CFIL: UDP failed to get control unit");
5877 return error;
5878 }
5879
5880 if (filter_control_unit == NECP_FILTER_UNIT_NO_FILTER) {
5881 return error;
5882 }
5883
5884 if ((filter_control_unit & NECP_MASK_USERSPACE_ONLY) != 0) {
5885 CFIL_LOG(LOG_DEBUG, "CFIL: UDP user space only");
5886 OSIncrementAtomic(&cfil_stats.cfs_sock_userspace_only);
5887 return error;
5888 }
5889
5890 hash_entry = cfil_sock_udp_get_flow(so, filter_control_unit, outgoing, local, remote);
5891 if (hash_entry == NULL || hash_entry->cfentry_cfil == NULL) {
5892 CFIL_LOG(LOG_ERR, "CFIL: Failed to create UDP flow");
5893 return EPIPE;
5894 }
5895 // Update the last-used timestamp; this drives the flow idle timeout (GC)
5896 hash_entry->cfentry_lastused = net_uptime();
5897 cfil_info = hash_entry->cfentry_cfil;
5898
5899 if (cfil_info->cfi_flags & CFIF_DROP) {
5900 #if DATA_DEBUG
5901 cfil_hash_entry_log(LOG_DEBUG, so, hash_entry, 0, "CFIL: UDP DROP");
5902 #endif
5903 return EPIPE;
5904 }
5905 if (control != NULL) {
5906 OSIncrementAtomic(&cfil_stats.cfs_data_in_control);
5907 }
5908 if (data->m_type == MT_OOBDATA) {
5909 CFIL_LOG(LOG_ERR, "so %llx MSG_OOB",
5910 (uint64_t)VM_KERNEL_ADDRPERM(so));
5911 OSIncrementAtomic(&cfil_stats.cfs_data_in_oob);
5912 }
5913
5914 error = cfil_data_common(so, cfil_info, outgoing, remote, data, control, flags);
5915
5916 return error;
5917 }
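/*
 * cfil_sock_udp_handle_data() is the per-datagram entry point for UDP content
 * filtering. It returns early when no filter is active, when the socket is
 * flagged to skip filtering, or when NECP reports no applicable filter control
 * unit; otherwise it resolves the flow for the datagram's address tuple and
 * hands the data to cfil_data_common(). A flow already marked CFIF_DROP makes
 * it return EPIPE.
 */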
5918
5919 /*
5920 * Go through all UDP flows for the specified socket and return TRUE if
5921 * any flow is still attached. If need_wait is TRUE, wait on the first
5922 * attached flow.
5923 */
5924 static int
5925 cfil_filters_udp_attached(struct socket *so, bool need_wait)
5926 {
5927 struct timespec ts;
5928 lck_mtx_t *mutex_held;
5929 struct cfilhashhead *cfilhash = NULL;
5930 struct cfil_db *db = NULL;
5931 struct cfil_hash_entry *hash_entry = NULL;
5932 struct cfil_hash_entry *temp_hash_entry = NULL;
5933 struct cfil_info *cfil_info = NULL;
5934 struct cfil_entry *entry = NULL;
5935 errno_t error = 0;
5936 int kcunit;
5937 int attached = 0;
5938 uint64_t sock_flow_id = 0;
5939
5940 socket_lock_assert_owned(so);
5941
5942 if ((so->so_flags & SOF_CONTENT_FILTER) != 0 && so->so_cfil_db != NULL) {
5943 if (so->so_proto->pr_getlock != NULL) {
5944 mutex_held = (*so->so_proto->pr_getlock)(so, PR_F_WILLUNLOCK);
5945 } else {
5946 mutex_held = so->so_proto->pr_domain->dom_mtx;
5947 }
5948 LCK_MTX_ASSERT(mutex_held, LCK_MTX_ASSERT_OWNED);
5949
5950 db = so->so_cfil_db;
5951
5952 for (int i = 0; i < CFILHASHSIZE; i++) {
5953 cfilhash = &db->cfdb_hashbase[i];
5954
5955 LIST_FOREACH_SAFE(hash_entry, cfilhash, cfentry_link, temp_hash_entry) {
5956 if (hash_entry->cfentry_cfil != NULL) {
5957 cfil_info = hash_entry->cfentry_cfil;
5958 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
5959 entry = &cfil_info->cfi_entries[kcunit - 1];
5960
5961 /* Are we attached to the filter? */
5962 if (entry->cfe_filter == NULL) {
5963 continue;
5964 }
5965
5966 if ((entry->cfe_flags & CFEF_SENT_SOCK_ATTACHED) == 0) {
5967 continue;
5968 }
5969 if ((entry->cfe_flags & CFEF_CFIL_DETACHED) != 0) {
5970 continue;
5971 }
5972
5973 attached = 1;
5974
5975 if (need_wait == TRUE) {
5976 #if LIFECYCLE_DEBUG
5977 cfil_info_log(LOG_ERR, cfil_info, "CFIL: LIFECYCLE: WAIT FOR FLOW TO FINISH");
5978 #endif
5979
5980 ts.tv_sec = cfil_close_wait_timeout / 1000;
5981 ts.tv_nsec = (cfil_close_wait_timeout % 1000) *
5982 NSEC_PER_USEC * 1000;
5983
5984 OSIncrementAtomic(&cfil_stats.cfs_close_wait);
5985 cfil_info->cfi_flags |= CFIF_CLOSE_WAIT;
5986 sock_flow_id = cfil_info->cfi_sock_id;
5987
5988 error = msleep((caddr_t)cfil_info, mutex_held,
5989 PSOCK | PCATCH, "cfil_filters_udp_attached", &ts);
5990
5991 // Woke up from sleep; check that cfil_info is still valid
5992 if (so->so_cfil_db == NULL ||
5993 (cfil_info != cfil_db_get_cfil_info(so->so_cfil_db, sock_flow_id))) {
5994 // cfil_info is not valid, do not continue
5995 goto done;
5996 }
5997
5998 cfil_info->cfi_flags &= ~CFIF_CLOSE_WAIT;
5999
6000 #if LIFECYCLE_DEBUG
6001 cfil_info_log(LOG_ERR, cfil_info, "CFIL: LIFECYCLE: WAIT FOR FLOW DONE");
6002 #endif
6003
6004 /*
6005 * Force close in case of timeout
6006 */
6007 if (error != 0) {
6008 OSIncrementAtomic(&cfil_stats.cfs_close_wait_timeout);
6009 #if LIFECYCLE_DEBUG
6010 cfil_info_log(LOG_ERR, cfil_info, "CFIL: LIFECYCLE: WAIT FOR FLOW TIMED OUT, FORCE DETACH");
6011 #endif
6012 entry->cfe_flags |= CFEF_CFIL_DETACHED;
6013 }
6014 }
6015 goto done;
6016 }
6017 }
6018 }
6019 }
6020 }
6021
6022 done:
6023 return attached;
6024 }
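/*
 * When need_wait is TRUE, the msleep() above is bounded by
 * cfil_close_wait_timeout (in milliseconds); on timeout the entry is
 * force-marked CFEF_CFIL_DETACHED so socket teardown can make progress even
 * if the user space agent never responds.
 */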
6025
6026 int32_t
6027 cfil_sock_udp_data_pending(struct sockbuf *sb, bool check_thread)
6028 {
6029 struct socket *so = sb->sb_so;
6030 struct cfi_buf *cfi_buf;
6031 uint64_t pending = 0;
6032 uint64_t total_pending = 0;
6033 struct cfilhashhead *cfilhash = NULL;
6034 struct cfil_db *db = NULL;
6035 struct cfil_hash_entry *hash_entry = NULL;
6036 struct cfil_hash_entry *temp_hash_entry = NULL;
6037
6038 socket_lock_assert_owned(so);
6039
6040 if ((so->so_flags & SOF_CONTENT_FILTER) != 0 && so->so_cfil_db != NULL &&
6041 (check_thread == FALSE || so->so_snd.sb_cfil_thread != current_thread())) {
6042 db = so->so_cfil_db;
6043
6044 for (int i = 0; i < CFILHASHSIZE; i++) {
6045 cfilhash = &db->cfdb_hashbase[i];
6046
6047 LIST_FOREACH_SAFE(hash_entry, cfilhash, cfentry_link, temp_hash_entry) {
6048 if (hash_entry->cfentry_cfil != NULL) {
6049 if ((sb->sb_flags & SB_RECV) == 0) {
6050 cfi_buf = &hash_entry->cfentry_cfil->cfi_snd;
6051 } else {
6052 cfi_buf = &hash_entry->cfentry_cfil->cfi_rcv;
6053 }
6054
6055 pending = cfi_buf->cfi_pending_last - cfi_buf->cfi_pending_first;
6056 /*
6057 * If the "chars of mbufs used" count is the larger figure, use it as
6058 * the pending estimate so we won't overcommit
6059 */
6060 if ((uint64_t)cfi_buf->cfi_pending_mbcnt > pending) {
6061 pending = cfi_buf->cfi_pending_mbcnt;
6062 }
6063
6064 total_pending += pending;
6065 }
6066 }
6067 }
6068
6069 VERIFY(total_pending < INT32_MAX);
6070 #if DATA_DEBUG
6071 CFIL_LOG(LOG_DEBUG, "CFIL: <so %llx> total pending %llu <check_thread %d>",
6072 (uint64_t)VM_KERNEL_ADDRPERM(so),
6073 total_pending, check_thread);
6074 #endif
6075 }
6076
6077 return (int32_t)(total_pending);
6078 }
6079
6080 int
6081 cfil_sock_udp_notify_shutdown(struct socket *so, int how, int drop_flag, int shut_flag)
6082 {
6083 struct cfil_info *cfil_info = NULL;
6084 struct cfilhashhead *cfilhash = NULL;
6085 struct cfil_db *db = NULL;
6086 struct cfil_hash_entry *hash_entry = NULL;
6087 struct cfil_hash_entry *temp_hash_entry = NULL;
6088 errno_t error = 0;
6089 int done_count = 0;
6090 int kcunit;
6091
6092 socket_lock_assert_owned(so);
6093
6094 if ((so->so_flags & SOF_CONTENT_FILTER) != 0 && so->so_cfil_db != NULL) {
6095 db = so->so_cfil_db;
6096
6097 for (int i = 0; i < CFILHASHSIZE; i++) {
6098 cfilhash = &db->cfdb_hashbase[i];
6099
6100 LIST_FOREACH_SAFE(hash_entry, cfilhash, cfentry_link, temp_hash_entry) {
6101 if (hash_entry->cfentry_cfil != NULL) {
6102 cfil_info = hash_entry->cfentry_cfil;
6103
6104 // This flow is marked as DROP
6105 if (cfil_info->cfi_flags & drop_flag) {
6106 done_count++;
6107 continue;
6108 }
6109
6110 // This flow has been shut already, skip
6111 if (cfil_info->cfi_flags & shut_flag) {
6112 continue;
6113 }
6114 // Mark flow as shut
6115 cfil_info->cfi_flags |= shut_flag;
6116 done_count++;
6117
6118 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
6119 /* Disconnect incoming side */
6120 if (how != SHUT_WR) {
6121 error = cfil_dispatch_disconnect_event(so, cfil_info, kcunit, 0);
6122 }
6123 /* Disconnect outgoing side */
6124 if (how != SHUT_RD) {
6125 error = cfil_dispatch_disconnect_event(so, cfil_info, kcunit, 1);
6126 }
6127 }
6128 }
6129 }
6130 }
6131 }
6132
6133 if (done_count == 0) {
6134 error = ENOTCONN;
6135 }
6136 return error;
6137 }
6138
6139 int
6140 cfil_sock_udp_shutdown(struct socket *so, int *how)
6141 {
6142 int error = 0;
6143
6144 if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || (so->so_cfil_db == NULL)) {
6145 goto done;
6146 }
6147
6148 socket_lock_assert_owned(so);
6149
6150 CFIL_LOG(LOG_INFO, "so %llx how %d",
6151 (uint64_t)VM_KERNEL_ADDRPERM(so), *how);
6152
6153 /*
6154 * Check the state of the socket before the content filter
6155 */
6156 if (*how != SHUT_WR && (so->so_state & SS_CANTRCVMORE) != 0) {
6157 /* read already shut down */
6158 error = ENOTCONN;
6159 goto done;
6160 }
6161 if (*how != SHUT_RD && (so->so_state & SS_CANTSENDMORE) != 0) {
6162 /* write already shut down */
6163 error = ENOTCONN;
6164 goto done;
6165 }
6166
6167 /*
6168 * shutdown read: SHUT_RD or SHUT_RDWR
6169 */
6170 if (*how != SHUT_WR) {
6171 error = cfil_sock_udp_notify_shutdown(so, SHUT_RD, CFIF_DROP, CFIF_SHUT_RD);
6172 if (error != 0) {
6173 goto done;
6174 }
6175 }
6176 /*
6177 * shutdown write: SHUT_WR or SHUT_RDWR
6178 */
6179 if (*how != SHUT_RD) {
6180 error = cfil_sock_udp_notify_shutdown(so, SHUT_WR, CFIF_DROP, CFIF_SHUT_WR);
6181 if (error != 0) {
6182 goto done;
6183 }
6184
6185 /*
6186 * When outgoing data is pending, we delay the shutdown at the
6187 * protocol level until the content filters give the final
6188 * verdict on the pending data.
6189 */
6190 if (cfil_sock_data_pending(&so->so_snd) != 0) {
6191 /*
6192 * When shutting down the read and write sides at once
6193 * we can proceed to the final shutdown of the read
6194 * side. Otherwise, we just return.
6195 */
6196 if (*how == SHUT_WR) {
6197 error = EJUSTRETURN;
6198 } else if (*how == SHUT_RDWR) {
6199 *how = SHUT_RD;
6200 }
6201 }
6202 }
6203 done:
6204 return error;
6205 }
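/*
 * Concretely: for shutdown(SHUT_RDWR) with outgoing data still awaiting a
 * filter verdict, *how is downgraded to SHUT_RD so only the read side is shut
 * down now; for shutdown(SHUT_WR) alone, EJUSTRETURN defers the protocol-level
 * shutdown until the pending data has been resolved.
 */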
6206
6207 void
6208 cfil_sock_udp_close_wait(struct socket *so)
6209 {
6210 socket_lock_assert_owned(so);
6211
6212 while (cfil_filters_udp_attached(so, FALSE)) {
6213 /*
6214 * Notify the filters we are going away so they can detach
6215 */
6216 cfil_sock_udp_notify_shutdown(so, SHUT_RDWR, 0, 0);
6217
6218 /*
6219 * Make sure we still need to wait after the filters are notified
6220 * of the disconnection
6221 */
6222 if (cfil_filters_udp_attached(so, TRUE) == 0) {
6223 break;
6224 }
6225 }
6226 }
6227
6228 void
6229 cfil_sock_udp_is_closed(struct socket *so)
6230 {
6231 struct cfil_info *cfil_info = NULL;
6232 struct cfilhashhead *cfilhash = NULL;
6233 struct cfil_db *db = NULL;
6234 struct cfil_hash_entry *hash_entry = NULL;
6235 struct cfil_hash_entry *temp_hash_entry = NULL;
6236 errno_t error = 0;
6237 int kcunit;
6238
6239 socket_lock_assert_owned(so);
6240
6241 if ((so->so_flags & SOF_CONTENT_FILTER) != 0 && so->so_cfil_db != NULL) {
6242 db = so->so_cfil_db;
6243
6244 for (int i = 0; i < CFILHASHSIZE; i++) {
6245 cfilhash = &db->cfdb_hashbase[i];
6246
6247 LIST_FOREACH_SAFE(hash_entry, cfilhash, cfentry_link, temp_hash_entry) {
6248 if (hash_entry->cfentry_cfil != NULL) {
6249 cfil_info = hash_entry->cfentry_cfil;
6250
6251 for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
6252 /* Let the filters know of the closing */
6253 error = cfil_dispatch_closed_event(so, cfil_info, kcunit);
6254 }
6255
6256 /* Last chance to push passed data out */
6257 error = cfil_acquire_sockbuf(so, cfil_info, 1);
6258 if (error == 0) {
6259 cfil_service_inject_queue(so, cfil_info, 1);
6260 }
6261 cfil_release_sockbuf(so, 1);
6262
6263 cfil_info->cfi_flags |= CFIF_SOCK_CLOSED;
6264
6265 /* Pending data needs to go */
6266 cfil_flush_queues(so, cfil_info);
6267
6268 CFIL_INFO_VERIFY(cfil_info);
6269 }
6270 }
6271 }
6272 }
6273 }
6274
6275 void
6276 cfil_sock_udp_buf_update(struct sockbuf *sb)
6277 {
6278 struct cfil_info *cfil_info = NULL;
6279 struct cfilhashhead *cfilhash = NULL;
6280 struct cfil_db *db = NULL;
6281 struct cfil_hash_entry *hash_entry = NULL;
6282 struct cfil_hash_entry *temp_hash_entry = NULL;
6283 errno_t error = 0;
6284 int outgoing;
6285 struct socket *so = sb->sb_so;
6286
6287 socket_lock_assert_owned(so);
6288
6289 if ((so->so_flags & SOF_CONTENT_FILTER) != 0 && so->so_cfil_db != NULL) {
6290 if (!cfil_sbtrim) {
6291 return;
6292 }
6293
6294 db = so->so_cfil_db;
6295
6296 for (int i = 0; i < CFILHASHSIZE; i++) {
6297 cfilhash = &db->cfdb_hashbase[i];
6298
6299 LIST_FOREACH_SAFE(hash_entry, cfilhash, cfentry_link, temp_hash_entry) {
6300 if (hash_entry->cfentry_cfil != NULL) {
6301 cfil_info = hash_entry->cfentry_cfil;
6302
6303 if ((sb->sb_flags & SB_RECV) == 0) {
6304 if ((cfil_info->cfi_flags & CFIF_RETRY_INJECT_OUT) == 0) {
6305 return;
6306 }
6307 outgoing = 1;
6308 OSIncrementAtomic(&cfil_stats.cfs_inject_q_out_retry);
6309 } else {
6310 if ((cfil_info->cfi_flags & CFIF_RETRY_INJECT_IN) == 0) {
6311 return;
6312 }
6313 outgoing = 0;
6314 OSIncrementAtomic(&cfil_stats.cfs_inject_q_in_retry);
6315 }
6316
6317 CFIL_LOG(LOG_NOTICE, "so %llx outgoing %d",
6318 (uint64_t)VM_KERNEL_ADDRPERM(so), outgoing);
6319
6320 error = cfil_acquire_sockbuf(so, cfil_info, outgoing);
6321 if (error == 0) {
6322 cfil_service_inject_queue(so, cfil_info, outgoing);
6323 }
6324 cfil_release_sockbuf(so, outgoing);
6325 }
6326 }
6327 }
6328 }
6329 }
6330
6331 void
6332 cfil_filter_show(u_int32_t kcunit)
6333 {
6334 struct content_filter *cfc = NULL;
6335 struct cfil_entry *entry;
6336 int count = 0;
6337
6338 if (content_filters == NULL) {
6339 return;
6340 }
6341 if (kcunit > MAX_CONTENT_FILTER) {
6342 return;
6343 }
6344
6345 cfil_rw_lock_shared(&cfil_lck_rw);
6346
6347 if (content_filters[kcunit - 1] == NULL) {
6348 cfil_rw_unlock_shared(&cfil_lck_rw);
6349 return;
6350 }
6351 cfc = content_filters[kcunit - 1];
6352
6353 CFIL_LOG(LOG_ERR, "CFIL: FILTER SHOW: Filter <unit %d, entry count %d> flags <%lx>:",
6354 kcunit, cfc->cf_sock_count, (unsigned long)cfc->cf_flags);
6355 if (cfc->cf_flags & CFF_DETACHING) {
6356 CFIL_LOG(LOG_ERR, "CFIL: FILTER SHOW: - DETACHING");
6357 }
6358 if (cfc->cf_flags & CFF_ACTIVE) {
6359 CFIL_LOG(LOG_ERR, "CFIL: FILTER SHOW: - ACTIVE");
6360 }
6361 if (cfc->cf_flags & CFF_FLOW_CONTROLLED) {
6362 CFIL_LOG(LOG_ERR, "CFIL: FILTER SHOW: - FLOW CONTROLLED");
6363 }
6364
6365 TAILQ_FOREACH(entry, &cfc->cf_sock_entries, cfe_link) {
6366 if (entry->cfe_cfil_info && entry->cfe_cfil_info->cfi_so) {
6367 struct cfil_info *cfil_info = entry->cfe_cfil_info;
6368
6369 count++;
6370
6371 if (entry->cfe_flags & CFEF_CFIL_DETACHED) {
6372 cfil_info_log(LOG_ERR, cfil_info, "CFIL: FILTER SHOW: - DETACHED");
6373 } else {
6374 cfil_info_log(LOG_ERR, cfil_info, "CFIL: FILTER SHOW: - ATTACHED");
6375 }
6376 }
6377 }
6378
6379 CFIL_LOG(LOG_ERR, "CFIL: FILTER SHOW: Filter - total entries shown: %d", count);
6380
6381 cfil_rw_unlock_shared(&cfil_lck_rw);
6382 }
6383
6384 void
6385 cfil_info_show(void)
6386 {
6387 struct cfil_info *cfil_info;
6388 int count = 0;
6389
6390 cfil_rw_lock_shared(&cfil_lck_rw);
6391
6392 CFIL_LOG(LOG_ERR, "CFIL: INFO SHOW: count %d", cfil_sock_attached_count);
6393
6394 TAILQ_FOREACH(cfil_info, &cfil_sock_head, cfi_link) {
6395 count++;
6396
6397 cfil_info_log(LOG_ERR, cfil_info, "CFIL: INFO SHOW");
6398
6399 if (cfil_info->cfi_flags & CFIF_DROP) {
6400 CFIL_LOG(LOG_ERR, "CFIL: INFO FLAG - DROP");
6401 }
6402 if (cfil_info->cfi_flags & CFIF_CLOSE_WAIT) {
6403 CFIL_LOG(LOG_ERR, "CFIL: INFO FLAG - CLOSE_WAIT");
6404 }
6405 if (cfil_info->cfi_flags & CFIF_SOCK_CLOSED) {
6406 CFIL_LOG(LOG_ERR, "CFIL: INFO FLAG - SOCK_CLOSED");
6407 }
6408 if (cfil_info->cfi_flags & CFIF_RETRY_INJECT_IN) {
6409 CFIL_LOG(LOG_ERR, "CFIL: INFO FLAG - RETRY_INJECT_IN");
6410 }
6411 if (cfil_info->cfi_flags & CFIF_RETRY_INJECT_OUT) {
6412 CFIL_LOG(LOG_ERR, "CFIL: INFO FLAG - RETRY_INJECT_OUT");
6413 }
6414 if (cfil_info->cfi_flags & CFIF_SHUT_WR) {
6415 CFIL_LOG(LOG_ERR, "CFIL: INFO FLAG - SHUT_WR");
6416 }
6417 if (cfil_info->cfi_flags & CFIF_SHUT_RD) {
6418 CFIL_LOG(LOG_ERR, "CFIL: INFO FLAG - SHUT_RD");
6419 }
6420 }
6421
6422 CFIL_LOG(LOG_ERR, "CFIL: INFO SHOW: total cfil_info shown: %d", count);
6423
6424 cfil_rw_unlock_shared(&cfil_lck_rw);
6425 }
6426
6427 bool
6428 cfil_info_idle_timed_out(struct cfil_info *cfil_info, int timeout, u_int32_t current_time)
6429 {
6430 if (cfil_info && cfil_info->cfi_hash_entry &&
6431 (current_time - cfil_info->cfi_hash_entry->cfentry_lastused >= (u_int32_t)timeout)) {
6432 #if GC_DEBUG
6433 cfil_info_log(LOG_ERR, cfil_info, "CFIL: flow IDLE timeout expired");
6434 #endif
6435 return true;
6436 }
6437 return false;
6438 }
6439
6440 bool
6441 cfil_info_action_timed_out(struct cfil_info *cfil_info, int timeout)
6442 {
6443 struct cfil_entry *entry;
6444 struct timeval current_tv;
6445 struct timeval diff_time;
6446
6447 if (cfil_info == NULL) {
6448 return false;
6449 }
6450
6451 /*
6452 * If we have queued up more data than passed offset and we haven't received
6453 * an action from user space for a while (the user space filter might have crashed),
6454 * return action timed out.
6455 */
6456 if (cfil_info->cfi_snd.cfi_pending_last > cfil_info->cfi_snd.cfi_pass_offset ||
6457 cfil_info->cfi_rcv.cfi_pending_last > cfil_info->cfi_rcv.cfi_pass_offset) {
6458 microuptime(&current_tv);
6459
6460 for (int kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
6461 entry = &cfil_info->cfi_entries[kcunit - 1];
6462
6463 if (entry->cfe_filter == NULL) {
6464 continue;
6465 }
6466
6467 if (cfil_info->cfi_snd.cfi_pending_last > entry->cfe_snd.cfe_pass_offset ||
6468 cfil_info->cfi_rcv.cfi_pending_last > entry->cfe_rcv.cfe_pass_offset) {
6469 // haven't gotten an action from this filter, check timeout
6470 timersub(&current_tv, &entry->cfe_last_action, &diff_time);
6471 if (diff_time.tv_sec >= timeout) {
6472 #if GC_DEBUG
6473 cfil_info_log(LOG_ERR, cfil_info, "CFIL: flow ACTION timeout expired");
6474 #endif
6475 return true;
6476 }
6477 }
6478 }
6479 }
6480 return false;
6481 }
6482
6483 bool
6484 cfil_info_buffer_threshold_exceeded(struct cfil_info *cfil_info)
6485 {
6486 if (cfil_info == NULL) {
6487 return false;
6488 }
6489
6490 /*
6491 * Clean up flow if it exceeded queue thresholds
6492 */
6493 if (cfil_info->cfi_snd.cfi_tail_drop_cnt ||
6494 cfil_info->cfi_rcv.cfi_tail_drop_cnt) {
6495 #if GC_DEBUG
6496 CFIL_LOG(LOG_ERR, "CFIL: queue threshold exceeded: mbuf max <count: %d bytes: %d> tail drop count <OUT: %d IN: %d>",
6497 cfil_udp_gc_mbuf_num_max,
6498 cfil_udp_gc_mbuf_cnt_max,
6499 cfil_info->cfi_snd.cfi_tail_drop_cnt,
6500 cfil_info->cfi_rcv.cfi_tail_drop_cnt);
6501 cfil_info_log(LOG_ERR, cfil_info, "CFIL: queue threshold exceeded");
6502 #endif
6503 return true;
6504 }
6505
6506 return false;
6507 }
6508
6509 static void
6510 cfil_udp_gc_thread_sleep(bool forever)
6511 {
6512 if (forever) {
6513 (void) assert_wait((event_t) &cfil_sock_udp_attached_count,
6514 THREAD_INTERRUPTIBLE);
6515 } else {
6516 uint64_t deadline = 0;
6517 nanoseconds_to_absolutetime(UDP_FLOW_GC_RUN_INTERVAL_NSEC, &deadline);
6518 clock_absolutetime_interval_to_deadline(deadline, &deadline);
6519
6520 (void) assert_wait_deadline(&cfil_sock_udp_attached_count,
6521 THREAD_INTERRUPTIBLE, deadline);
6522 }
6523 }
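/*
 * The GC thread either parks indefinitely on cfil_sock_udp_attached_count
 * (nothing to police) or re-arms itself to run again after
 * UDP_FLOW_GC_RUN_INTERVAL_NSEC.
 */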
6524
6525 static void
6526 cfil_udp_gc_thread_func(void *v, wait_result_t w)
6527 {
6528 #pragma unused(v, w)
6529
6530 ASSERT(cfil_udp_gc_thread == current_thread());
6531 thread_set_thread_name(current_thread(), "CFIL_UDP_GC");
6532
6533 // Kick off gc shortly
6534 cfil_udp_gc_thread_sleep(false);
6535 thread_block_parameter((thread_continue_t) cfil_info_udp_expire, NULL);
6536 /* NOTREACHED */
6537 }
6538
6539 static void
6540 cfil_info_udp_expire(void *v, wait_result_t w)
6541 {
6542 #pragma unused(v, w)
6543
6544 static uint64_t expired_array[UDP_FLOW_GC_MAX_COUNT];
6545 static uint32_t expired_count = 0;
6546
6547 struct cfil_info *cfil_info;
6548 struct cfil_hash_entry *hash_entry;
6549 struct cfil_db *db;
6550 struct socket *so;
6551 u_int32_t current_time = 0;
6552
6553 current_time = net_uptime();
6554
6555 // Get all expired UDP flow ids
6556 cfil_rw_lock_shared(&cfil_lck_rw);
6557
6558 if (cfil_sock_udp_attached_count == 0) {
6559 cfil_rw_unlock_shared(&cfil_lck_rw);
6560 goto go_sleep;
6561 }
6562
6563 TAILQ_FOREACH(cfil_info, &cfil_sock_head, cfi_link) {
6564 if (expired_count >= UDP_FLOW_GC_MAX_COUNT) {
6565 break;
6566 }
6567
6568 if (IS_UDP(cfil_info->cfi_so)) {
6569 if (cfil_info_idle_timed_out(cfil_info, UDP_FLOW_GC_IDLE_TO, current_time) ||
6570 cfil_info_action_timed_out(cfil_info, UDP_FLOW_GC_ACTION_TO) ||
6571 cfil_info_buffer_threshold_exceeded(cfil_info)) {
6572 expired_array[expired_count] = cfil_info->cfi_sock_id;
6573 expired_count++;
6574 }
6575 }
6576 }
6577 cfil_rw_unlock_shared(&cfil_lck_rw);
6578
6579 if (expired_count == 0) {
6580 goto go_sleep;
6581 }
6582
6583 for (uint32_t i = 0; i < expired_count; i++) {
6584 // Search for socket (UDP only and lock so)
6585 so = cfil_socket_from_sock_id(expired_array[i], true);
6586 if (so == NULL) {
6587 continue;
6588 }
6589
6590 cfil_info = cfil_db_get_cfil_info(so->so_cfil_db, expired_array[i]);
6591 if (cfil_info == NULL) {
6592 goto unlock;
6593 }
6594
6595 db = so->so_cfil_db;
6596 hash_entry = cfil_info->cfi_hash_entry;
6597
6598 if (db == NULL || hash_entry == NULL) {
6599 goto unlock;
6600 }
6601
6602 #if GC_DEBUG || LIFECYCLE_DEBUG
6603 cfil_info_log(LOG_ERR, cfil_info, "CFIL: LIFECYCLE: GC CLEAN UP");
6604 #endif
6605
6606 cfil_db_delete_entry(db, hash_entry);
6607 cfil_info_free(cfil_info);
6608 OSIncrementAtomic(&cfil_stats.cfs_sock_detached);
6609
6610 if (so->so_flags & SOF_CONTENT_FILTER) {
6611 if (db->cfdb_count == 0) {
6612 so->so_flags &= ~SOF_CONTENT_FILTER;
6613 }
6614 VERIFY(so->so_usecount > 0);
6615 so->so_usecount--;
6616 }
6617 unlock:
6618 socket_unlock(so, 1);
6619 }
6620
6621 #if GC_DEBUG
6622 CFIL_LOG(LOG_ERR, "CFIL: UDP flow idle timeout check: expired %d idle flows", expired_count);
6623 #endif
6624 expired_count = 0;
6625
6626 go_sleep:
6627
6628 // Sleep forever (until woken up) if there are no more UDP flows to clean
6629 cfil_rw_lock_shared(&cfil_lck_rw);
6630 cfil_udp_gc_thread_sleep(cfil_sock_udp_attached_count == 0 ? true : false);
6631 cfil_rw_unlock_shared(&cfil_lck_rw);
6632 thread_block_parameter((thread_continue_t)cfil_info_udp_expire, NULL);
6633 /* NOTREACHED */
6634 }
6635
6636 struct m_tag *
6637 cfil_udp_save_socket_state(struct cfil_info *cfil_info, struct mbuf *m)
6638 {
6639 struct m_tag *tag = NULL;
6640 struct cfil_tag *ctag = NULL;
6641 struct cfil_hash_entry *hash_entry = NULL;
6642
6643 if (cfil_info == NULL || cfil_info->cfi_so == NULL ||
6644 cfil_info->cfi_hash_entry == NULL || m == NULL || !(m->m_flags & M_PKTHDR)) {
6645 return NULL;
6646 }
6647
6648 /* Allocate a tag */
6649 tag = m_tag_create(KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_CFIL_UDP,
6650 sizeof(struct cfil_tag), M_DONTWAIT, m);
6651
6652 if (tag) {
6653 ctag = (struct cfil_tag*)(tag + 1);
6654 ctag->cfil_so_state_change_cnt = cfil_info->cfi_so->so_state_change_cnt;
6655 ctag->cfil_so_options = cfil_info->cfi_so->so_options;
6656
6657 hash_entry = cfil_info->cfi_hash_entry;
6658 if (hash_entry->cfentry_family == AF_INET6) {
6659 fill_ip6_sockaddr_4_6(&ctag->cfil_faddr,
6660 &hash_entry->cfentry_faddr.addr6,
6661 hash_entry->cfentry_fport);
6662 } else if (hash_entry->cfentry_family == AF_INET) {
6663 fill_ip_sockaddr_4_6(&ctag->cfil_faddr,
6664 hash_entry->cfentry_faddr.addr46.ia46_addr4,
6665 hash_entry->cfentry_fport);
6666 }
6667 m_tag_prepend(m, tag);
6668 return tag;
6669 }
6670 return NULL;
6671 }
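/*
 * The cfil_tag payload sits immediately after the m_tag header (hence the
 * "tag + 1" arithmetic above); it preserves the socket state change count,
 * the socket options and the flow's foreign address so they can be recovered
 * once the datagram is re-injected.
 */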
6672
6673 struct m_tag *
6674 cfil_udp_get_socket_state(struct mbuf *m, uint32_t *state_change_cnt, short *options,
6675 struct sockaddr **faddr)
6676 {
6677 struct m_tag *tag = NULL;
6678 struct cfil_tag *ctag = NULL;
6679
6680 tag = m_tag_locate(m, KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_CFIL_UDP, NULL);
6681 if (tag) {
6682 ctag = (struct cfil_tag *)(tag + 1);
6683 if (state_change_cnt) {
6684 *state_change_cnt = ctag->cfil_so_state_change_cnt;
6685 }
6686 if (options) {
6687 *options = ctag->cfil_so_options;
6688 }
6689 if (faddr) {
6690 *faddr = (struct sockaddr *) &ctag->cfil_faddr;
6691 }
6692
6693 /*
6694 * Unlink tag and hand it over to caller.
6695 * Note that caller will be responsible to free it.
6696 */
6697 m_tag_unlink(m, tag);
6698 return tag;
6699 }
6700 return NULL;
6701 }
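/*
 * A caller might consume the tag along these lines (illustrative sketch only):
 *
 *	struct sockaddr *faddr = NULL;
 *	uint32_t state_cnt = 0;
 *	short so_options = 0;
 *	struct m_tag *tag;
 *
 *	tag = cfil_udp_get_socket_state(m, &state_cnt, &so_options, &faddr);
 *	if (tag != NULL) {
 *		// ... use faddr and the saved state while the tag is alive ...
 *		m_tag_free(tag);	// the tag was unlinked; the caller frees it
 *	}
 */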