/*
 * Copyright (c) 2013-2018 Apple Inc. All rights reserved.
 *
 * @APPLE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this
 * file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_LICENSE_HEADER_END@
 */

/*
 * The socket content filter subsystem provides a way for user space agents to
 * make filtering decisions based on the content of the data being sent and
 * received by TCP/IP sockets.
 *
 * A content filter user space agent gets a copy of the data and the data is
 * also kept in a kernel buffer until the user space agent makes a pass or drop
 * decision. This unidirectional flow of content avoids unnecessary data copies
 * back to the kernel.
 *
 * A user space filter agent opens a kernel control socket with the name
 * CONTENT_FILTER_CONTROL_NAME to attach to the socket content filter subsystem.
 * When connected, a "struct content_filter" is created and set as the
 * "unitinfo" of the corresponding kernel control socket instance.
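 *
 * As a rough user space sketch (error handling and the required entitlement
 * are omitted; CONTENT_FILTER_CONTROL_NAME comes from <net/content_filter.h>
 * and the rest are the standard kernel control socket calls from
 * <sys/kern_control.h> and <sys/sys_domain.h>):
 *
 *	#include <sys/socket.h>
 *	#include <sys/ioctl.h>
 *	#include <sys/kern_control.h>
 *	#include <sys/sys_domain.h>
 *	#include <string.h>
 *
 *	int
 *	cfil_agent_attach(void)
 *	{
 *		struct ctl_info info;
 *		struct sockaddr_ctl addr;
 *		int fd = socket(PF_SYSTEM, SOCK_DGRAM, SYSPROTO_CONTROL);
 *
 *		memset(&info, 0, sizeof(info));
 *		strlcpy(info.ctl_name, CONTENT_FILTER_CONTROL_NAME,
 *		    sizeof(info.ctl_name));
 *		ioctl(fd, CTLIOCGINFO, &info);		// resolve name to ctl_id
 *
 *		memset(&addr, 0, sizeof(addr));
 *		addr.sc_len = sizeof(addr);
 *		addr.sc_family = AF_SYSTEM;
 *		addr.ss_sysaddr = AF_SYS_CONTROL;
 *		addr.sc_id = info.ctl_id;
 *		addr.sc_unit = 0;			// let the kernel pick the unit
 *		connect(fd, (struct sockaddr *)&addr, sizeof(addr));
 *		return (fd);
 *	}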
 *
 * The socket content filter subsystem exchanges messages with the user space
 * filter agent until an ultimate pass or drop decision is made by the
 * user space filter agent.
 *
 * Note that messages about many TCP/IP sockets can be multiplexed
 * over a single kernel control socket.
 *
 * Limitations:
 * - The current implementation is limited to TCP sockets.
 * - The current implementation supports up to two simultaneous content filters
 *   for the sake of simplicity of the implementation.
 *
 *
 * NECP FILTER CONTROL UNIT
 *
 * A user space filter agent uses the Network Extension Control Policy (NECP)
 * database to specify which TCP/IP sockets need to be filtered. The NECP
 * criteria may be based on a variety of properties like user ID or proc UUID.
 *
 * The NECP "filter control unit" is used by the socket content filter subsystem
 * to deliver the relevant TCP/IP content information to the appropriate
 * user space filter agent via its kernel control socket instance.
 * This works as follows:
 *
 * 1) The user space filter agent specifies an NECP filter control unit when
 *    it adds its filtering rules to the NECP database.
 *
 * 2) The user space filter agent also sets its NECP filter control unit on the
 *    content filter kernel control socket via the socket option
 *    CFIL_OPT_NECP_CONTROL_UNIT.
 *
 * 3) The NECP database is consulted to find out if a given TCP/IP socket
 *    needs to be subjected to content filtering and returns the corresponding
 *    NECP filter control unit -- the NECP filter control unit is actually
 *    stored in the TCP/IP socket structure so the NECP lookup is really simple.
 *
 * 4) The NECP filter control unit is then used to find the corresponding
 *    kernel control socket instance.
 *
 * Note: NECP currently supports a single filter control unit per TCP/IP socket
 * but this restriction may soon be lifted.
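 *
 * As a sketch, step 2) above is a single setsockopt() on the connected kernel
 * control socket; the control unit value is whatever the agent registered
 * with NECP, and CFIL_OPT_NECP_CONTROL_UNIT is defined in
 * <net/content_filter.h>:
 *
 *	uint32_t control_unit = necp_filter_control_unit;	// from the NECP rule
 *
 *	setsockopt(fd, SYSPROTO_CONTROL, CFIL_OPT_NECP_CONTROL_UNIT,
 *	    &control_unit, sizeof(control_unit));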
 *
 *
 * THE MESSAGING PROTOCOL
 *
 * The socket content filter subsystem and a user space filter agent
 * communicate over the kernel control socket via an asynchronous
 * messaging protocol (it is not a request-response protocol).
 * The socket content filter subsystem sends event messages to the user
 * space filter agent about the TCP/IP sockets it is interested in filtering.
 * The user space filter agent sends action messages to either allow
 * data to pass or to disallow the data flow (and drop the connection).
 *
 * All messages over a content filter kernel control socket share the same
 * common header of type "struct cfil_msg_hdr". The message type tells whether
 * it's an event message "CFM_TYPE_EVENT" or an action message "CFM_TYPE_ACTION".
 * The message header field "cfm_sock_id" identifies a given TCP/IP socket.
 * Note the message header length field may be padded for alignment and can
 * be larger than the actual content of the message.
 * The field "cfm_op" describes the kind of event or action.
 *
 * Here are the kinds of content filter events:
 * - CFM_OP_SOCKET_ATTACHED: a new TCP/IP socket is being filtered
 * - CFM_OP_SOCKET_CLOSED: a TCP/IP socket is closed
 * - CFM_OP_DATA_OUT: a span of data is being sent on a TCP/IP socket
 * - CFM_OP_DATA_IN: a span of data is being received on a TCP/IP socket
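 *
 * A sketch of the agent's receive loop, dispatching on the common header
 * ("struct cfil_msg_hdr" and the CFM_* constants come from
 * <net/content_filter.h>; handle_event() is a hypothetical helper, and a
 * real agent must also cope with short reads and messages larger than this
 * fixed buffer):
 *
 *	uint8_t buf[8192];
 *	ssize_t n;
 *
 *	while ((n = recv(fd, buf, sizeof(buf), 0)) > 0) {
 *		struct cfil_msg_hdr *hdr = (struct cfil_msg_hdr *)buf;
 *
 *		if ((size_t)n < sizeof(*hdr) || hdr->cfm_type != CFM_TYPE_EVENT)
 *			continue;
 *		switch (hdr->cfm_op) {
 *		case CFM_OP_SOCKET_ATTACHED:
 *		case CFM_OP_SOCKET_CLOSED:
 *		case CFM_OP_DATA_OUT:
 *		case CFM_OP_DATA_IN:
 *			// hdr->cfm_sock_id tells which flow the event is about
 *			handle_event(hdr);
 *			break;
 *		default:
 *			break;
 *		}
 *	}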
 *
 * The CFM_OP_DATA_OUT and CFM_OP_DATA_IN event messages contain a span of
 * data that is being sent or received. The position of this span of data
 * in the data flow is described by a set of start and end offsets. These
 * are absolute 64-bit offsets. The first byte sent (or received) starts
 * at offset 0 and ends at offset 1. The length of the content data
 * is given by the difference between the end offset and the start offset.
 *
 * After a CFM_OP_SOCKET_ATTACHED is delivered, CFM_OP_DATA_OUT and
 * CFM_OP_DATA_IN events are not delivered until a CFM_OP_DATA_UPDATE
 * action message is sent by the user space filter agent.
 *
 * Note: absolute 64-bit offsets should be large enough for the foreseeable
 * future. A 64-bit counter will wrap after 468 years at 10 Gbit/sec:
 *	2^64 / ((10^9 / 8) * 60 * 60 * 24 * 365.25) = 467.63
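 *
 * For example, a data event with start offset 1000 and end offset 1460
 * carries 460 bytes covering absolute stream offsets 1000 through 1459.
 * Assuming the cfd_start_offset/cfd_end_offset field names of
 * "struct cfil_msg_data_event" from <net/content_filter.h>:
 *
 *	struct cfil_msg_data_event *data = (struct cfil_msg_data_event *)hdr;
 *	uint64_t span_len = data->cfd_end_offset - data->cfd_start_offset;
 *	// the content bytes immediately follow the fixed part of the message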
 *
 * There are two kinds of primary content filter actions:
 * - CFM_OP_DATA_UPDATE: to update the pass or peek offsets for each direction.
 * - CFM_OP_DROP: to shut down the socket and disallow further data flow.
 *
 * There is also an action to mark a given client flow as already filtered
 * at a higher level, CFM_OP_BLESS_CLIENT.
 *
 * The CFM_OP_DATA_UPDATE action messages let the user space filter
 * agent allow data to flow up to the specified pass offset -- there
 * is a pass offset for outgoing data and a pass offset for incoming data.
 * When a new TCP/IP socket is attached to the content filter, each pass offset
 * is initially set to 0 so no data is allowed to pass by default.
 * When the pass offset is set to CFM_MAX_OFFSET via a CFM_OP_DATA_UPDATE
 * message, the data flow becomes unrestricted.
 *
 * Note that pass offsets can only be incremented. A CFM_OP_DATA_UPDATE message
 * with a pass offset smaller than the pass offset of a previous
 * CFM_OP_DATA_UPDATE message is silently ignored.
 *
 * A user space filter agent also uses CFM_OP_DATA_UPDATE action messages
 * to tell the kernel how much data it wants to see, by using the peek offsets.
 * Just like pass offsets, there is a peek offset for each direction.
 * When a new TCP/IP socket is attached to the content filter, each peek offset
 * is initially set to 0 so no CFM_OP_DATA_OUT and CFM_OP_DATA_IN event
 * messages are dispatched by default until a CFM_OP_DATA_UPDATE action message
 * with a peek offset greater than 0 is sent by the user space filter agent.
 * When the peek offset is set to CFM_MAX_OFFSET via a CFM_OP_DATA_UPDATE
 * message, the flow of update data events becomes unrestricted.
 *
 * Note that peek offsets cannot be smaller than the corresponding pass offset.
 * Also, a peek offset cannot be smaller than the corresponding end offset
 * of the last CFM_OP_DATA_OUT/CFM_OP_DATA_IN message dispatched. Trying
 * to set too small a peek value is silently ignored.
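 *
 * A sketch of the corresponding action message as sent by the agent, here
 * updating the pass and peek offsets in both directions ("struct
 * cfil_msg_action" and its cfa_* fields come from <net/content_filter.h>;
 * the helper itself is illustrative only):
 *
 *	static void
 *	send_update(int fd, uint64_t sock_id,
 *	    uint64_t out_pass, uint64_t out_peek,
 *	    uint64_t in_pass, uint64_t in_peek)
 *	{
 *		struct cfil_msg_action msg;
 *
 *		memset(&msg, 0, sizeof(msg));
 *		msg.cfa_msghdr.cfm_len = sizeof(msg);
 *		msg.cfa_msghdr.cfm_version = CFM_VERSION_CURRENT;
 *		msg.cfa_msghdr.cfm_type = CFM_TYPE_ACTION;
 *		msg.cfa_msghdr.cfm_op = CFM_OP_DATA_UPDATE;
 *		msg.cfa_msghdr.cfm_sock_id = sock_id;
 *		msg.cfa_out_pass_offset = out_pass;
 *		msg.cfa_out_peek_offset = out_peek;
 *		msg.cfa_in_pass_offset = in_pass;
 *		msg.cfa_in_peek_offset = in_peek;
 *		send(fd, &msg, sizeof(msg), 0);
 *	}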
 *
 *
 * PER SOCKET "struct cfil_info"
 *
 * As soon as a TCP/IP socket gets attached to a content filter, a
 * "struct cfil_info" is created to hold the content filtering state for this
 * socket.
 *
 * The content filtering state is made of the following information
 * for each direction:
 * - The current pass offset;
 * - The first and last offsets of the data pending, waiting for a filtering
 *   decision;
 * - The inject queue for data that passed the filters and that needs
 *   to be re-injected into the socket buffer;
 * - A content filter specific state in a set of "struct cfil_entry".
 *
 *
 * CONTENT FILTER STATE "struct cfil_entry"
 *
 * The "struct cfil_entry" maintains the information most relevant to the
 * message handling over a kernel control socket with a user space filter agent.
 *
 * The "struct cfil_entry" holds the NECP filter control unit of the kernel
 * control socket it corresponds to and also has a pointer to the
 * corresponding "struct content_filter".
 *
 * For each direction, "struct cfil_entry" maintains the following information:
 * - The offset of the last data peeked at by the filter
 * - A queue of data that's waiting to be delivered to the user space filter
 *   agent on the kernel control socket
 * - A queue of data for which event messages have been sent on the kernel
 *   control socket and that is pending a filtering decision.
 *
 *
 * CONTENT FILTER QUEUES
 *
 * Data that is being filtered is steered away from the TCP/IP socket buffer
 * and instead will sit in one of three content filter queues until the data
 * can be re-injected into the TCP/IP socket buffer.
 *
 * A content filter queue is represented by "struct cfil_queue" that contains
 * a list of mbufs and the start and end offsets of the data span of the
 * mbuf list.
 *
 * The data moves through the three content filter queues according to the
 * following sequence:
 * a) The "cfe_ctl_q" of "struct cfil_entry"
 * b) The "cfe_pending_q" of "struct cfil_entry"
 * c) The "cfi_inject_q" of "struct cfil_info"
 *
 * Note: the sequence (a),(b) may be repeated several times if there is more
 * than one content filter attached to the TCP/IP socket.
 *
 * The "cfe_ctl_q" queue holds data that cannot yet be delivered to the
 * kernel control socket, for two possible reasons:
 * - The peek offset is less than the end offset of the mbuf data
 * - The kernel control socket is flow controlled
 *
 * The "cfe_pending_q" queue holds data for which CFM_OP_DATA_OUT or
 * CFM_OP_DATA_IN have been successfully dispatched to the kernel control
 * socket and that is waiting for a pass action message from the user space
 * filter agent. An mbuf length must be fully allowed to pass to be removed
 * from the cfe_pending_q.
 *
 * The "cfi_inject_q" queue holds data that has been fully allowed to pass
 * by the user space filter agent and that needs to be re-injected into the
 * TCP/IP socket buffer.
 *
 *
 * IMPACT ON FLOW CONTROL
 *
 * An essential aspect of the content filter subsystem is to minimize the
 * impact on flow control of the TCP/IP sockets being filtered.
 *
 * The processing overhead of the content filtering may have an effect on
 * flow control by adding noticeable delays and cannot be eliminated --
 * care must be taken by the user space filter agent to keep that overhead
 * small.
 *
 * The amount of data being filtered is kept in buffers while waiting for
 * a decision by the user space filter agent. This amount of pending data
 * needs to be subtracted from the amount of data available in the
 * corresponding TCP/IP socket buffer. This is done by modifying
 * sbspace() and tcp_sbspace() to account for the amount of data pending
 * in the content filter.
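 *
 * Conceptually (a simplified sketch; the real accounting lives in sbspace()
 * and tcp_sbspace() and uses the cfi_pending_* fields maintained below):
 *
 *	pending = cfi_pending_last - cfi_pending_first;	// bytes held by cfil
 *	space = sb_hiwat - (sb_cc + pending);		// what sbspace() reports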
 *
 *
 * LOCKING STRATEGY
 *
 * The global state of the content filter subsystem is protected by a single
 * read-write lock "cfil_lck_rw". The data flow can be done with the
 * cfil read-write lock held as shared so it can be re-entered from multiple
 * threads.
 *
 * The per TCP/IP socket content filter state -- "struct cfil_info" -- is
 * protected by the socket lock.
 *
 * A TCP/IP socket lock cannot be taken while the cfil read-write lock
 * is held. That's why we have some sequences where we drop the cfil read-write
 * lock before taking the TCP/IP lock.
 *
 * It is also important to lock the TCP/IP socket buffer while the content
 * filter is modifying the amount of pending data. Otherwise the calculations
 * in sbspace() and tcp_sbspace() could be wrong.
 *
 * The "cfil_lck_rw" protects "struct content_filter" and also the fields
 * "cfe_link" and "cfe_filter" of "struct cfil_entry".
 *
 * Actually, "cfe_link" and "cfe_filter" are protected by both
 * "cfil_lck_rw" and the socket lock: they may be modified only when
 * "cfil_lck_rw" is held exclusive and the socket is locked.
 *
 * To read the other fields of "struct content_filter" we have to take
 * "cfil_lck_rw" in shared mode.
 *
 *
 * LIMITATIONS
 *
 * - For TCP sockets only
 *
 * - Does not support TCP unordered messages
 *
 * TODO:
 *
 * - If datagram support is added, enqueue control and address mbufs as well
 */

#include <sys/types.h>
#include <sys/kern_control.h>
#include <sys/queue.h>
#include <sys/domain.h>
#include <sys/protosw.h>
#include <sys/syslog.h>
#include <sys/systm.h>
#include <sys/param.h>
#include <sys/mbuf.h>

#include <kern/locks.h>
#include <kern/zalloc.h>
#include <kern/debug.h>

#include <net/content_filter.h>

#include <netinet/in_pcb.h>
#include <netinet/tcp.h>
#include <netinet/tcp_var.h>
#include <netinet/udp.h>
#include <netinet/udp_var.h>

#include <libkern/libkern.h>
#include <kern/sched_prim.h>

#define	MAX_CONTENT_FILTER 2

/*
 * The structure content_filter represents a user space content filter.
 * It's created and associated with a kernel control socket instance.
 */
struct content_filter {
	kern_ctl_ref		cf_kcref;
	u_int32_t		cf_kcunit;
	uint32_t		cf_flags;

	uint32_t		cf_necp_control_unit;

	uint32_t		cf_sock_count;
	TAILQ_HEAD(, cfil_entry) cf_sock_entries;
};

#define	CFF_ACTIVE		0x01
#define	CFF_DETACHING		0x02
#define	CFF_FLOW_CONTROLLED	0x04

struct content_filter **content_filters = NULL;
uint32_t cfil_active_count = 0;	/* Number of active content filters */
uint32_t cfil_sock_attached_count = 0;	/* Number of TCP/IP socket attachments */
uint32_t cfil_sock_udp_attached_count = 0;	/* Number of UDP socket attachments */
uint32_t cfil_close_wait_timeout = 1000;	/* in milliseconds */

static kern_ctl_ref cfil_kctlref = NULL;

static lck_grp_attr_t *cfil_lck_grp_attr = NULL;
static lck_attr_t *cfil_lck_attr = NULL;
static lck_grp_t *cfil_lck_grp = NULL;
decl_lck_rw_data(static, cfil_lck_rw);

#define	CFIL_RW_LCK_MAX 8

int cfil_rw_nxt_lck = 0;
void* cfil_rw_lock_history[CFIL_RW_LCK_MAX];

int cfil_rw_nxt_unlck = 0;
void* cfil_rw_unlock_history[CFIL_RW_LCK_MAX];

#define	CONTENT_FILTER_ZONE_NAME	"content_filter"
#define	CONTENT_FILTER_ZONE_MAX		10
static struct zone *content_filter_zone = NULL;	/* zone for content_filter */

#define	CFIL_INFO_ZONE_NAME	"cfil_info"
#define	CFIL_INFO_ZONE_MAX	1024
static struct zone *cfil_info_zone = NULL;	/* zone for cfil_info */

MBUFQ_HEAD(cfil_mqhead);

struct cfil_queue {
	uint64_t		q_start;	/* offset of first byte in queue */
	uint64_t		q_end;		/* offset of last byte in queue */
	struct cfil_mqhead	q_mq;
};

/*
 * struct cfil_entry
 *
 * There is one entry per content filter.
 */
struct cfil_entry {
	TAILQ_ENTRY(cfil_entry)	cfe_link;
	struct content_filter	*cfe_filter;

	struct cfil_info	*cfe_cfil_info;
	uint32_t		cfe_flags;
	uint32_t		cfe_necp_control_unit;
	struct timeval		cfe_last_event;		/* To user space */
	struct timeval		cfe_last_action;	/* From user space */

	struct cfe_buf {
		/*
		 * cfe_pending_q holds data that has been delivered to
		 * the filter and for which we are waiting for an action
		 */
		struct cfil_queue	cfe_pending_q;
		/*
		 * This queue is for data that has not been delivered to
		 * the content filter (new data, pass peek or flow control)
		 */
		struct cfil_queue	cfe_ctl_q;

		uint64_t		cfe_pass_offset;
		uint64_t		cfe_peek_offset;
		uint64_t		cfe_peeked;
	} cfe_snd, cfe_rcv;
};

#define	CFEF_CFIL_ATTACHED		0x0001	/* was attached to filter */
#define	CFEF_SENT_SOCK_ATTACHED		0x0002	/* sock attach event was sent */
#define	CFEF_DATA_START			0x0004	/* can send data event */
#define	CFEF_FLOW_CONTROLLED		0x0008	/* wait for flow control lift */
#define	CFEF_SENT_DISCONNECT_IN		0x0010	/* event was sent */
#define	CFEF_SENT_DISCONNECT_OUT	0x0020	/* event was sent */
#define	CFEF_SENT_SOCK_CLOSED		0x0040	/* closed event was sent */
#define	CFEF_CFIL_DETACHED		0x0080	/* filter was detached */

/*
 * Records the elapsed time (in milliseconds) between the first event on a
 * flow and each subsequent operation, for debugging purposes.
 */
#define	CFI_ADD_TIME_LOG(cfil, t1, t0, op)								\
	struct timeval _tdiff;										\
	if ((cfil)->cfi_op_list_ctr < CFI_MAX_TIME_LOG_ENTRY) {						\
		timersub(t1, t0, &_tdiff);								\
		(cfil)->cfi_op_time[(cfil)->cfi_op_list_ctr] = (uint32_t)(_tdiff.tv_sec * 1000 + _tdiff.tv_usec / 1000);\
		(cfil)->cfi_op_list[(cfil)->cfi_op_list_ctr] = (unsigned char)op;			\
		(cfil)->cfi_op_list_ctr ++;								\
	}

struct cfil_hash_entry;

/*
 * struct cfil_info
 *
 * There is a struct cfil_info per socket.
 */
struct cfil_info {
	TAILQ_ENTRY(cfil_info)	cfi_link;
	struct socket		*cfi_so;
	uint64_t		cfi_flags;
	uint64_t		cfi_sock_id;
	struct timeval64	cfi_first_event;
	uint32_t		cfi_op_list_ctr;
	uint32_t		cfi_op_time[CFI_MAX_TIME_LOG_ENTRY];	/* time interval in microseconds since first event */
	unsigned char		cfi_op_list[CFI_MAX_TIME_LOG_ENTRY];

	struct cfi_buf {
		/*
		 * cfi_pending_first and cfi_pending_last describe the total
		 * amount of data outstanding for all the filters on
		 * this socket and data in the flow queue.
		 * cfi_pending_mbcnt counts in sballoc() "chars of mbufs used"
		 */
		uint64_t	cfi_pending_first;
		uint64_t	cfi_pending_last;
		uint32_t	cfi_pending_mbcnt;
		uint32_t	cfi_pending_mbnum;
		uint32_t	cfi_tail_drop_cnt;
		/*
		 * cfi_pass_offset is the minimum of all the filters
		 */
		uint64_t	cfi_pass_offset;
		/*
		 * cfi_inject_q holds data that needs to be re-injected
		 * into the socket after filtering and that can
		 * be queued because of flow control
		 */
		struct cfil_queue	cfi_inject_q;
	} cfi_snd, cfi_rcv;

	struct cfil_entry	cfi_entries[MAX_CONTENT_FILTER];
	struct cfil_hash_entry	*cfi_hash_entry;
} __attribute__((aligned(8)));

#define	CFIF_DROP		0x0001	/* drop action applied */
#define	CFIF_CLOSE_WAIT		0x0002	/* waiting for filter to close */
#define	CFIF_SOCK_CLOSED	0x0004	/* socket is closed */
#define	CFIF_RETRY_INJECT_IN	0x0010	/* inject in failed */
#define	CFIF_RETRY_INJECT_OUT	0x0020	/* inject out failed */
#define	CFIF_SHUT_WR		0x0040	/* shutdown write */
#define	CFIF_SHUT_RD		0x0080	/* shutdown read */

#define	CFI_MASK_GENCNT		0xFFFFFFFF00000000	/* upper 32 bits */
#define	CFI_SHIFT_GENCNT	32
#define	CFI_MASK_FLOWHASH	0x00000000FFFFFFFF	/* lower 32 bits */
#define	CFI_SHIFT_FLOWHASH	0
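
/*
 * A cfil_sock_id for a TCP socket is thus composed of the socket's generation
 * count in the upper 32 bits and its flow hash in the lower 32 bits; see
 * cfil_socket_from_sock_id() below, which splits the id back into those parts.
 */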

TAILQ_HEAD(cfil_sock_head, cfil_info) cfil_sock_head;

#define	CFIL_QUEUE_VERIFY(x) if (cfil_debug) cfil_queue_verify(x)
#define	CFIL_INFO_VERIFY(x) if (cfil_debug) cfil_info_verify(x)

LIST_HEAD(cfilhashhead, cfil_hash_entry);
#define	CFILHASHSIZE 16
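/*
 * CFIL_HASH folds the flow 4-tuple into the hash value used to index the
 * per-socket UDP flow table (CFILHASHSIZE buckets; see struct cfil_db below).
 */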
#define	CFIL_HASH(laddr, faddr, lport, fport) ((faddr) ^ ((laddr) >> 16) ^ (fport) ^ (lport))
#define	IS_UDP(so) (so && so->so_proto->pr_type == SOCK_DGRAM && so->so_proto->pr_protocol == IPPROTO_UDP)
#define	UNCONNECTED(inp) (inp && (((inp->inp_vflag & INP_IPV4) && (inp->inp_faddr.s_addr == INADDR_ANY)) || \
				  ((inp->inp_vflag & INP_IPV6) && IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr))))
#define	IS_ENTRY_ATTACHED(cfil_info, kcunit) (cfil_info != NULL && (kcunit <= MAX_CONTENT_FILTER) && \
					      cfil_info->cfi_entries[kcunit - 1].cfe_filter != NULL)
#define	IS_DNS(local, remote) (check_port(local, 53) || check_port(remote, 53) || check_port(local, 5353) || check_port(remote, 5353))

/*
 * UDP Garbage Collection:
 */
static struct thread *cfil_udp_gc_thread;
#define	UDP_FLOW_GC_IDLE_TO		30	// Flow Idle Timeout in seconds
#define	UDP_FLOW_GC_ACTION_TO		10	// Flow Action Timeout (no action from user space) in seconds
#define	UDP_FLOW_GC_MAX_COUNT		100	// Max UDP flows to be handled per run
#define	UDP_FLOW_GC_RUN_INTERVAL_NSEC	(10 * NSEC_PER_SEC)	// GC wakes up every 10 seconds

/*
 * UDP flow queue thresholds
 */
#define	UDP_FLOW_GC_MBUF_CNT_MAX	(2 << MBSHIFT)	// Max mbuf byte count in flow queue (2MB)
#define	UDP_FLOW_GC_MBUF_NUM_MAX	(UDP_FLOW_GC_MBUF_CNT_MAX >> MCLSHIFT)	// Max mbuf count in flow queue (1K)
#define	UDP_FLOW_GC_MBUF_SHIFT		5	// Shift to get 1/32 of platform limits
/*
 * UDP flow queue threshold globals:
 */
static unsigned int cfil_udp_gc_mbuf_num_max = UDP_FLOW_GC_MBUF_NUM_MAX;
static unsigned int cfil_udp_gc_mbuf_cnt_max = UDP_FLOW_GC_MBUF_CNT_MAX;

/*
 * struct cfil_hash_entry
 *
 * Hash entry for cfil_info
 */
struct cfil_hash_entry {
	LIST_ENTRY(cfil_hash_entry)	cfentry_link;
	struct cfil_info		*cfentry_cfil;
	u_short				cfentry_fport;
	u_short				cfentry_lport;
	sa_family_t			cfentry_family;
	u_int32_t			cfentry_flowhash;
	u_int32_t			cfentry_lastused;
	union {
		/* foreign host table entry */
		struct in_addr_4in6	addr46;
		struct in6_addr		addr6;
	} cfentry_faddr;
	union {
		/* local host table entry */
		struct in_addr_4in6	addr46;
		struct in6_addr		addr6;
	} cfentry_laddr;
};

/*
 * struct cfil_db
 *
 * For each UDP socket, this is a hash table maintaining all cfil_info structs
 * keyed by the flow 4-tuple <lport,fport,laddr,faddr>.
 */
struct cfil_db {
	struct socket		*cfdb_so;
	uint32_t		cfdb_count;		/* Number of total content filters */
	struct cfilhashhead	*cfdb_hashbase;
	u_long			cfdb_hashmask;
	struct cfil_hash_entry	*cfdb_only_entry;	/* Optimization for connected UDP */
};

/*
 * CFIL specific mbuf tag:
 * Save state of socket at the point of data entry into cfil.
 * Use saved state for reinjection at protocol layer.
 */
struct cfil_tag {
	union sockaddr_in_4_6	cfil_faddr;
	uint32_t		cfil_so_state_change_cnt;
	short			cfil_so_options;
};

#define	CFIL_HASH_ENTRY_ZONE_NAME	"cfil_entry_hash"
#define	CFIL_HASH_ENTRY_ZONE_MAX	1024
static struct zone *cfil_hash_entry_zone = NULL;

#define	CFIL_DB_ZONE_NAME	"cfil_db"
#define	CFIL_DB_ZONE_MAX	1024
static struct zone *cfil_db_zone = NULL;

struct cfil_stats cfil_stats;

/*
 * For troubleshooting
 */
int cfil_log_level = LOG_ERR;
int cfil_debug = 1;

// Debug controls added for selective debugging.
// Disabled for production. If enabled,
// these will have a performance impact.
#define	LIFECYCLE_DEBUG 0
#define	VERDICT_DEBUG 0

/*
 * Sysctls for logs and statistics
 */
static int sysctl_cfil_filter_list(struct sysctl_oid *, void *, int,
	struct sysctl_req *);
static int sysctl_cfil_sock_list(struct sysctl_oid *, void *, int,
	struct sysctl_req *);

SYSCTL_NODE(_net, OID_AUTO, cfil, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "cfil");

SYSCTL_INT(_net_cfil, OID_AUTO, log, CTLFLAG_RW|CTLFLAG_LOCKED,
	&cfil_log_level, 0, "");

SYSCTL_INT(_net_cfil, OID_AUTO, debug, CTLFLAG_RW|CTLFLAG_LOCKED,
	&cfil_debug, 0, "");

SYSCTL_UINT(_net_cfil, OID_AUTO, sock_attached_count, CTLFLAG_RD|CTLFLAG_LOCKED,
	&cfil_sock_attached_count, 0, "");

SYSCTL_UINT(_net_cfil, OID_AUTO, active_count, CTLFLAG_RD|CTLFLAG_LOCKED,
	&cfil_active_count, 0, "");

SYSCTL_UINT(_net_cfil, OID_AUTO, close_wait_timeout, CTLFLAG_RW|CTLFLAG_LOCKED,
	&cfil_close_wait_timeout, 0, "");

static int cfil_sbtrim = 1;
SYSCTL_UINT(_net_cfil, OID_AUTO, sbtrim, CTLFLAG_RW|CTLFLAG_LOCKED,
	&cfil_sbtrim, 0, "");

SYSCTL_PROC(_net_cfil, OID_AUTO, filter_list, CTLFLAG_RD|CTLFLAG_LOCKED,
	0, 0, sysctl_cfil_filter_list, "S,cfil_filter_stat", "");

SYSCTL_PROC(_net_cfil, OID_AUTO, sock_list, CTLFLAG_RD|CTLFLAG_LOCKED,
	0, 0, sysctl_cfil_sock_list, "S,cfil_sock_stat", "");

SYSCTL_STRUCT(_net_cfil, OID_AUTO, stats, CTLFLAG_RD|CTLFLAG_LOCKED,
	&cfil_stats, cfil_stats, "");

/*
 * Forward declaration to appease the compiler
 */
static int cfil_action_data_pass(struct socket *, struct cfil_info *, uint32_t, int,
	uint64_t, uint64_t);
static int cfil_action_drop(struct socket *, struct cfil_info *, uint32_t);
static int cfil_action_bless_client(uint32_t, struct cfil_msg_hdr *);
static int cfil_dispatch_closed_event(struct socket *, struct cfil_info *, int);
static int cfil_data_common(struct socket *, struct cfil_info *, int, struct sockaddr *,
	struct mbuf *, struct mbuf *, uint32_t);
static int cfil_data_filter(struct socket *, struct cfil_info *, uint32_t, int,
	struct mbuf *, uint64_t);
static void fill_ip_sockaddr_4_6(union sockaddr_in_4_6 *,
	struct in_addr, u_int16_t);
static void fill_ip6_sockaddr_4_6(union sockaddr_in_4_6 *,
	struct in6_addr *, u_int16_t);

static int cfil_dispatch_attach_event(struct socket *, struct cfil_info *, uint32_t);
static void cfil_info_free(struct cfil_info *);
static struct cfil_info * cfil_info_alloc(struct socket *, struct cfil_hash_entry *);
static int cfil_info_attach_unit(struct socket *, uint32_t, struct cfil_info *);
static struct socket * cfil_socket_from_sock_id(cfil_sock_id_t, bool);
static struct socket * cfil_socket_from_client_uuid(uuid_t, bool *);
static int cfil_service_pending_queue(struct socket *, struct cfil_info *, uint32_t, int);
static int cfil_data_service_ctl_q(struct socket *, struct cfil_info *, uint32_t, int);
static void cfil_info_verify(struct cfil_info *);
static int cfil_update_data_offsets(struct socket *, struct cfil_info *, uint32_t, int,
	uint64_t, uint64_t);
static int cfil_acquire_sockbuf(struct socket *, struct cfil_info *, int);
static void cfil_release_sockbuf(struct socket *, int);
static int cfil_filters_attached(struct socket *);

static void cfil_rw_lock_exclusive(lck_rw_t *);
static void cfil_rw_unlock_exclusive(lck_rw_t *);
static void cfil_rw_lock_shared(lck_rw_t *);
static void cfil_rw_unlock_shared(lck_rw_t *);
static boolean_t cfil_rw_lock_shared_to_exclusive(lck_rw_t *);
static void cfil_rw_lock_exclusive_to_shared(lck_rw_t *);

static unsigned int cfil_data_length(struct mbuf *, int *, int *);
static errno_t cfil_db_init(struct socket *);
static void cfil_db_free(struct socket *so);
struct cfil_hash_entry *cfil_db_lookup_entry(struct cfil_db *, struct sockaddr *, struct sockaddr *);
struct cfil_hash_entry *cfil_db_lookup_entry_with_sockid(struct cfil_db *, u_int64_t);
struct cfil_hash_entry *cfil_db_add_entry(struct cfil_db *, struct sockaddr *, struct sockaddr *);
void cfil_db_delete_entry(struct cfil_db *, struct cfil_hash_entry *);
struct cfil_hash_entry *cfil_sock_udp_get_flow(struct socket *, uint32_t, bool, struct sockaddr *, struct sockaddr *);
struct cfil_info *cfil_db_get_cfil_info(struct cfil_db *, cfil_sock_id_t);
static errno_t cfil_sock_udp_handle_data(bool, struct socket *, struct sockaddr *, struct sockaddr *,
	struct mbuf *, struct mbuf *, uint32_t);
static int32_t cfil_sock_udp_data_pending(struct sockbuf *, bool);
static void cfil_sock_udp_is_closed(struct socket *);
static int cfil_sock_udp_notify_shutdown(struct socket *, int, int, int);
static int cfil_sock_udp_shutdown(struct socket *, int *);
static void cfil_sock_udp_close_wait(struct socket *);
static void cfil_sock_udp_buf_update(struct sockbuf *);
static int cfil_filters_udp_attached(struct socket *, bool);
static void cfil_get_flow_address_v6(struct cfil_hash_entry *, struct inpcb *,
	struct in6_addr **, struct in6_addr **,
	u_int16_t *, u_int16_t *);
static void cfil_get_flow_address(struct cfil_hash_entry *, struct inpcb *,
	struct in_addr *, struct in_addr *,
	u_int16_t *, u_int16_t *);
static void cfil_info_log(int, struct cfil_info *, const char *);
void cfil_filter_show(u_int32_t);
void cfil_info_show(void);
bool cfil_info_idle_timed_out(struct cfil_info *, int, u_int32_t);
bool cfil_info_action_timed_out(struct cfil_info *, int);
bool cfil_info_buffer_threshold_exceeded(struct cfil_info *);
struct m_tag *cfil_udp_save_socket_state(struct cfil_info *, struct mbuf *);
static void cfil_udp_gc_thread_func(void *, wait_result_t);
static void cfil_info_udp_expire(void *, wait_result_t);

bool check_port(struct sockaddr *, u_short);

/*
 * Content filter global read write lock
 */

static void
cfil_rw_lock_exclusive(lck_rw_t *lck)
{
	void *lr_saved;

	lr_saved = __builtin_return_address(0);

	lck_rw_lock_exclusive(lck);

	cfil_rw_lock_history[cfil_rw_nxt_lck] = lr_saved;
	cfil_rw_nxt_lck = (cfil_rw_nxt_lck + 1) % CFIL_RW_LCK_MAX;
}

static void
cfil_rw_unlock_exclusive(lck_rw_t *lck)
{
	void *lr_saved;

	lr_saved = __builtin_return_address(0);

	lck_rw_unlock_exclusive(lck);

	cfil_rw_unlock_history[cfil_rw_nxt_unlck] = lr_saved;
	cfil_rw_nxt_unlck = (cfil_rw_nxt_unlck + 1) % CFIL_RW_LCK_MAX;
}

static void
cfil_rw_lock_shared(lck_rw_t *lck)
{
	void *lr_saved;

	lr_saved = __builtin_return_address(0);

	lck_rw_lock_shared(lck);

	cfil_rw_lock_history[cfil_rw_nxt_lck] = lr_saved;
	cfil_rw_nxt_lck = (cfil_rw_nxt_lck + 1) % CFIL_RW_LCK_MAX;
}

static void
cfil_rw_unlock_shared(lck_rw_t *lck)
{
	void *lr_saved;

	lr_saved = __builtin_return_address(0);

	lck_rw_unlock_shared(lck);

	cfil_rw_unlock_history[cfil_rw_nxt_unlck] = lr_saved;
	cfil_rw_nxt_unlck = (cfil_rw_nxt_unlck + 1) % CFIL_RW_LCK_MAX;
}

static boolean_t
cfil_rw_lock_shared_to_exclusive(lck_rw_t *lck)
{
	void *lr_saved;
	boolean_t upgraded;

	lr_saved = __builtin_return_address(0);

	upgraded = lck_rw_lock_shared_to_exclusive(lck);
	if (upgraded) {
		cfil_rw_unlock_history[cfil_rw_nxt_unlck] = lr_saved;
		cfil_rw_nxt_unlck = (cfil_rw_nxt_unlck + 1) % CFIL_RW_LCK_MAX;
	}
	return (upgraded);
}

static void
cfil_rw_lock_exclusive_to_shared(lck_rw_t *lck)
{
	void *lr_saved;

	lr_saved = __builtin_return_address(0);

	lck_rw_lock_exclusive_to_shared(lck);

	cfil_rw_lock_history[cfil_rw_nxt_lck] = lr_saved;
	cfil_rw_nxt_lck = (cfil_rw_nxt_lck + 1) % CFIL_RW_LCK_MAX;
}

static void
cfil_rw_lock_assert_held(lck_rw_t *lck, int exclusive)
{
#if !MACH_ASSERT
#pragma unused(lck, exclusive)
#endif
	LCK_RW_ASSERT(lck,
		exclusive ? LCK_RW_ASSERT_EXCLUSIVE : LCK_RW_ASSERT_HELD);
}
824 * Return the number of bytes in the mbuf chain using the same
825 * method as m_length() or sballoc()
827 * Returns data len - starting from PKT start
828 * - retmbcnt - optional param to get total mbuf bytes in chain
829 * - retmbnum - optional param to get number of mbufs in chain
832 cfil_data_length(struct mbuf
*m
, int *retmbcnt
, int *retmbnum
)
835 unsigned int pktlen
= 0;
839 // Locate the start of data
840 for (m0
= m
; m0
!= NULL
; m0
= m0
->m_next
) {
841 if (m0
->m_flags
& M_PKTHDR
)
845 CFIL_LOG(LOG_ERR
, "cfil_data_length: no M_PKTHDR");
850 if (retmbcnt
== NULL
&& retmbnum
== NULL
)
851 return (m_length(m
));
856 for (m0
= m
; m0
!= NULL
; m0
= m0
->m_next
) {
860 if (m0
->m_flags
& M_EXT
)
861 mbcnt
+= m0
->m_ext
.ext_size
;
873 cfil_data_start(struct mbuf
*m
)
877 // Locate the start of data
878 for (m0
= m
; m0
!= NULL
; m0
= m0
->m_next
) {
879 if (m0
->m_flags
& M_PKTHDR
)
886 * Common mbuf queue utilities
890 cfil_queue_init(struct cfil_queue
*cfq
)
894 MBUFQ_INIT(&cfq
->q_mq
);
897 static inline uint64_t
898 cfil_queue_drain(struct cfil_queue
*cfq
)
900 uint64_t drained
= cfq
->q_start
- cfq
->q_end
;
903 MBUFQ_DRAIN(&cfq
->q_mq
);
908 /* Return 1 when empty, 0 otherwise */
910 cfil_queue_empty(struct cfil_queue
*cfq
)
912 return (MBUFQ_EMPTY(&cfq
->q_mq
));
915 static inline uint64_t
916 cfil_queue_offset_first(struct cfil_queue
*cfq
)
918 return (cfq
->q_start
);
921 static inline uint64_t
922 cfil_queue_offset_last(struct cfil_queue
*cfq
)
927 static inline uint64_t
928 cfil_queue_len(struct cfil_queue
*cfq
)
930 return (cfq
->q_end
- cfq
->q_start
);
934 * Routines to verify some fundamental assumptions
938 cfil_queue_verify(struct cfil_queue
*cfq
)
943 uint64_t queuesize
= 0;
945 /* Verify offset are ordered */
946 VERIFY(cfq
->q_start
<= cfq
->q_end
);
949 * When queue is empty, the offsets are equal otherwise the offsets
952 VERIFY((MBUFQ_EMPTY(&cfq
->q_mq
) && cfq
->q_start
== cfq
->q_end
) ||
953 (!MBUFQ_EMPTY(&cfq
->q_mq
) &&
954 cfq
->q_start
!= cfq
->q_end
));
956 MBUFQ_FOREACH(chain
, &cfq
->q_mq
) {
957 size_t chainsize
= 0;
959 unsigned int mlen
= cfil_data_length(m
, NULL
, NULL
);
960 // skip the addr and control stuff if present
961 m
= cfil_data_start(m
);
964 m
== (void *)M_TAG_FREE_PATTERN
||
965 m
->m_next
== (void *)M_TAG_FREE_PATTERN
||
966 m
->m_nextpkt
== (void *)M_TAG_FREE_PATTERN
)
967 panic("%s - mq %p is free at %p", __func__
,
969 for (n
= m
; n
!= NULL
; n
= n
->m_next
) {
970 if (n
->m_type
!= MT_DATA
&&
971 n
->m_type
!= MT_HEADER
&&
972 n
->m_type
!= MT_OOBDATA
)
973 panic("%s - %p unsupported type %u", __func__
,
975 chainsize
+= n
->m_len
;
977 if (mlen
!= chainsize
)
978 panic("%s - %p m_length() %u != chainsize %lu",
979 __func__
, m
, mlen
, chainsize
);
980 queuesize
+= chainsize
;
982 if (queuesize
!= cfq
->q_end
- cfq
->q_start
)
983 panic("%s - %p queuesize %llu != offsetdiffs %llu", __func__
,
984 m
, queuesize
, cfq
->q_end
- cfq
->q_start
);
988 cfil_queue_enqueue(struct cfil_queue
*cfq
, mbuf_t m
, size_t len
)
990 CFIL_QUEUE_VERIFY(cfq
);
992 MBUFQ_ENQUEUE(&cfq
->q_mq
, m
);
995 CFIL_QUEUE_VERIFY(cfq
);
999 cfil_queue_remove(struct cfil_queue
*cfq
, mbuf_t m
, size_t len
)
1001 CFIL_QUEUE_VERIFY(cfq
);
1003 VERIFY(cfil_data_length(m
, NULL
, NULL
) == len
);
1005 MBUFQ_REMOVE(&cfq
->q_mq
, m
);
1006 MBUFQ_NEXT(m
) = NULL
;
1007 cfq
->q_start
+= len
;
1009 CFIL_QUEUE_VERIFY(cfq
);
1013 cfil_queue_first(struct cfil_queue
*cfq
)
1015 return (MBUFQ_FIRST(&cfq
->q_mq
));
1019 cfil_queue_next(struct cfil_queue
*cfq
, mbuf_t m
)
1022 return (MBUFQ_NEXT(m
));
1026 cfil_entry_buf_verify(struct cfe_buf
*cfe_buf
)
1028 CFIL_QUEUE_VERIFY(&cfe_buf
->cfe_ctl_q
);
1029 CFIL_QUEUE_VERIFY(&cfe_buf
->cfe_pending_q
);
1031 /* Verify the queues are ordered so that pending is before ctl */
1032 VERIFY(cfe_buf
->cfe_ctl_q
.q_start
>= cfe_buf
->cfe_pending_q
.q_end
);
1034 /* The peek offset cannot be less than the pass offset */
1035 VERIFY(cfe_buf
->cfe_peek_offset
>= cfe_buf
->cfe_pass_offset
);
1037 /* Make sure we've updated the offset we peeked at */
1038 VERIFY(cfe_buf
->cfe_ctl_q
.q_start
<= cfe_buf
->cfe_peeked
);
1042 cfil_entry_verify(struct cfil_entry
*entry
)
1044 cfil_entry_buf_verify(&entry
->cfe_snd
);
1045 cfil_entry_buf_verify(&entry
->cfe_rcv
);
1049 cfil_info_buf_verify(struct cfi_buf
*cfi_buf
)
1051 CFIL_QUEUE_VERIFY(&cfi_buf
->cfi_inject_q
);
1053 VERIFY(cfi_buf
->cfi_pending_first
<= cfi_buf
->cfi_pending_last
);
1054 VERIFY(cfi_buf
->cfi_pending_mbcnt
>= 0);
1058 cfil_info_verify(struct cfil_info
*cfil_info
)
1062 if (cfil_info
== NULL
)
1065 cfil_info_buf_verify(&cfil_info
->cfi_snd
);
1066 cfil_info_buf_verify(&cfil_info
->cfi_rcv
);
1068 for (i
= 0; i
< MAX_CONTENT_FILTER
; i
++)
1069 cfil_entry_verify(&cfil_info
->cfi_entries
[i
]);
1073 verify_content_filter(struct content_filter
*cfc
)
1075 struct cfil_entry
*entry
;
1078 VERIFY(cfc
->cf_sock_count
>= 0);
1080 TAILQ_FOREACH(entry
, &cfc
->cf_sock_entries
, cfe_link
) {
1082 VERIFY(cfc
== entry
->cfe_filter
);
1084 VERIFY(count
== cfc
->cf_sock_count
);
1088 * Kernel control socket callbacks
1091 cfil_ctl_connect(kern_ctl_ref kctlref
, struct sockaddr_ctl
*sac
,
1095 struct content_filter
*cfc
= NULL
;
1097 CFIL_LOG(LOG_NOTICE
, "");
1099 cfc
= zalloc(content_filter_zone
);
1101 CFIL_LOG(LOG_ERR
, "zalloc failed");
1105 bzero(cfc
, sizeof(struct content_filter
));
1107 cfil_rw_lock_exclusive(&cfil_lck_rw
);
1108 if (content_filters
== NULL
) {
1109 struct content_filter
**tmp
;
1111 cfil_rw_unlock_exclusive(&cfil_lck_rw
);
1114 struct content_filter
**,
1115 MAX_CONTENT_FILTER
* sizeof(struct content_filter
*),
1119 cfil_rw_lock_exclusive(&cfil_lck_rw
);
1121 if (tmp
== NULL
&& content_filters
== NULL
) {
1123 cfil_rw_unlock_exclusive(&cfil_lck_rw
);
1126 /* Another thread may have won the race */
1127 if (content_filters
!= NULL
)
1130 content_filters
= tmp
;
1133 if (sac
->sc_unit
== 0 || sac
->sc_unit
> MAX_CONTENT_FILTER
) {
1134 CFIL_LOG(LOG_ERR
, "bad sc_unit %u", sac
->sc_unit
);
1136 } else if (content_filters
[sac
->sc_unit
- 1] != NULL
) {
1137 CFIL_LOG(LOG_ERR
, "sc_unit %u in use", sac
->sc_unit
);
1141 * kernel control socket kcunit numbers start at 1
1143 content_filters
[sac
->sc_unit
- 1] = cfc
;
1145 cfc
->cf_kcref
= kctlref
;
1146 cfc
->cf_kcunit
= sac
->sc_unit
;
1147 TAILQ_INIT(&cfc
->cf_sock_entries
);
1150 cfil_active_count
++;
1152 cfil_rw_unlock_exclusive(&cfil_lck_rw
);
1154 if (error
!= 0 && cfc
!= NULL
)
1155 zfree(content_filter_zone
, cfc
);
1158 OSIncrementAtomic(&cfil_stats
.cfs_ctl_connect_ok
);
1160 OSIncrementAtomic(&cfil_stats
.cfs_ctl_connect_fail
);
1162 CFIL_LOG(LOG_INFO
, "return %d cfil_active_count %u kcunit %u",
1163 error
, cfil_active_count
, sac
->sc_unit
);
1169 cfil_ctl_disconnect(kern_ctl_ref kctlref
, u_int32_t kcunit
, void *unitinfo
)
1171 #pragma unused(kctlref)
1173 struct content_filter
*cfc
;
1174 struct cfil_entry
*entry
;
1175 uint64_t sock_flow_id
= 0;
1177 CFIL_LOG(LOG_NOTICE
, "");
1179 if (content_filters
== NULL
) {
1180 CFIL_LOG(LOG_ERR
, "no content filter");
1184 if (kcunit
> MAX_CONTENT_FILTER
) {
1185 CFIL_LOG(LOG_ERR
, "kcunit %u > MAX_CONTENT_FILTER (%d)",
1186 kcunit
, MAX_CONTENT_FILTER
);
1191 cfc
= (struct content_filter
*)unitinfo
;
1195 cfil_rw_lock_exclusive(&cfil_lck_rw
);
1196 if (content_filters
[kcunit
- 1] != cfc
|| cfc
->cf_kcunit
!= kcunit
) {
1197 CFIL_LOG(LOG_ERR
, "bad unit info %u)",
1199 cfil_rw_unlock_exclusive(&cfil_lck_rw
);
1202 cfc
->cf_flags
|= CFF_DETACHING
;
1204 * Remove all sockets from the filter
1206 while ((entry
= TAILQ_FIRST(&cfc
->cf_sock_entries
)) != NULL
) {
1207 cfil_rw_lock_assert_held(&cfil_lck_rw
, 1);
1209 verify_content_filter(cfc
);
1211 * Accept all outstanding data by pushing to next filter
1214 * TBD: Actually we should make sure all data has been pushed
1217 if (entry
->cfe_cfil_info
&& entry
->cfe_cfil_info
->cfi_so
) {
1218 struct cfil_info
*cfil_info
= entry
->cfe_cfil_info
;
1219 struct socket
*so
= cfil_info
->cfi_so
;
1220 sock_flow_id
= cfil_info
->cfi_sock_id
;
1222 /* Need to let data flow immediately */
1223 entry
->cfe_flags
|= CFEF_SENT_SOCK_ATTACHED
|
1227 * Respect locking hierarchy
1229 cfil_rw_unlock_exclusive(&cfil_lck_rw
);
1234 * When cfe_filter is NULL the filter is detached
1235 * and the entry has been removed from cf_sock_entries
1237 if ((so
->so_cfil
== NULL
&& so
->so_cfil_db
== NULL
) || entry
->cfe_filter
== NULL
) {
1238 cfil_rw_lock_exclusive(&cfil_lck_rw
);
1242 (void) cfil_action_data_pass(so
, cfil_info
, kcunit
, 1,
1246 (void) cfil_action_data_pass(so
, cfil_info
, kcunit
, 0,
1250 cfil_rw_lock_exclusive(&cfil_lck_rw
);
1253 * Check again to make sure if the cfil_info is still valid
1254 * as the socket may have been unlocked when when calling
1255 * cfil_acquire_sockbuf()
1257 if (entry
->cfe_filter
== NULL
||
1258 (so
->so_cfil
== NULL
&& cfil_db_get_cfil_info(so
->so_cfil_db
, sock_flow_id
) == NULL
)) {
1262 /* The filter is now detached */
1263 entry
->cfe_flags
|= CFEF_CFIL_DETACHED
;
1265 cfil_info_log(LOG_DEBUG
, cfil_info
, "CFIL: LIFECYCLE: - FILTER DISCONNECTED");
1267 CFIL_LOG(LOG_NOTICE
, "so %llx detached %u",
1268 (uint64_t)VM_KERNEL_ADDRPERM(so
), kcunit
);
1269 if ((cfil_info
->cfi_flags
& CFIF_CLOSE_WAIT
) &&
1270 cfil_filters_attached(so
) == 0) {
1271 CFIL_LOG(LOG_NOTICE
, "so %llx waking",
1272 (uint64_t)VM_KERNEL_ADDRPERM(so
));
1273 wakeup((caddr_t
)cfil_info
);
1277 * Remove the filter entry from the content filter
1278 * but leave the rest of the state intact as the queues
1279 * may not be empty yet
1281 entry
->cfe_filter
= NULL
;
1282 entry
->cfe_necp_control_unit
= 0;
1284 TAILQ_REMOVE(&cfc
->cf_sock_entries
, entry
, cfe_link
);
1285 cfc
->cf_sock_count
--;
1287 socket_unlock(so
, 1);
1290 verify_content_filter(cfc
);
1292 VERIFY(cfc
->cf_sock_count
== 0);
1295 * Make filter inactive
1297 content_filters
[kcunit
- 1] = NULL
;
1298 cfil_active_count
--;
1299 cfil_rw_unlock_exclusive(&cfil_lck_rw
);
1301 zfree(content_filter_zone
, cfc
);
1304 OSIncrementAtomic(&cfil_stats
.cfs_ctl_disconnect_ok
);
1306 OSIncrementAtomic(&cfil_stats
.cfs_ctl_disconnect_fail
);
1308 CFIL_LOG(LOG_INFO
, "return %d cfil_active_count %u kcunit %u",
1309 error
, cfil_active_count
, kcunit
);
1315 * cfil_acquire_sockbuf()
1317 * Prevent any other thread from acquiring the sockbuf
1318 * We use sb_cfil_thread as a semaphore to prevent other threads from
1319 * messing with the sockbuf -- see sblock()
1320 * Note: We do not set SB_LOCK here because the thread may check or modify
1321 * SB_LOCK several times until it calls cfil_release_sockbuf() -- currently
1322 * sblock(), sbunlock() or sodefunct()
1325 cfil_acquire_sockbuf(struct socket
*so
, struct cfil_info
*cfil_info
, int outgoing
)
1327 thread_t tp
= current_thread();
1328 struct sockbuf
*sb
= outgoing
? &so
->so_snd
: &so
->so_rcv
;
1329 lck_mtx_t
*mutex_held
;
1333 * Wait until no thread is holding the sockbuf and other content
1334 * filter threads have released the sockbuf
1336 while ((sb
->sb_flags
& SB_LOCK
) ||
1337 (sb
->sb_cfil_thread
!= NULL
&& sb
->sb_cfil_thread
!= tp
)) {
1338 if (so
->so_proto
->pr_getlock
!= NULL
)
1339 mutex_held
= (*so
->so_proto
->pr_getlock
)(so
, PR_F_WILLUNLOCK
);
1341 mutex_held
= so
->so_proto
->pr_domain
->dom_mtx
;
1343 LCK_MTX_ASSERT(mutex_held
, LCK_MTX_ASSERT_OWNED
);
1346 VERIFY(sb
->sb_wantlock
!= 0);
1348 msleep(&sb
->sb_flags
, mutex_held
, PSOCK
, "cfil_acquire_sockbuf",
1351 VERIFY(sb
->sb_wantlock
!= 0);
1355 * Use reference count for repetitive calls on same thread
1357 if (sb
->sb_cfil_refs
== 0) {
1358 VERIFY(sb
->sb_cfil_thread
== NULL
);
1359 VERIFY((sb
->sb_flags
& SB_LOCK
) == 0);
1361 sb
->sb_cfil_thread
= tp
;
1362 sb
->sb_flags
|= SB_LOCK
;
1366 /* We acquire the socket buffer when we need to cleanup */
1367 if (cfil_info
== NULL
) {
1368 CFIL_LOG(LOG_ERR
, "so %llx cfil detached",
1369 (uint64_t)VM_KERNEL_ADDRPERM(so
));
1371 } else if (cfil_info
->cfi_flags
& CFIF_DROP
) {
1372 CFIL_LOG(LOG_ERR
, "so %llx drop set",
1373 (uint64_t)VM_KERNEL_ADDRPERM(so
));
1381 cfil_release_sockbuf(struct socket
*so
, int outgoing
)
1383 struct sockbuf
*sb
= outgoing
? &so
->so_snd
: &so
->so_rcv
;
1384 thread_t tp
= current_thread();
1386 socket_lock_assert_owned(so
);
1388 if (sb
->sb_cfil_thread
!= NULL
&& sb
->sb_cfil_thread
!= tp
)
1389 panic("%s sb_cfil_thread %p not current %p", __func__
,
1390 sb
->sb_cfil_thread
, tp
);
1392 * Don't panic if we are defunct because SB_LOCK has
1393 * been cleared by sodefunct()
1395 if (!(so
->so_flags
& SOF_DEFUNCT
) && !(sb
->sb_flags
& SB_LOCK
))
1396 panic("%s SB_LOCK not set on %p", __func__
,
1399 * We can unlock when the thread unwinds to the last reference
1402 if (sb
->sb_cfil_refs
== 0) {
1403 sb
->sb_cfil_thread
= NULL
;
1404 sb
->sb_flags
&= ~SB_LOCK
;
1406 if (sb
->sb_wantlock
> 0)
1407 wakeup(&sb
->sb_flags
);
1412 cfil_sock_id_from_socket(struct socket
*so
)
1414 if ((so
->so_flags
& SOF_CONTENT_FILTER
) && so
->so_cfil
)
1415 return (so
->so_cfil
->cfi_sock_id
);
1417 return (CFIL_SOCK_ID_NONE
);
1421 cfil_socket_safe_lock(struct inpcb
*inp
)
1423 if (in_pcb_checkstate(inp
, WNT_ACQUIRE
, 0) != WNT_STOPUSING
) {
1424 socket_lock(inp
->inp_socket
, 1);
1425 if (in_pcb_checkstate(inp
, WNT_RELEASE
, 1) != WNT_STOPUSING
) {
1428 socket_unlock(inp
->inp_socket
, 1);
1433 static struct socket
*
1434 cfil_socket_from_sock_id(cfil_sock_id_t cfil_sock_id
, bool udp_only
)
1436 struct socket
*so
= NULL
;
1437 u_int64_t gencnt
= cfil_sock_id
>> 32;
1438 u_int32_t flowhash
= (u_int32_t
)(cfil_sock_id
& 0x0ffffffff);
1439 struct inpcb
*inp
= NULL
;
1440 struct inpcbinfo
*pcbinfo
= NULL
;
1443 CFIL_LOG(LOG_ERR
, "CFIL: VERDICT: search for socket: id %llu gencnt %llx flowhash %x", cfil_sock_id
, gencnt
, flowhash
);
1450 lck_rw_lock_shared(pcbinfo
->ipi_lock
);
1451 LIST_FOREACH(inp
, pcbinfo
->ipi_listhead
, inp_list
) {
1452 if (inp
->inp_state
!= INPCB_STATE_DEAD
&&
1453 inp
->inp_socket
!= NULL
&&
1454 inp
->inp_flowhash
== flowhash
&&
1455 (inp
->inp_socket
->so_gencnt
& 0x0ffffffff) == gencnt
&&
1456 inp
->inp_socket
->so_cfil
!= NULL
) {
1457 if (cfil_socket_safe_lock(inp
))
1458 so
= inp
->inp_socket
;
1462 lck_rw_done(pcbinfo
->ipi_lock
);
1470 lck_rw_lock_shared(pcbinfo
->ipi_lock
);
1471 LIST_FOREACH(inp
, pcbinfo
->ipi_listhead
, inp_list
) {
1472 if (inp
->inp_state
!= INPCB_STATE_DEAD
&&
1473 inp
->inp_socket
!= NULL
&&
1474 inp
->inp_socket
->so_cfil_db
!= NULL
&&
1475 (inp
->inp_socket
->so_gencnt
& 0x0ffffffff) == gencnt
) {
1476 if (cfil_socket_safe_lock(inp
))
1477 so
= inp
->inp_socket
;
1481 lck_rw_done(pcbinfo
->ipi_lock
);
1485 OSIncrementAtomic(&cfil_stats
.cfs_sock_id_not_found
);
1487 "no socket for sock_id %llx gencnt %llx flowhash %x",
1488 cfil_sock_id
, gencnt
, flowhash
);
1494 static struct socket
*
1495 cfil_socket_from_client_uuid(uuid_t necp_client_uuid
, bool *cfil_attached
)
1497 struct socket
*so
= NULL
;
1498 struct inpcb
*inp
= NULL
;
1499 struct inpcbinfo
*pcbinfo
= &tcbinfo
;
1501 lck_rw_lock_shared(pcbinfo
->ipi_lock
);
1502 LIST_FOREACH(inp
, pcbinfo
->ipi_listhead
, inp_list
) {
1503 if (inp
->inp_state
!= INPCB_STATE_DEAD
&&
1504 inp
->inp_socket
!= NULL
&&
1505 uuid_compare(inp
->necp_client_uuid
, necp_client_uuid
) == 0) {
1506 *cfil_attached
= (inp
->inp_socket
->so_cfil
!= NULL
);
1507 if (cfil_socket_safe_lock(inp
))
1508 so
= inp
->inp_socket
;
1512 lck_rw_done(pcbinfo
->ipi_lock
);
1518 lck_rw_lock_shared(pcbinfo
->ipi_lock
);
1519 LIST_FOREACH(inp
, pcbinfo
->ipi_listhead
, inp_list
) {
1520 if (inp
->inp_state
!= INPCB_STATE_DEAD
&&
1521 inp
->inp_socket
!= NULL
&&
1522 uuid_compare(inp
->necp_client_uuid
, necp_client_uuid
) == 0) {
1523 *cfil_attached
= (inp
->inp_socket
->so_cfil_db
!= NULL
);
1524 if (cfil_socket_safe_lock(inp
))
1525 so
= inp
->inp_socket
;
1529 lck_rw_done(pcbinfo
->ipi_lock
);
1536 cfil_ctl_send(kern_ctl_ref kctlref
, u_int32_t kcunit
, void *unitinfo
, mbuf_t m
,
1539 #pragma unused(kctlref, flags)
1541 struct cfil_msg_hdr
*msghdr
;
1542 struct content_filter
*cfc
= (struct content_filter
*)unitinfo
;
1544 struct cfil_msg_action
*action_msg
;
1545 struct cfil_entry
*entry
;
1546 struct cfil_info
*cfil_info
= NULL
;
1548 CFIL_LOG(LOG_INFO
, "");
1550 if (content_filters
== NULL
) {
1551 CFIL_LOG(LOG_ERR
, "no content filter");
1555 if (kcunit
> MAX_CONTENT_FILTER
) {
1556 CFIL_LOG(LOG_ERR
, "kcunit %u > MAX_CONTENT_FILTER (%d)",
1557 kcunit
, MAX_CONTENT_FILTER
);
1562 if (m_length(m
) < sizeof(struct cfil_msg_hdr
)) {
1563 CFIL_LOG(LOG_ERR
, "too short %u", m_length(m
));
1567 msghdr
= (struct cfil_msg_hdr
*)mbuf_data(m
);
1568 if (msghdr
->cfm_version
!= CFM_VERSION_CURRENT
) {
1569 CFIL_LOG(LOG_ERR
, "bad version %u", msghdr
->cfm_version
);
1573 if (msghdr
->cfm_type
!= CFM_TYPE_ACTION
) {
1574 CFIL_LOG(LOG_ERR
, "bad type %u", msghdr
->cfm_type
);
1578 /* Validate action operation */
1579 switch (msghdr
->cfm_op
) {
1580 case CFM_OP_DATA_UPDATE
:
1582 &cfil_stats
.cfs_ctl_action_data_update
);
1585 OSIncrementAtomic(&cfil_stats
.cfs_ctl_action_drop
);
1587 case CFM_OP_BLESS_CLIENT
:
1588 if (msghdr
->cfm_len
!= sizeof(struct cfil_msg_bless_client
)) {
1589 OSIncrementAtomic(&cfil_stats
.cfs_ctl_action_bad_len
);
1591 CFIL_LOG(LOG_ERR
, "bad len: %u for op %u",
1596 error
= cfil_action_bless_client(kcunit
, msghdr
);
1599 OSIncrementAtomic(&cfil_stats
.cfs_ctl_action_bad_op
);
1600 CFIL_LOG(LOG_ERR
, "bad op %u", msghdr
->cfm_op
);
1604 if (msghdr
->cfm_len
!= sizeof(struct cfil_msg_action
)) {
1605 OSIncrementAtomic(&cfil_stats
.cfs_ctl_action_bad_len
);
1607 CFIL_LOG(LOG_ERR
, "bad len: %u for op %u",
1612 cfil_rw_lock_shared(&cfil_lck_rw
);
1613 if (cfc
!= (void *)content_filters
[kcunit
- 1]) {
1614 CFIL_LOG(LOG_ERR
, "unitinfo does not match for kcunit %u",
1617 cfil_rw_unlock_shared(&cfil_lck_rw
);
1620 cfil_rw_unlock_shared(&cfil_lck_rw
);
1622 // Search for socket (TCP+UDP and lock so)
1623 so
= cfil_socket_from_sock_id(msghdr
->cfm_sock_id
, false);
1625 CFIL_LOG(LOG_NOTICE
, "bad sock_id %llx",
1626 msghdr
->cfm_sock_id
);
1631 cfil_info
= so
->so_cfil_db
!= NULL
?
1632 cfil_db_get_cfil_info(so
->so_cfil_db
, msghdr
->cfm_sock_id
) : so
->so_cfil
;
1634 if (cfil_info
== NULL
) {
1635 CFIL_LOG(LOG_NOTICE
, "so %llx <id %llu> not attached",
1636 (uint64_t)VM_KERNEL_ADDRPERM(so
), msghdr
->cfm_sock_id
);
1639 } else if (cfil_info
->cfi_flags
& CFIF_DROP
) {
1640 CFIL_LOG(LOG_NOTICE
, "so %llx drop set",
1641 (uint64_t)VM_KERNEL_ADDRPERM(so
));
1645 entry
= &cfil_info
->cfi_entries
[kcunit
- 1];
1646 if (entry
->cfe_filter
== NULL
) {
1647 CFIL_LOG(LOG_NOTICE
, "so %llx no filter",
1648 (uint64_t)VM_KERNEL_ADDRPERM(so
));
1653 if (entry
->cfe_flags
& CFEF_SENT_SOCK_ATTACHED
)
1654 entry
->cfe_flags
|= CFEF_DATA_START
;
1657 "so %llx attached not sent for %u",
1658 (uint64_t)VM_KERNEL_ADDRPERM(so
), kcunit
);
1663 microuptime(&entry
->cfe_last_action
);
1664 CFI_ADD_TIME_LOG(cfil_info
, &entry
->cfe_last_action
, &cfil_info
->cfi_first_event
, msghdr
->cfm_op
);
1666 action_msg
= (struct cfil_msg_action
*)msghdr
;
1668 switch (msghdr
->cfm_op
) {
1669 case CFM_OP_DATA_UPDATE
:
1671 CFIL_LOG(LOG_ERR
, "CFIL: VERDICT RECEIVED: <so %llx sockID %llu> <IN peek:%llu pass:%llu, OUT peek:%llu pass:%llu>",
1672 (uint64_t)VM_KERNEL_ADDRPERM(so
),
1673 cfil_info
->cfi_sock_id
,
1674 action_msg
->cfa_in_peek_offset
, action_msg
->cfa_in_pass_offset
,
1675 action_msg
->cfa_out_peek_offset
, action_msg
->cfa_out_pass_offset
);
1677 if (action_msg
->cfa_out_peek_offset
!= 0 ||
1678 action_msg
->cfa_out_pass_offset
!= 0)
1679 error
= cfil_action_data_pass(so
, cfil_info
, kcunit
, 1,
1680 action_msg
->cfa_out_pass_offset
,
1681 action_msg
->cfa_out_peek_offset
);
1682 if (error
== EJUSTRETURN
)
1686 if (action_msg
->cfa_in_peek_offset
!= 0 ||
1687 action_msg
->cfa_in_pass_offset
!= 0)
1688 error
= cfil_action_data_pass(so
, cfil_info
, kcunit
, 0,
1689 action_msg
->cfa_in_pass_offset
,
1690 action_msg
->cfa_in_peek_offset
);
1691 if (error
== EJUSTRETURN
)
1696 error
= cfil_action_drop(so
, cfil_info
, kcunit
);
1704 socket_unlock(so
, 1);
1709 OSIncrementAtomic(&cfil_stats
.cfs_ctl_send_ok
);
1711 OSIncrementAtomic(&cfil_stats
.cfs_ctl_send_bad
);
1717 cfil_ctl_getopt(kern_ctl_ref kctlref
, u_int32_t kcunit
, void *unitinfo
,
1718 int opt
, void *data
, size_t *len
)
1720 #pragma unused(kctlref, opt)
1721 struct cfil_info
*cfil_info
= NULL
;
1723 struct content_filter
*cfc
= (struct content_filter
*)unitinfo
;
1725 CFIL_LOG(LOG_NOTICE
, "");
1727 cfil_rw_lock_shared(&cfil_lck_rw
);
1729 if (content_filters
== NULL
) {
1730 CFIL_LOG(LOG_ERR
, "no content filter");
1734 if (kcunit
> MAX_CONTENT_FILTER
) {
1735 CFIL_LOG(LOG_ERR
, "kcunit %u > MAX_CONTENT_FILTER (%d)",
1736 kcunit
, MAX_CONTENT_FILTER
);
1740 if (cfc
!= (void *)content_filters
[kcunit
- 1]) {
1741 CFIL_LOG(LOG_ERR
, "unitinfo does not match for kcunit %u",
1747 case CFIL_OPT_NECP_CONTROL_UNIT
:
1748 if (*len
< sizeof(uint32_t)) {
1749 CFIL_LOG(LOG_ERR
, "len too small %lu", *len
);
1754 *(uint32_t *)data
= cfc
->cf_necp_control_unit
;
1757 case CFIL_OPT_GET_SOCKET_INFO
:
1758 if (*len
!= sizeof(struct cfil_opt_sock_info
)) {
1759 CFIL_LOG(LOG_ERR
, "len does not match %lu", *len
);
1764 CFIL_LOG(LOG_ERR
, "data not passed");
1769 struct cfil_opt_sock_info
*sock_info
=
1770 (struct cfil_opt_sock_info
*) data
;
1772 // Unlock here so that we never hold both cfil_lck_rw and the
1773 // socket_lock at the same time. Otherwise, this can deadlock
1774 // because soclose() takes the socket_lock and then exclusive
1775 // cfil_lck_rw and we require the opposite order.
1777 // WARNING: Be sure to never use anything protected
1778 // by cfil_lck_rw beyond this point.
1779 // WARNING: Be sure to avoid fallthrough and
1780 // goto return_already_unlocked from this branch.
1781 cfil_rw_unlock_shared(&cfil_lck_rw
);
1783 // Search (TCP+UDP) and lock socket
1784 struct socket
*sock
=
1785 cfil_socket_from_sock_id(sock_info
->cfs_sock_id
, false);
1788 CFIL_LOG(LOG_ERR
, "CFIL: GET_SOCKET_INFO failed: bad sock_id %llu",
1789 sock_info
->cfs_sock_id
);
1792 goto return_already_unlocked
;
1795 cfil_info
= (sock
->so_cfil_db
!= NULL
) ?
1796 cfil_db_get_cfil_info(sock
->so_cfil_db
, sock_info
->cfs_sock_id
) : sock
->so_cfil
;
1798 if (cfil_info
== NULL
) {
1800 CFIL_LOG(LOG_ERR
, "CFIL: GET_SOCKET_INFO failed: so %llx not attached, cannot fetch info",
1801 (uint64_t)VM_KERNEL_ADDRPERM(sock
));
1804 socket_unlock(sock
, 1);
1805 goto return_already_unlocked
;
1808 // Fill out family, type, and protocol
1809 sock_info
->cfs_sock_family
= sock
->so_proto
->pr_domain
->dom_family
;
1810 sock_info
->cfs_sock_type
= sock
->so_proto
->pr_type
;
1811 sock_info
->cfs_sock_protocol
= sock
->so_proto
->pr_protocol
;
1813 // Source and destination addresses
1814 struct inpcb
*inp
= sotoinpcb(sock
);
1815 if (inp
->inp_vflag
& INP_IPV6
) {
1816 struct in6_addr
*laddr
= NULL
, *faddr
= NULL
;
1817 u_int16_t lport
= 0, fport
= 0;
1819 cfil_get_flow_address_v6(cfil_info
->cfi_hash_entry
, inp
,
1820 &laddr
, &faddr
, &lport
, &fport
);
1821 fill_ip6_sockaddr_4_6(&sock_info
->cfs_local
, laddr
, lport
);
1822 fill_ip6_sockaddr_4_6(&sock_info
->cfs_remote
, faddr
, fport
);
1823 } else if (inp
->inp_vflag
& INP_IPV4
) {
1824 struct in_addr laddr
= {0}, faddr
= {0};
1825 u_int16_t lport
= 0, fport
= 0;
1827 cfil_get_flow_address(cfil_info
->cfi_hash_entry
, inp
,
1828 &laddr
, &faddr
, &lport
, &fport
);
1829 fill_ip_sockaddr_4_6(&sock_info
->cfs_local
, laddr
, lport
);
1830 fill_ip_sockaddr_4_6(&sock_info
->cfs_remote
, faddr
, fport
);
1834 sock_info
->cfs_pid
= sock
->last_pid
;
1835 memcpy(sock_info
->cfs_uuid
, sock
->last_uuid
, sizeof(uuid_t
));
1837 if (sock
->so_flags
& SOF_DELEGATED
) {
1838 sock_info
->cfs_e_pid
= sock
->e_pid
;
1839 memcpy(sock_info
->cfs_e_uuid
, sock
->e_uuid
, sizeof(uuid_t
));
1841 sock_info
->cfs_e_pid
= sock
->last_pid
;
1842 memcpy(sock_info
->cfs_e_uuid
, sock
->last_uuid
, sizeof(uuid_t
));
1845 socket_unlock(sock
, 1);
1847 goto return_already_unlocked
;
1849 error
= ENOPROTOOPT
;
1853 cfil_rw_unlock_shared(&cfil_lck_rw
);
1857 return_already_unlocked
:
1863 cfil_ctl_setopt(kern_ctl_ref kctlref
, u_int32_t kcunit
, void *unitinfo
,
1864 int opt
, void *data
, size_t len
)
1866 #pragma unused(kctlref, opt)
1868 struct content_filter
*cfc
= (struct content_filter
*)unitinfo
;
1870 CFIL_LOG(LOG_NOTICE
, "");
1872 cfil_rw_lock_exclusive(&cfil_lck_rw
);
1874 if (content_filters
== NULL
) {
1875 CFIL_LOG(LOG_ERR
, "no content filter");
1879 if (kcunit
> MAX_CONTENT_FILTER
) {
1880 CFIL_LOG(LOG_ERR
, "kcunit %u > MAX_CONTENT_FILTER (%d)",
1881 kcunit
, MAX_CONTENT_FILTER
);
1885 if (cfc
!= (void *)content_filters
[kcunit
- 1]) {
1886 CFIL_LOG(LOG_ERR
, "unitinfo does not match for kcunit %u",
1892 case CFIL_OPT_NECP_CONTROL_UNIT
:
1893 if (len
< sizeof(uint32_t)) {
1894 CFIL_LOG(LOG_ERR
, "CFIL_OPT_NECP_CONTROL_UNIT "
1895 "len too small %lu", len
);
1899 if (cfc
->cf_necp_control_unit
!= 0) {
1900 CFIL_LOG(LOG_ERR
, "CFIL_OPT_NECP_CONTROL_UNIT "
1902 cfc
->cf_necp_control_unit
);
1906 cfc
->cf_necp_control_unit
= *(uint32_t *)data
;
1909 error
= ENOPROTOOPT
;
1913 cfil_rw_unlock_exclusive(&cfil_lck_rw
);
1920 cfil_ctl_rcvd(kern_ctl_ref kctlref
, u_int32_t kcunit
, void *unitinfo
, int flags
)
1922 #pragma unused(kctlref, flags)
1923 struct content_filter
*cfc
= (struct content_filter
*)unitinfo
;
1924 struct socket
*so
= NULL
;
1926 struct cfil_entry
*entry
;
1927 struct cfil_info
*cfil_info
= NULL
;
1929 CFIL_LOG(LOG_INFO
, "");
1931 if (content_filters
== NULL
) {
1932 CFIL_LOG(LOG_ERR
, "no content filter");
1933 OSIncrementAtomic(&cfil_stats
.cfs_ctl_rcvd_bad
);
1936 if (kcunit
> MAX_CONTENT_FILTER
) {
1937 CFIL_LOG(LOG_ERR
, "kcunit %u > MAX_CONTENT_FILTER (%d)",
1938 kcunit
, MAX_CONTENT_FILTER
);
1939 OSIncrementAtomic(&cfil_stats
.cfs_ctl_rcvd_bad
);
1942 cfil_rw_lock_shared(&cfil_lck_rw
);
1943 if (cfc
!= (void *)content_filters
[kcunit
- 1]) {
1944 CFIL_LOG(LOG_ERR
, "unitinfo does not match for kcunit %u",
1946 OSIncrementAtomic(&cfil_stats
.cfs_ctl_rcvd_bad
);
1949 /* Let's assume the flow control is lifted */
1950 if (cfc
->cf_flags
& CFF_FLOW_CONTROLLED
) {
1951 if (!cfil_rw_lock_shared_to_exclusive(&cfil_lck_rw
))
1952 cfil_rw_lock_exclusive(&cfil_lck_rw
);
1954 cfc
->cf_flags
&= ~CFF_FLOW_CONTROLLED
;
1956 cfil_rw_lock_exclusive_to_shared(&cfil_lck_rw
);
1957 LCK_RW_ASSERT(&cfil_lck_rw
, LCK_RW_ASSERT_SHARED
);
1960 * Flow control will be raised again as soon as an entry cannot enqueue
1961 * to the kernel control socket
1963 while ((cfc
->cf_flags
& CFF_FLOW_CONTROLLED
) == 0) {
1964 verify_content_filter(cfc
);
1966 cfil_rw_lock_assert_held(&cfil_lck_rw
, 0);
1968 /* Find an entry that is flow controlled */
1969 TAILQ_FOREACH(entry
, &cfc
->cf_sock_entries
, cfe_link
) {
1970 if (entry
->cfe_cfil_info
== NULL
||
1971 entry
->cfe_cfil_info
->cfi_so
== NULL
)
1973 if ((entry
->cfe_flags
& CFEF_FLOW_CONTROLLED
) == 0)
1979 OSIncrementAtomic(&cfil_stats
.cfs_ctl_rcvd_flow_lift
);
1981 cfil_info
= entry
->cfe_cfil_info
;
1982 so
= cfil_info
->cfi_so
;
1984 cfil_rw_unlock_shared(&cfil_lck_rw
);
1988 error
= cfil_acquire_sockbuf(so
, cfil_info
, 1);
1990 error
= cfil_data_service_ctl_q(so
, cfil_info
, kcunit
, 1);
1991 cfil_release_sockbuf(so
, 1);
1995 error
= cfil_acquire_sockbuf(so
, cfil_info
, 0);
1997 error
= cfil_data_service_ctl_q(so
, cfil_info
, kcunit
, 0);
1998 cfil_release_sockbuf(so
, 0);
2001 socket_lock_assert_owned(so
);
2002 socket_unlock(so
, 1);
2004 cfil_rw_lock_shared(&cfil_lck_rw
);
2007 cfil_rw_unlock_shared(&cfil_lck_rw
);
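/*
 * Sketch of the shared-to-exclusive upgrade idiom used by
 * cfil_ctl_rcvd() above when clearing CFF_FLOW_CONTROLLED
 * (illustrative only): if the upgrade fails, the shared lock has
 * already been dropped, so the exclusive lock must be taken from
 * scratch before modifying cf_flags, then downgraded back to shared.
 */
#if 0
    cfil_rw_lock_shared(&cfil_lck_rw);
    if (cfc->cf_flags & CFF_FLOW_CONTROLLED) {
        if (!cfil_rw_lock_shared_to_exclusive(&cfil_lck_rw))
            cfil_rw_lock_exclusive(&cfil_lck_rw); /* upgrade failed, shared lock was released */
        cfc->cf_flags &= ~CFF_FLOW_CONTROLLED;
        cfil_rw_lock_exclusive_to_shared(&cfil_lck_rw);
    }
    cfil_rw_unlock_shared(&cfil_lck_rw);
#endif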
2013 struct kern_ctl_reg kern_ctl
;
2015 vm_size_t content_filter_size
= 0; /* size of content_filter */
2016 vm_size_t cfil_info_size
= 0; /* size of cfil_info */
2017 vm_size_t cfil_hash_entry_size
= 0; /* size of cfil_hash_entry */
2018 vm_size_t cfil_db_size
= 0; /* size of cfil_db */
2019 unsigned int mbuf_limit
= 0;
2021 CFIL_LOG(LOG_NOTICE
, "");
2024 * Compile time verifications
2026 _CASSERT(CFIL_MAX_FILTER_COUNT
== MAX_CONTENT_FILTER
);
2027 _CASSERT(sizeof(struct cfil_filter_stat
) % sizeof(uint32_t) == 0);
2028 _CASSERT(sizeof(struct cfil_entry_stat
) % sizeof(uint32_t) == 0);
2029 _CASSERT(sizeof(struct cfil_sock_stat
) % sizeof(uint32_t) == 0);
	 * Run-time verifications
2034 VERIFY(IS_P2ALIGNED(&cfil_stats
.cfs_ctl_q_in_enqueued
,
2036 VERIFY(IS_P2ALIGNED(&cfil_stats
.cfs_ctl_q_out_enqueued
,
2038 VERIFY(IS_P2ALIGNED(&cfil_stats
.cfs_ctl_q_in_peeked
,
2040 VERIFY(IS_P2ALIGNED(&cfil_stats
.cfs_ctl_q_out_peeked
,
2043 VERIFY(IS_P2ALIGNED(&cfil_stats
.cfs_pending_q_in_enqueued
,
2045 VERIFY(IS_P2ALIGNED(&cfil_stats
.cfs_pending_q_out_enqueued
,
2048 VERIFY(IS_P2ALIGNED(&cfil_stats
.cfs_inject_q_in_enqueued
,
2050 VERIFY(IS_P2ALIGNED(&cfil_stats
.cfs_inject_q_out_enqueued
,
2052 VERIFY(IS_P2ALIGNED(&cfil_stats
.cfs_inject_q_in_passed
,
2054 VERIFY(IS_P2ALIGNED(&cfil_stats
.cfs_inject_q_out_passed
,
2058 * Zone for content filters kernel control sockets
2060 content_filter_size
= sizeof(struct content_filter
);
2061 content_filter_zone
= zinit(content_filter_size
,
2062 CONTENT_FILTER_ZONE_MAX
* content_filter_size
,
2064 CONTENT_FILTER_ZONE_NAME
);
2065 if (content_filter_zone
== NULL
) {
2066 panic("%s: zinit(%s) failed", __func__
,
2067 CONTENT_FILTER_ZONE_NAME
);
2070 zone_change(content_filter_zone
, Z_CALLERACCT
, FALSE
);
2071 zone_change(content_filter_zone
, Z_EXPAND
, TRUE
);
2074 * Zone for per socket content filters
2076 cfil_info_size
= sizeof(struct cfil_info
);
2077 cfil_info_zone
= zinit(cfil_info_size
,
2078 CFIL_INFO_ZONE_MAX
* cfil_info_size
,
2080 CFIL_INFO_ZONE_NAME
);
2081 if (cfil_info_zone
== NULL
) {
2082 panic("%s: zinit(%s) failed", __func__
, CFIL_INFO_ZONE_NAME
);
2085 zone_change(cfil_info_zone
, Z_CALLERACCT
, FALSE
);
2086 zone_change(cfil_info_zone
, Z_EXPAND
, TRUE
);
2089 * Zone for content filters cfil hash entries and db
2091 cfil_hash_entry_size
= sizeof(struct cfil_hash_entry
);
2092 cfil_hash_entry_zone
= zinit(cfil_hash_entry_size
,
2093 CFIL_HASH_ENTRY_ZONE_MAX
* cfil_hash_entry_size
,
2095 CFIL_HASH_ENTRY_ZONE_NAME
);
2096 if (cfil_hash_entry_zone
== NULL
) {
2097 panic("%s: zinit(%s) failed", __func__
, CFIL_HASH_ENTRY_ZONE_NAME
);
2100 zone_change(cfil_hash_entry_zone
, Z_CALLERACCT
, FALSE
);
2101 zone_change(cfil_hash_entry_zone
, Z_EXPAND
, TRUE
);
2103 cfil_db_size
= sizeof(struct cfil_db
);
2104 cfil_db_zone
= zinit(cfil_db_size
,
2105 CFIL_DB_ZONE_MAX
* cfil_db_size
,
2108 if (cfil_db_zone
== NULL
) {
2109 panic("%s: zinit(%s) failed", __func__
, CFIL_DB_ZONE_NAME
);
2112 zone_change(cfil_db_zone
, Z_CALLERACCT
, FALSE
);
2113 zone_change(cfil_db_zone
, Z_EXPAND
, TRUE
);
2118 cfil_lck_grp_attr
= lck_grp_attr_alloc_init();
2119 if (cfil_lck_grp_attr
== NULL
) {
2120 panic("%s: lck_grp_attr_alloc_init failed", __func__
);
2123 cfil_lck_grp
= lck_grp_alloc_init("content filter",
2125 if (cfil_lck_grp
== NULL
) {
2126 panic("%s: lck_grp_alloc_init failed", __func__
);
2129 cfil_lck_attr
= lck_attr_alloc_init();
2130 if (cfil_lck_attr
== NULL
) {
2131 panic("%s: lck_attr_alloc_init failed", __func__
);
2134 lck_rw_init(&cfil_lck_rw
, cfil_lck_grp
, cfil_lck_attr
);
2136 TAILQ_INIT(&cfil_sock_head
);
	/*
	 * Register kernel control
	 */
	bzero(&kern_ctl, sizeof(kern_ctl));
	strlcpy(kern_ctl.ctl_name, CONTENT_FILTER_CONTROL_NAME,
		sizeof(kern_ctl.ctl_name));
	kern_ctl.ctl_flags = CTL_FLAG_PRIVILEGED | CTL_FLAG_REG_EXTENDED;
	kern_ctl.ctl_sendsize = 512 * 1024; /* enough? */
	kern_ctl.ctl_recvsize = 512 * 1024; /* enough? */
	kern_ctl.ctl_connect = cfil_ctl_connect;
	kern_ctl.ctl_disconnect = cfil_ctl_disconnect;
	kern_ctl.ctl_send = cfil_ctl_send;
	kern_ctl.ctl_getopt = cfil_ctl_getopt;
	kern_ctl.ctl_setopt = cfil_ctl_setopt;
	kern_ctl.ctl_rcvd = cfil_ctl_rcvd;
	error = ctl_register(&kern_ctl, &cfil_kctlref);
	if (error != 0) {
		CFIL_LOG(LOG_ERR, "ctl_register failed: %d", error);
		return;
	}

	// Spawn thread for garbage collection
	if (kernel_thread_start(cfil_udp_gc_thread_func, NULL,
		&cfil_udp_gc_thread) != KERN_SUCCESS) {
		panic_plain("%s: Can't create UDP GC thread", __func__);
		/* NOTREACHED */
	}
	/* this must not fail */
	VERIFY(cfil_udp_gc_thread != NULL);

	// Set UDP per-flow mbuf thresholds to 1/32 of platform max
	mbuf_limit = MAX(UDP_FLOW_GC_MBUF_CNT_MAX, (nmbclusters << MCLSHIFT) >> UDP_FLOW_GC_MBUF_SHIFT);
	cfil_udp_gc_mbuf_num_max = (mbuf_limit >> MCLSHIFT);
	cfil_udp_gc_mbuf_cnt_max = mbuf_limit;
}
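/*
 * Worked example for the UDP per-flow mbuf thresholds computed above
 * (numbers hypothetical): with nmbclusters = 65536 and 2KB clusters
 * (MCLSHIFT = 11), the platform mbuf space is 65536 << 11 = 128MB.
 * Shifting that right by UDP_FLOW_GC_MBUF_SHIFT (1/32) gives a 4MB
 * byte budget per flow, and that budget >> MCLSHIFT (2048 clusters)
 * becomes the per-flow mbuf count limit, assuming the result is not
 * clamped by UDP_FLOW_GC_MBUF_CNT_MAX.
 */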
2175 cfil_info_alloc(struct socket
*so
, struct cfil_hash_entry
*hash_entry
)
2178 struct cfil_info
*cfil_info
= NULL
;
2179 struct inpcb
*inp
= sotoinpcb(so
);
2181 CFIL_LOG(LOG_INFO
, "");
2183 socket_lock_assert_owned(so
);
2185 cfil_info
= zalloc(cfil_info_zone
);
2186 if (cfil_info
== NULL
)
2188 bzero(cfil_info
, sizeof(struct cfil_info
));
2190 cfil_queue_init(&cfil_info
->cfi_snd
.cfi_inject_q
);
2191 cfil_queue_init(&cfil_info
->cfi_rcv
.cfi_inject_q
);
2193 for (kcunit
= 1; kcunit
<= MAX_CONTENT_FILTER
; kcunit
++) {
2194 struct cfil_entry
*entry
;
2196 entry
= &cfil_info
->cfi_entries
[kcunit
- 1];
2197 entry
->cfe_cfil_info
= cfil_info
;
2199 /* Initialize the filter entry */
2200 entry
->cfe_filter
= NULL
;
2201 entry
->cfe_flags
= 0;
2202 entry
->cfe_necp_control_unit
= 0;
2203 entry
->cfe_snd
.cfe_pass_offset
= 0;
2204 entry
->cfe_snd
.cfe_peek_offset
= 0;
2205 entry
->cfe_snd
.cfe_peeked
= 0;
2206 entry
->cfe_rcv
.cfe_pass_offset
= 0;
2207 entry
->cfe_rcv
.cfe_peek_offset
= 0;
2208 entry
->cfe_rcv
.cfe_peeked
= 0;
		 * Timestamp the last action to avoid prematurely
		 * triggering garbage collection
2213 microuptime(&entry
->cfe_last_action
);
2215 cfil_queue_init(&entry
->cfe_snd
.cfe_pending_q
);
2216 cfil_queue_init(&entry
->cfe_rcv
.cfe_pending_q
);
2217 cfil_queue_init(&entry
->cfe_snd
.cfe_ctl_q
);
2218 cfil_queue_init(&entry
->cfe_rcv
.cfe_ctl_q
);
2221 cfil_rw_lock_exclusive(&cfil_lck_rw
);
2224 * Create a cfi_sock_id that's not the socket pointer!
2227 if (hash_entry
== NULL
) {
2228 // This is the TCP case, cfil_info is tracked per socket
2229 if (inp
->inp_flowhash
== 0)
2230 inp
->inp_flowhash
= inp_calc_flowhash(inp
);
2232 so
->so_cfil
= cfil_info
;
2233 cfil_info
->cfi_so
= so
;
2234 cfil_info
->cfi_sock_id
=
2235 ((so
->so_gencnt
<< 32) | inp
->inp_flowhash
);
2237 // This is the UDP case, cfil_info is tracked in per-socket hash
2238 cfil_info
->cfi_so
= so
;
2239 hash_entry
->cfentry_cfil
= cfil_info
;
2240 cfil_info
->cfi_hash_entry
= hash_entry
;
2241 cfil_info
->cfi_sock_id
= ((so
->so_gencnt
<< 32) | (hash_entry
->cfentry_flowhash
& 0xffffffff));
2242 CFIL_LOG(LOG_DEBUG
, "CFIL: UDP inp_flowhash %x so_gencnt %llx entry flowhash %x sockID %llx",
2243 inp
->inp_flowhash
, so
->so_gencnt
, hash_entry
->cfentry_flowhash
, cfil_info
->cfi_sock_id
);
2245 // Wake up gc thread if this is first flow added
2246 if (cfil_sock_udp_attached_count
== 0) {
2247 thread_wakeup((caddr_t
)&cfil_sock_udp_attached_count
);
2250 cfil_sock_udp_attached_count
++;
2253 TAILQ_INSERT_TAIL(&cfil_sock_head
, cfil_info
, cfi_link
);
2255 cfil_sock_attached_count
++;
2257 cfil_rw_unlock_exclusive(&cfil_lck_rw
);
2260 if (cfil_info
!= NULL
)
2261 OSIncrementAtomic(&cfil_stats
.cfs_cfi_alloc_ok
);
2263 OSIncrementAtomic(&cfil_stats
.cfs_cfi_alloc_fail
);
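/*
 * Illustrative sketch (hypothetical helpers, not used elsewhere in this
 * file): the cfi_sock_id built by cfil_info_alloc() above packs the
 * socket generation count in the upper 32 bits and the flow hash in the
 * lower 32 bits, so either half can be recovered with a shift or mask.
 */
#if 0
static inline uint64_t
cfil_make_sock_id(uint64_t gencnt, uint32_t flowhash)
{
    return ((gencnt << 32) | flowhash);
}

static inline uint32_t
cfil_sock_id_flowhash(uint64_t sock_id)
{
    return ((uint32_t)(sock_id & 0xffffffff));
}
#endif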
2269 cfil_info_attach_unit(struct socket
*so
, uint32_t filter_control_unit
, struct cfil_info
*cfil_info
)
2274 CFIL_LOG(LOG_INFO
, "");
2276 socket_lock_assert_owned(so
);
2278 cfil_rw_lock_exclusive(&cfil_lck_rw
);
2281 content_filters
!= NULL
&& kcunit
<= MAX_CONTENT_FILTER
;
2283 struct content_filter
*cfc
= content_filters
[kcunit
- 1];
2284 struct cfil_entry
*entry
;
2288 if (cfc
->cf_necp_control_unit
!= filter_control_unit
)
2291 entry
= &cfil_info
->cfi_entries
[kcunit
- 1];
2293 entry
->cfe_filter
= cfc
;
2294 entry
->cfe_necp_control_unit
= filter_control_unit
;
2295 TAILQ_INSERT_TAIL(&cfc
->cf_sock_entries
, entry
, cfe_link
);
2296 cfc
->cf_sock_count
++;
2297 verify_content_filter(cfc
);
2299 entry
->cfe_flags
|= CFEF_CFIL_ATTACHED
;
2303 cfil_rw_unlock_exclusive(&cfil_lck_rw
);
2309 cfil_info_free(struct cfil_info
*cfil_info
)
2312 uint64_t in_drain
= 0;
2313 uint64_t out_drained
= 0;
2315 if (cfil_info
== NULL
)
2318 CFIL_LOG(LOG_INFO
, "");
2320 cfil_rw_lock_exclusive(&cfil_lck_rw
);
2323 content_filters
!= NULL
&& kcunit
<= MAX_CONTENT_FILTER
;
2325 struct cfil_entry
*entry
;
2326 struct content_filter
*cfc
;
2328 entry
= &cfil_info
->cfi_entries
[kcunit
- 1];
2330 /* Don't be silly and try to detach twice */
2331 if (entry
->cfe_filter
== NULL
)
2334 cfc
= content_filters
[kcunit
- 1];
2336 VERIFY(cfc
== entry
->cfe_filter
);
2338 entry
->cfe_filter
= NULL
;
2339 entry
->cfe_necp_control_unit
= 0;
2340 TAILQ_REMOVE(&cfc
->cf_sock_entries
, entry
, cfe_link
);
2341 cfc
->cf_sock_count
--;
2343 verify_content_filter(cfc
);
2345 if (cfil_info
->cfi_hash_entry
!= NULL
)
2346 cfil_sock_udp_attached_count
--;
2347 cfil_sock_attached_count
--;
2348 TAILQ_REMOVE(&cfil_sock_head
, cfil_info
, cfi_link
);
2350 out_drained
+= cfil_queue_drain(&cfil_info
->cfi_snd
.cfi_inject_q
);
2351 in_drain
+= cfil_queue_drain(&cfil_info
->cfi_rcv
.cfi_inject_q
);
2353 for (kcunit
= 1; kcunit
<= MAX_CONTENT_FILTER
; kcunit
++) {
2354 struct cfil_entry
*entry
;
2356 entry
= &cfil_info
->cfi_entries
[kcunit
- 1];
2357 out_drained
+= cfil_queue_drain(&entry
->cfe_snd
.cfe_pending_q
);
2358 in_drain
+= cfil_queue_drain(&entry
->cfe_rcv
.cfe_pending_q
);
2359 out_drained
+= cfil_queue_drain(&entry
->cfe_snd
.cfe_ctl_q
);
2360 in_drain
+= cfil_queue_drain(&entry
->cfe_rcv
.cfe_ctl_q
);
2362 cfil_rw_unlock_exclusive(&cfil_lck_rw
);
2365 OSIncrementAtomic(&cfil_stats
.cfs_flush_out_free
);
2367 OSIncrementAtomic(&cfil_stats
.cfs_flush_in_free
);
2369 zfree(cfil_info_zone
, cfil_info
);
2373 * Entry point from Sockets layer
2374 * The socket is locked.
2377 cfil_sock_attach(struct socket
*so
)
2380 uint32_t filter_control_unit
;
2382 socket_lock_assert_owned(so
);
2384 /* Limit ourselves to TCP that are not MPTCP subflows */
2385 if ((so
->so_proto
->pr_domain
->dom_family
!= PF_INET
&&
2386 so
->so_proto
->pr_domain
->dom_family
!= PF_INET6
) ||
2387 so
->so_proto
->pr_type
!= SOCK_STREAM
||
2388 so
->so_proto
->pr_protocol
!= IPPROTO_TCP
||
2389 (so
->so_flags
& SOF_MP_SUBFLOW
) != 0 ||
2390 (so
->so_flags1
& SOF1_CONTENT_FILTER_SKIP
) != 0)
2393 filter_control_unit
= necp_socket_get_content_filter_control_unit(so
);
2394 if (filter_control_unit
== 0)
2397 if ((filter_control_unit
& NECP_MASK_USERSPACE_ONLY
) != 0) {
2398 OSIncrementAtomic(&cfil_stats
.cfs_sock_userspace_only
);
2401 if (cfil_active_count
== 0) {
2402 OSIncrementAtomic(&cfil_stats
.cfs_sock_attach_in_vain
);
2405 if (so
->so_cfil
!= NULL
) {
2406 OSIncrementAtomic(&cfil_stats
.cfs_sock_attach_already
);
2407 CFIL_LOG(LOG_ERR
, "already attached");
2409 cfil_info_alloc(so
, NULL
);
2410 if (so
->so_cfil
== NULL
) {
2412 OSIncrementAtomic(&cfil_stats
.cfs_sock_attach_no_mem
);
2416 if (cfil_info_attach_unit(so
, filter_control_unit
, so
->so_cfil
) == 0) {
2417 CFIL_LOG(LOG_ERR
, "cfil_info_attach_unit(%u) failed",
2418 filter_control_unit
);
2419 OSIncrementAtomic(&cfil_stats
.cfs_sock_attach_failed
);
2422 CFIL_LOG(LOG_INFO
, "so %llx filter_control_unit %u sockID %llx",
2423 (uint64_t)VM_KERNEL_ADDRPERM(so
),
2424 filter_control_unit
, so
->so_cfil
->cfi_sock_id
);
2426 so
->so_flags
|= SOF_CONTENT_FILTER
;
2427 OSIncrementAtomic(&cfil_stats
.cfs_sock_attached
);
2429 /* Hold a reference on the socket */
2432 error
= cfil_dispatch_attach_event(so
, so
->so_cfil
, filter_control_unit
);
2433 /* We can recover from flow control or out of memory errors */
2434 if (error
== ENOBUFS
|| error
== ENOMEM
)
2436 else if (error
!= 0)
2439 CFIL_INFO_VERIFY(so
->so_cfil
);
/*
 * Entry point from Sockets layer
 * The socket is locked.
 */
errno_t
cfil_sock_detach(struct socket *so)
{
	if (so->so_cfil) {
		if (so->so_flags & SOF_CONTENT_FILTER) {
			so->so_flags &= ~SOF_CONTENT_FILTER;
			VERIFY(so->so_usecount > 0);
			so->so_usecount--;
		}
		cfil_info_free(so->so_cfil);
		so->so_cfil = NULL;
		OSIncrementAtomic(&cfil_stats.cfs_sock_detached);
	}
	return (0);
}
2470 cfil_dispatch_attach_event(struct socket
*so
, struct cfil_info
*cfil_info
, uint32_t filter_control_unit
)
2473 struct cfil_entry
*entry
= NULL
;
2474 struct cfil_msg_sock_attached msg_attached
;
2476 struct content_filter
*cfc
= NULL
;
2478 socket_lock_assert_owned(so
);
2480 cfil_rw_lock_shared(&cfil_lck_rw
);
2482 if (so
->so_proto
== NULL
|| so
->so_proto
->pr_domain
== NULL
) {
2487 * Find the matching filter unit
2489 for (kcunit
= 1; kcunit
<= MAX_CONTENT_FILTER
; kcunit
++) {
2490 cfc
= content_filters
[kcunit
- 1];
2494 if (cfc
->cf_necp_control_unit
!= filter_control_unit
)
2496 entry
= &cfil_info
->cfi_entries
[kcunit
- 1];
2497 if (entry
->cfe_filter
== NULL
)
2500 VERIFY(cfc
== entry
->cfe_filter
);
2505 if (entry
== NULL
|| entry
->cfe_filter
== NULL
)
2508 if ((entry
->cfe_flags
& CFEF_SENT_SOCK_ATTACHED
))
2511 CFIL_LOG(LOG_INFO
, "so %llx filter_control_unit %u kcunit %u",
2512 (uint64_t)VM_KERNEL_ADDRPERM(so
), filter_control_unit
, kcunit
);
2514 /* Would be wasteful to try when flow controlled */
2515 if (cfc
->cf_flags
& CFF_FLOW_CONTROLLED
) {
2520 bzero(&msg_attached
, sizeof(struct cfil_msg_sock_attached
));
2521 msg_attached
.cfs_msghdr
.cfm_len
= sizeof(struct cfil_msg_sock_attached
);
2522 msg_attached
.cfs_msghdr
.cfm_version
= CFM_VERSION_CURRENT
;
2523 msg_attached
.cfs_msghdr
.cfm_type
= CFM_TYPE_EVENT
;
2524 msg_attached
.cfs_msghdr
.cfm_op
= CFM_OP_SOCKET_ATTACHED
;
2525 msg_attached
.cfs_msghdr
.cfm_sock_id
= entry
->cfe_cfil_info
->cfi_sock_id
;
2527 msg_attached
.cfs_sock_family
= so
->so_proto
->pr_domain
->dom_family
;
2528 msg_attached
.cfs_sock_type
= so
->so_proto
->pr_type
;
2529 msg_attached
.cfs_sock_protocol
= so
->so_proto
->pr_protocol
;
2530 msg_attached
.cfs_pid
= so
->last_pid
;
2531 memcpy(msg_attached
.cfs_uuid
, so
->last_uuid
, sizeof(uuid_t
));
2532 if (so
->so_flags
& SOF_DELEGATED
) {
2533 msg_attached
.cfs_e_pid
= so
->e_pid
;
2534 memcpy(msg_attached
.cfs_e_uuid
, so
->e_uuid
, sizeof(uuid_t
));
2536 msg_attached
.cfs_e_pid
= so
->last_pid
;
2537 memcpy(msg_attached
.cfs_e_uuid
, so
->last_uuid
, sizeof(uuid_t
));
2541 CFIL_LOG(LOG_DEBUG
, "CFIL: LIFECYCLE: SENDING ATTACH UP <sockID %llu> ",
2542 entry
->cfe_cfil_info
->cfi_sock_id
);
2545 error
= ctl_enqueuedata(entry
->cfe_filter
->cf_kcref
,
2546 entry
->cfe_filter
->cf_kcunit
,
2548 sizeof(struct cfil_msg_sock_attached
),
2551 CFIL_LOG(LOG_ERR
, "ctl_enqueuedata() failed: %d", error
);
2554 microuptime(&entry
->cfe_last_event
);
2555 cfil_info
->cfi_first_event
.tv_sec
= entry
->cfe_last_event
.tv_sec
;
2556 cfil_info
->cfi_first_event
.tv_usec
= entry
->cfe_last_event
.tv_usec
;
2558 entry
->cfe_flags
|= CFEF_SENT_SOCK_ATTACHED
;
2559 OSIncrementAtomic(&cfil_stats
.cfs_attach_event_ok
);
2562 /* We can recover from flow control */
2563 if (error
== ENOBUFS
) {
2564 entry
->cfe_flags
|= CFEF_FLOW_CONTROLLED
;
2565 OSIncrementAtomic(&cfil_stats
.cfs_attach_event_flow_control
);
2567 if (!cfil_rw_lock_shared_to_exclusive(&cfil_lck_rw
))
2568 cfil_rw_lock_exclusive(&cfil_lck_rw
);
2570 cfc
->cf_flags
|= CFF_FLOW_CONTROLLED
;
2572 cfil_rw_unlock_exclusive(&cfil_lck_rw
);
2575 OSIncrementAtomic(&cfil_stats
.cfs_attach_event_fail
);
2577 cfil_rw_unlock_shared(&cfil_lck_rw
);
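/*
 * Illustrative user-space sketch (not kernel code): reading the
 * CFM_OP_SOCKET_ATTACHED event that cfil_dispatch_attach_event()
 * enqueues on the kernel control socket.  kctl_fd is a hypothetical
 * connected control socket; partial reads and error handling are
 * omitted.
 */
#if 0
    struct cfil_msg_sock_attached attached;
    ssize_t n = recv(kctl_fd, &attached, sizeof(attached), 0);

    if (n == (ssize_t)sizeof(attached) &&
        attached.cfs_msghdr.cfm_op == CFM_OP_SOCKET_ATTACHED) {
        /* cfs_sock_id identifies this flow in every later message */
        printf("attached sock_id %llu pid %d family %d\n",
            (unsigned long long)attached.cfs_msghdr.cfm_sock_id,
            (int)attached.cfs_pid, (int)attached.cfs_sock_family);
    }
#endif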
2583 cfil_dispatch_disconnect_event(struct socket
*so
, struct cfil_info
*cfil_info
, uint32_t kcunit
, int outgoing
)
2586 struct mbuf
*msg
= NULL
;
2587 struct cfil_entry
*entry
;
2588 struct cfe_buf
*entrybuf
;
2589 struct cfil_msg_hdr msg_disconnected
;
2590 struct content_filter
*cfc
;
2592 socket_lock_assert_owned(so
);
2594 cfil_rw_lock_shared(&cfil_lck_rw
);
2596 entry
= &cfil_info
->cfi_entries
[kcunit
- 1];
2598 entrybuf
= &entry
->cfe_snd
;
2600 entrybuf
= &entry
->cfe_rcv
;
2602 cfc
= entry
->cfe_filter
;
2606 CFIL_LOG(LOG_INFO
, "so %llx kcunit %u outgoing %d",
2607 (uint64_t)VM_KERNEL_ADDRPERM(so
), kcunit
, outgoing
);
2610 * Send the disconnection event once
2612 if ((outgoing
&& (entry
->cfe_flags
& CFEF_SENT_DISCONNECT_OUT
)) ||
2613 (!outgoing
&& (entry
->cfe_flags
& CFEF_SENT_DISCONNECT_IN
))) {
2614 CFIL_LOG(LOG_INFO
, "so %llx disconnect already sent",
2615 (uint64_t)VM_KERNEL_ADDRPERM(so
));
2620 * We're not disconnected as long as some data is waiting
2621 * to be delivered to the filter
2623 if (outgoing
&& cfil_queue_empty(&entrybuf
->cfe_ctl_q
) == 0) {
2624 CFIL_LOG(LOG_INFO
, "so %llx control queue not empty",
2625 (uint64_t)VM_KERNEL_ADDRPERM(so
));
2629 /* Would be wasteful to try when flow controlled */
2630 if (cfc
->cf_flags
& CFF_FLOW_CONTROLLED
) {
2636 cfil_info_log(LOG_ERR
, cfil_info
, outgoing
?
2637 "CFIL: LIFECYCLE: OUT - SENDING DISCONNECT UP":
2638 "CFIL: LIFECYCLE: IN - SENDING DISCONNECT UP");
2641 bzero(&msg_disconnected
, sizeof(struct cfil_msg_hdr
));
2642 msg_disconnected
.cfm_len
= sizeof(struct cfil_msg_hdr
);
2643 msg_disconnected
.cfm_version
= CFM_VERSION_CURRENT
;
2644 msg_disconnected
.cfm_type
= CFM_TYPE_EVENT
;
2645 msg_disconnected
.cfm_op
= outgoing
? CFM_OP_DISCONNECT_OUT
:
2646 CFM_OP_DISCONNECT_IN
;
2647 msg_disconnected
.cfm_sock_id
= entry
->cfe_cfil_info
->cfi_sock_id
;
2648 error
= ctl_enqueuedata(entry
->cfe_filter
->cf_kcref
,
2649 entry
->cfe_filter
->cf_kcunit
,
2651 sizeof(struct cfil_msg_hdr
),
2654 CFIL_LOG(LOG_ERR
, "ctl_enqueuembuf() failed: %d", error
);
2658 microuptime(&entry
->cfe_last_event
);
2659 CFI_ADD_TIME_LOG(cfil_info
, &entry
->cfe_last_event
, &cfil_info
->cfi_first_event
, msg_disconnected
.cfm_op
);
2661 /* Remember we have sent the disconnection message */
2663 entry
->cfe_flags
|= CFEF_SENT_DISCONNECT_OUT
;
2664 OSIncrementAtomic(&cfil_stats
.cfs_disconnect_out_event_ok
);
2666 entry
->cfe_flags
|= CFEF_SENT_DISCONNECT_IN
;
2667 OSIncrementAtomic(&cfil_stats
.cfs_disconnect_in_event_ok
);
2670 if (error
== ENOBUFS
) {
2671 entry
->cfe_flags
|= CFEF_FLOW_CONTROLLED
;
2673 &cfil_stats
.cfs_disconnect_event_flow_control
);
2675 if (!cfil_rw_lock_shared_to_exclusive(&cfil_lck_rw
))
2676 cfil_rw_lock_exclusive(&cfil_lck_rw
);
2678 cfc
->cf_flags
|= CFF_FLOW_CONTROLLED
;
2680 cfil_rw_unlock_exclusive(&cfil_lck_rw
);
2684 &cfil_stats
.cfs_disconnect_event_fail
);
2686 cfil_rw_unlock_shared(&cfil_lck_rw
);
2692 cfil_dispatch_closed_event(struct socket
*so
, struct cfil_info
*cfil_info
, int kcunit
)
2694 struct cfil_entry
*entry
;
2695 struct cfil_msg_sock_closed msg_closed
;
2697 struct content_filter
*cfc
;
2699 socket_lock_assert_owned(so
);
2701 cfil_rw_lock_shared(&cfil_lck_rw
);
2703 entry
= &cfil_info
->cfi_entries
[kcunit
- 1];
2704 cfc
= entry
->cfe_filter
;
2708 CFIL_LOG(LOG_INFO
, "so %llx kcunit %d",
2709 (uint64_t)VM_KERNEL_ADDRPERM(so
), kcunit
);
2711 /* Would be wasteful to try when flow controlled */
2712 if (cfc
->cf_flags
& CFF_FLOW_CONTROLLED
) {
2717 * Send a single closed message per filter
2719 if ((entry
->cfe_flags
& CFEF_SENT_SOCK_CLOSED
) != 0)
2721 if ((entry
->cfe_flags
& CFEF_SENT_SOCK_ATTACHED
) == 0)
2724 microuptime(&entry
->cfe_last_event
);
2725 CFI_ADD_TIME_LOG(cfil_info
, &entry
->cfe_last_event
, &cfil_info
->cfi_first_event
, CFM_OP_SOCKET_CLOSED
);
2727 bzero(&msg_closed
, sizeof(struct cfil_msg_sock_closed
));
2728 msg_closed
.cfc_msghdr
.cfm_len
= sizeof(struct cfil_msg_sock_closed
);
2729 msg_closed
.cfc_msghdr
.cfm_version
= CFM_VERSION_CURRENT
;
2730 msg_closed
.cfc_msghdr
.cfm_type
= CFM_TYPE_EVENT
;
2731 msg_closed
.cfc_msghdr
.cfm_op
= CFM_OP_SOCKET_CLOSED
;
2732 msg_closed
.cfc_msghdr
.cfm_sock_id
= entry
->cfe_cfil_info
->cfi_sock_id
;
2733 msg_closed
.cfc_first_event
.tv_sec
= cfil_info
->cfi_first_event
.tv_sec
;
2734 msg_closed
.cfc_first_event
.tv_usec
= cfil_info
->cfi_first_event
.tv_usec
;
2735 memcpy(msg_closed
.cfc_op_time
, cfil_info
->cfi_op_time
, sizeof(uint32_t)*CFI_MAX_TIME_LOG_ENTRY
);
2736 memcpy(msg_closed
.cfc_op_list
, cfil_info
->cfi_op_list
, sizeof(unsigned char)*CFI_MAX_TIME_LOG_ENTRY
);
2737 msg_closed
.cfc_op_list_ctr
= cfil_info
->cfi_op_list_ctr
;
2740 CFIL_LOG(LOG_ERR
, "CFIL: LIFECYCLE: SENDING CLOSED UP: <sock id %llu> op ctr %d, start time %llu.%llu", msg_closed
.cfc_msghdr
.cfm_sock_id
, cfil_info
->cfi_op_list_ctr
, cfil_info
->cfi_first_event
.tv_sec
, cfil_info
->cfi_first_event
.tv_usec
);
2743 if (msg_closed.cfc_op_list_ctr > CFI_MAX_TIME_LOG_ENTRY) {
2744 msg_closed.cfc_op_list_ctr = CFI_MAX_TIME_LOG_ENTRY; // just in case
2746 for (unsigned int i = 0; i < msg_closed.cfc_op_list_ctr ; i++) {
2747 CFIL_LOG(LOG_ERR, "MD: socket %llu event %2u, time + %u msec", msg_closed.cfc_msghdr.cfm_sock_id, (unsigned short)msg_closed.cfc_op_list[i], msg_closed.cfc_op_time[i]);
2751 error
= ctl_enqueuedata(entry
->cfe_filter
->cf_kcref
,
2752 entry
->cfe_filter
->cf_kcunit
,
2754 sizeof(struct cfil_msg_sock_closed
),
2757 CFIL_LOG(LOG_ERR
, "ctl_enqueuedata() failed: %d",
2762 entry
->cfe_flags
|= CFEF_SENT_SOCK_CLOSED
;
2763 OSIncrementAtomic(&cfil_stats
.cfs_closed_event_ok
);
2765 /* We can recover from flow control */
2766 if (error
== ENOBUFS
) {
2767 entry
->cfe_flags
|= CFEF_FLOW_CONTROLLED
;
2768 OSIncrementAtomic(&cfil_stats
.cfs_closed_event_flow_control
);
2770 if (!cfil_rw_lock_shared_to_exclusive(&cfil_lck_rw
))
2771 cfil_rw_lock_exclusive(&cfil_lck_rw
);
2773 cfc
->cf_flags
|= CFF_FLOW_CONTROLLED
;
2775 cfil_rw_unlock_exclusive(&cfil_lck_rw
);
2778 OSIncrementAtomic(&cfil_stats
.cfs_closed_event_fail
);
2780 cfil_rw_unlock_shared(&cfil_lck_rw
);
static void
fill_ip6_sockaddr_4_6(union sockaddr_in_4_6 *sin46,
	struct in6_addr *ip6, u_int16_t port)
{
	struct sockaddr_in6 *sin6 = &sin46->sin6;

	sin6->sin6_family = AF_INET6;
	sin6->sin6_len = sizeof(*sin6);
	sin6->sin6_port = port;
	sin6->sin6_addr = *ip6;
	if (IN6_IS_SCOPE_EMBED(&sin6->sin6_addr)) {
		sin6->sin6_scope_id = ntohs(sin6->sin6_addr.s6_addr16[1]);
		sin6->sin6_addr.s6_addr16[1] = 0;
	}
}

static void
fill_ip_sockaddr_4_6(union sockaddr_in_4_6 *sin46,
	struct in_addr ip, u_int16_t port)
{
	struct sockaddr_in *sin = &sin46->sin;

	sin->sin_family = AF_INET;
	sin->sin_len = sizeof(*sin);
	sin->sin_port = port;
	sin->sin_addr.s_addr = ip.s_addr;
}
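/*
 * Minimal usage sketch for the helper above (hypothetical values):
 * populating one half of the sockaddr union for an IPv4 flow, with the
 * address and port already in network byte order, as the attach and
 * data event code does.
 */
#if 0
    union sockaddr_in_4_6 src;
    struct in_addr ip = { .s_addr = htonl(INADDR_LOOPBACK) };

    fill_ip_sockaddr_4_6(&src, ip, htons(8080));
    /* src.sin now describes AF_INET 127.0.0.1:8080 */
#endif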
static void
cfil_get_flow_address_v6(struct cfil_hash_entry *entry, struct inpcb *inp,
	struct in6_addr **laddr, struct in6_addr **faddr,
	u_int16_t *lport, u_int16_t *fport)
{
	if (entry != NULL) {
		*laddr = &entry->cfentry_laddr.addr6;
		*faddr = &entry->cfentry_faddr.addr6;
		*lport = entry->cfentry_lport;
		*fport = entry->cfentry_fport;
	} else {
		*laddr = &inp->in6p_laddr;
		*faddr = &inp->in6p_faddr;
		*lport = inp->inp_lport;
		*fport = inp->inp_fport;
	}
}

static void
cfil_get_flow_address(struct cfil_hash_entry *entry, struct inpcb *inp,
	struct in_addr *laddr, struct in_addr *faddr,
	u_int16_t *lport, u_int16_t *fport)
{
	if (entry != NULL) {
		*laddr = entry->cfentry_laddr.addr46.ia46_addr4;
		*faddr = entry->cfentry_faddr.addr46.ia46_addr4;
		*lport = entry->cfentry_lport;
		*fport = entry->cfentry_fport;
	} else {
		*laddr = inp->inp_laddr;
		*faddr = inp->inp_faddr;
		*lport = inp->inp_lport;
		*fport = inp->inp_fport;
	}
}
2851 cfil_dispatch_data_event(struct socket
*so
, struct cfil_info
*cfil_info
, uint32_t kcunit
, int outgoing
,
2852 struct mbuf
*data
, unsigned int copyoffset
, unsigned int copylen
)
2855 struct mbuf
*copy
= NULL
;
2856 struct mbuf
*msg
= NULL
;
2857 unsigned int one
= 1;
2858 struct cfil_msg_data_event
*data_req
;
2860 struct inpcb
*inp
= (struct inpcb
*)so
->so_pcb
;
2861 struct cfil_entry
*entry
;
2862 struct cfe_buf
*entrybuf
;
2863 struct content_filter
*cfc
;
2866 cfil_rw_lock_shared(&cfil_lck_rw
);
2868 entry
= &cfil_info
->cfi_entries
[kcunit
- 1];
2870 entrybuf
= &entry
->cfe_snd
;
2872 entrybuf
= &entry
->cfe_rcv
;
2874 cfc
= entry
->cfe_filter
;
2878 data
= cfil_data_start(data
);
2879 if (data
== NULL
|| (data
->m_flags
& M_PKTHDR
) == 0) {
2880 CFIL_LOG(LOG_ERR
, "NOT PKTHDR");
2884 CFIL_LOG(LOG_INFO
, "so %llx kcunit %u outgoing %d",
2885 (uint64_t)VM_KERNEL_ADDRPERM(so
), kcunit
, outgoing
);
2887 socket_lock_assert_owned(so
);
2889 /* Would be wasteful to try */
2890 if (cfc
->cf_flags
& CFF_FLOW_CONTROLLED
) {
2895 /* Make a copy of the data to pass to kernel control socket */
2896 copy
= m_copym_mode(data
, copyoffset
, copylen
, M_DONTWAIT
,
2899 CFIL_LOG(LOG_ERR
, "m_copym_mode() failed");
2904 /* We need an mbuf packet for the message header */
2905 hdrsize
= sizeof(struct cfil_msg_data_event
);
2906 error
= mbuf_allocpacket(MBUF_DONTWAIT
, hdrsize
, &one
, &msg
);
2908 CFIL_LOG(LOG_ERR
, "mbuf_allocpacket() failed");
2911 * ENOBUFS is to indicate flow control
2916 mbuf_setlen(msg
, hdrsize
);
2917 mbuf_pkthdr_setlen(msg
, hdrsize
+ copylen
);
2919 data_req
= (struct cfil_msg_data_event
*)mbuf_data(msg
);
2920 bzero(data_req
, hdrsize
);
2921 data_req
->cfd_msghdr
.cfm_len
= hdrsize
+ copylen
;
2922 data_req
->cfd_msghdr
.cfm_version
= 1;
2923 data_req
->cfd_msghdr
.cfm_type
= CFM_TYPE_EVENT
;
2924 data_req
->cfd_msghdr
.cfm_op
=
2925 outgoing
? CFM_OP_DATA_OUT
: CFM_OP_DATA_IN
;
2926 data_req
->cfd_msghdr
.cfm_sock_id
=
2927 entry
->cfe_cfil_info
->cfi_sock_id
;
2928 data_req
->cfd_start_offset
= entrybuf
->cfe_peeked
;
2929 data_req
->cfd_end_offset
= entrybuf
->cfe_peeked
+ copylen
;
2933 * For non connected sockets need to copy addresses from passed
2936 if (inp
->inp_vflag
& INP_IPV6
) {
2937 struct in6_addr
*laddr
= NULL
, *faddr
= NULL
;
2938 u_int16_t lport
= 0, fport
= 0;
2940 cfil_get_flow_address_v6(cfil_info
->cfi_hash_entry
, inp
,
2941 &laddr
, &faddr
, &lport
, &fport
);
2943 fill_ip6_sockaddr_4_6(&data_req
->cfc_src
, laddr
, lport
);
2944 fill_ip6_sockaddr_4_6(&data_req
->cfc_dst
, faddr
, fport
);
2946 fill_ip6_sockaddr_4_6(&data_req
->cfc_src
, faddr
, fport
);
2947 fill_ip6_sockaddr_4_6(&data_req
->cfc_dst
, laddr
, lport
);
2949 } else if (inp
->inp_vflag
& INP_IPV4
) {
2950 struct in_addr laddr
= {0}, faddr
= {0};
2951 u_int16_t lport
= 0, fport
= 0;
2953 cfil_get_flow_address(cfil_info
->cfi_hash_entry
, inp
,
2954 &laddr
, &faddr
, &lport
, &fport
);
2957 fill_ip_sockaddr_4_6(&data_req
->cfc_src
, laddr
, lport
);
2958 fill_ip_sockaddr_4_6(&data_req
->cfc_dst
, faddr
, fport
);
2960 fill_ip_sockaddr_4_6(&data_req
->cfc_src
, faddr
, fport
);
2961 fill_ip_sockaddr_4_6(&data_req
->cfc_dst
, laddr
, lport
);
2966 CFI_ADD_TIME_LOG(cfil_info
, &tv
, &cfil_info
->cfi_first_event
, data_req
->cfd_msghdr
.cfm_op
);
2968 /* Pass the message to the content filter */
2969 error
= ctl_enqueuembuf(entry
->cfe_filter
->cf_kcref
,
2970 entry
->cfe_filter
->cf_kcunit
,
2973 CFIL_LOG(LOG_ERR
, "ctl_enqueuembuf() failed: %d", error
);
2977 entry
->cfe_flags
&= ~CFEF_FLOW_CONTROLLED
;
2978 OSIncrementAtomic(&cfil_stats
.cfs_data_event_ok
);
2981 CFIL_LOG(LOG_ERR
, "CFIL: VERDICT ACTION: so %llx sockID %llu outgoing %d: mbuf %llx copyoffset %u copylen %u",
2982 (uint64_t)VM_KERNEL_ADDRPERM(so
), cfil_info
->cfi_sock_id
, outgoing
, (uint64_t)VM_KERNEL_ADDRPERM(data
), copyoffset
, copylen
);
2986 if (error
== ENOBUFS
) {
2987 entry
->cfe_flags
|= CFEF_FLOW_CONTROLLED
;
2989 &cfil_stats
.cfs_data_event_flow_control
);
2991 if (!cfil_rw_lock_shared_to_exclusive(&cfil_lck_rw
))
2992 cfil_rw_lock_exclusive(&cfil_lck_rw
);
2994 cfc
->cf_flags
|= CFF_FLOW_CONTROLLED
;
2996 cfil_rw_unlock_exclusive(&cfil_lck_rw
);
2999 OSIncrementAtomic(&cfil_stats
.cfs_data_event_fail
);
3001 cfil_rw_unlock_shared(&cfil_lck_rw
);
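/*
 * Illustrative sketch (user-space view, simplified): a data event built
 * by cfil_dispatch_data_event() is a struct cfil_msg_data_event header
 * with the copied payload bytes following it, and the span it covers in
 * the flow's byte stream is [cfd_start_offset, cfd_end_offset).  The
 * buffer name is hypothetical.
 */
#if 0
    struct cfil_msg_data_event *evt = (struct cfil_msg_data_event *)buf;
    size_t payload_len = evt->cfd_msghdr.cfm_len - sizeof(*evt);
    uint8_t *payload = (uint8_t *)(evt + 1);

    /* the payload length matches the advertised stream span */
    assert(payload_len ==
        (size_t)(evt->cfd_end_offset - evt->cfd_start_offset));
#endif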
3007 * Process the queue of data waiting to be delivered to content filter
3010 cfil_data_service_ctl_q(struct socket
*so
, struct cfil_info
*cfil_info
, uint32_t kcunit
, int outgoing
)
3013 struct mbuf
*data
, *tmp
= NULL
;
3014 unsigned int datalen
= 0, copylen
= 0, copyoffset
= 0;
3015 struct cfil_entry
*entry
;
3016 struct cfe_buf
*entrybuf
;
3017 uint64_t currentoffset
= 0;
3019 if (cfil_info
== NULL
)
3022 CFIL_LOG(LOG_INFO
, "so %llx kcunit %u outgoing %d",
3023 (uint64_t)VM_KERNEL_ADDRPERM(so
), kcunit
, outgoing
);
3025 socket_lock_assert_owned(so
);
3027 entry
= &cfil_info
->cfi_entries
[kcunit
- 1];
3029 entrybuf
= &entry
->cfe_snd
;
3031 entrybuf
= &entry
->cfe_rcv
;
3033 /* Send attached message if not yet done */
3034 if ((entry
->cfe_flags
& CFEF_SENT_SOCK_ATTACHED
) == 0) {
3035 error
= cfil_dispatch_attach_event(so
, cfil_info
, kcunit
);
3037 /* We can recover from flow control */
3038 if (error
== ENOBUFS
|| error
== ENOMEM
)
3042 } else if ((entry
->cfe_flags
& CFEF_DATA_START
) == 0) {
3043 OSIncrementAtomic(&cfil_stats
.cfs_ctl_q_not_started
);
3048 CFIL_LOG(LOG_DEBUG
, "CFIL: SERVICE CTL-Q: pass_offset %llu peeked %llu peek_offset %llu",
3049 entrybuf
->cfe_pass_offset
,
3050 entrybuf
->cfe_peeked
,
3051 entrybuf
->cfe_peek_offset
);
3054 /* Move all data that can pass */
3055 while ((data
= cfil_queue_first(&entrybuf
->cfe_ctl_q
)) != NULL
&&
3056 entrybuf
->cfe_ctl_q
.q_start
< entrybuf
->cfe_pass_offset
) {
3057 datalen
= cfil_data_length(data
, NULL
, NULL
);
3060 if (entrybuf
->cfe_ctl_q
.q_start
+ datalen
<=
3061 entrybuf
->cfe_pass_offset
) {
3063 * The first mbuf can fully pass
3068 * The first mbuf can partially pass
3070 copylen
= entrybuf
->cfe_pass_offset
-
3071 entrybuf
->cfe_ctl_q
.q_start
;
3073 VERIFY(copylen
<= datalen
);
3077 "CFIL: SERVICE CTL-Q PASSING: %llx first %llu peeked %llu pass %llu peek %llu"
3078 "datalen %u copylen %u",
3079 (uint64_t)VM_KERNEL_ADDRPERM(tmp
),
3080 entrybuf
->cfe_ctl_q
.q_start
,
3081 entrybuf
->cfe_peeked
,
3082 entrybuf
->cfe_pass_offset
,
3083 entrybuf
->cfe_peek_offset
,
3088 * Data that passes has been peeked at explicitly or
3091 if (entrybuf
->cfe_ctl_q
.q_start
+ copylen
>
3092 entrybuf
->cfe_peeked
)
3093 entrybuf
->cfe_peeked
=
3094 entrybuf
->cfe_ctl_q
.q_start
+ copylen
;
3096 * Stop on partial pass
3098 if (copylen
< datalen
)
3101 /* All good, move full data from ctl queue to pending queue */
3102 cfil_queue_remove(&entrybuf
->cfe_ctl_q
, data
, datalen
);
3104 cfil_queue_enqueue(&entrybuf
->cfe_pending_q
, data
, datalen
);
3106 OSAddAtomic64(datalen
,
3107 &cfil_stats
.cfs_pending_q_out_enqueued
);
3109 OSAddAtomic64(datalen
,
3110 &cfil_stats
.cfs_pending_q_in_enqueued
);
3112 CFIL_INFO_VERIFY(cfil_info
);
3115 "%llx first %llu peeked %llu pass %llu peek %llu"
3116 "datalen %u copylen %u",
3117 (uint64_t)VM_KERNEL_ADDRPERM(tmp
),
3118 entrybuf
->cfe_ctl_q
.q_start
,
3119 entrybuf
->cfe_peeked
,
3120 entrybuf
->cfe_pass_offset
,
3121 entrybuf
->cfe_peek_offset
,
3125 /* Now deal with remaining data the filter wants to peek at */
3126 for (data
= cfil_queue_first(&entrybuf
->cfe_ctl_q
),
3127 currentoffset
= entrybuf
->cfe_ctl_q
.q_start
;
3128 data
!= NULL
&& currentoffset
< entrybuf
->cfe_peek_offset
;
3129 data
= cfil_queue_next(&entrybuf
->cfe_ctl_q
, data
),
3130 currentoffset
+= datalen
) {
3131 datalen
= cfil_data_length(data
, NULL
, NULL
);
3134 /* We've already peeked at this mbuf */
3135 if (currentoffset
+ datalen
<= entrybuf
->cfe_peeked
)
3138 * The data in the first mbuf may have been
3139 * partially peeked at
3141 copyoffset
= entrybuf
->cfe_peeked
- currentoffset
;
3142 VERIFY(copyoffset
< datalen
);
3143 copylen
= datalen
- copyoffset
;
3144 VERIFY(copylen
<= datalen
);
3146 * Do not copy more than needed
3148 if (currentoffset
+ copyoffset
+ copylen
>
3149 entrybuf
->cfe_peek_offset
) {
3150 copylen
= entrybuf
->cfe_peek_offset
-
3151 (currentoffset
+ copyoffset
);
3156 "CFIL: SERVICE CTL-Q PEEKING: %llx current %llu peeked %llu pass %llu peek %llu "
3157 "datalen %u copylen %u copyoffset %u",
3158 (uint64_t)VM_KERNEL_ADDRPERM(tmp
),
3160 entrybuf
->cfe_peeked
,
3161 entrybuf
->cfe_pass_offset
,
3162 entrybuf
->cfe_peek_offset
,
3163 datalen
, copylen
, copyoffset
);
3167 * Stop if there is nothing more to peek at
3172 * Let the filter get a peek at this span of data
3174 error
= cfil_dispatch_data_event(so
, cfil_info
, kcunit
,
3175 outgoing
, data
, copyoffset
, copylen
);
3177 /* On error, leave data in ctl_q */
3180 entrybuf
->cfe_peeked
+= copylen
;
3182 OSAddAtomic64(copylen
,
3183 &cfil_stats
.cfs_ctl_q_out_peeked
);
3185 OSAddAtomic64(copylen
,
3186 &cfil_stats
.cfs_ctl_q_in_peeked
);
3188 /* Stop when data could not be fully peeked at */
3189 if (copylen
+ copyoffset
< datalen
)
3192 CFIL_INFO_VERIFY(cfil_info
);
3195 "%llx first %llu peeked %llu pass %llu peek %llu"
3196 "datalen %u copylen %u copyoffset %u",
3197 (uint64_t)VM_KERNEL_ADDRPERM(tmp
),
3199 entrybuf
->cfe_peeked
,
3200 entrybuf
->cfe_pass_offset
,
3201 entrybuf
->cfe_peek_offset
,
3202 datalen
, copylen
, copyoffset
);
3205 * Process data that has passed the filter
3207 error
= cfil_service_pending_queue(so
, cfil_info
, kcunit
, outgoing
);
3209 CFIL_LOG(LOG_ERR
, "cfil_service_pending_queue() error %d",
3215 * Dispatch disconnect events that could not be sent
3217 if (cfil_info
== NULL
)
3219 else if (outgoing
) {
3220 if ((cfil_info
->cfi_flags
& CFIF_SHUT_WR
) &&
3221 !(entry
->cfe_flags
& CFEF_SENT_DISCONNECT_OUT
))
3222 cfil_dispatch_disconnect_event(so
, cfil_info
, kcunit
, 1);
3224 if ((cfil_info
->cfi_flags
& CFIF_SHUT_RD
) &&
3225 !(entry
->cfe_flags
& CFEF_SENT_DISCONNECT_IN
))
3226 cfil_dispatch_disconnect_event(so
, cfil_info
, kcunit
, 0);
3231 "first %llu peeked %llu pass %llu peek %llu",
3232 entrybuf
->cfe_ctl_q
.q_start
,
3233 entrybuf
->cfe_peeked
,
3234 entrybuf
->cfe_pass_offset
,
3235 entrybuf
->cfe_peek_offset
);
3237 CFIL_INFO_VERIFY(cfil_info
);
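/*
 * Worked example for the control-queue pass logic above (hypothetical
 * numbers): with q_start = 1000, a 600-byte mbuf at the head of the
 * control queue and cfe_pass_offset = 1400, the mbuf can only partially
 * pass, so copylen = cfe_pass_offset - q_start = 400; cfe_peeked is
 * advanced to at least q_start + copylen = 1400, and the loop stops on
 * the partial pass because copylen < datalen.
 */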
3242 * cfil_data_filter()
3244 * Process data for a content filter installed on a socket
3247 cfil_data_filter(struct socket
*so
, struct cfil_info
*cfil_info
, uint32_t kcunit
, int outgoing
,
3248 struct mbuf
*data
, uint64_t datalen
)
3251 struct cfil_entry
*entry
;
3252 struct cfe_buf
*entrybuf
;
3254 CFIL_LOG(LOG_INFO
, "so %llx kcunit %u outgoing %d",
3255 (uint64_t)VM_KERNEL_ADDRPERM(so
), kcunit
, outgoing
);
3257 socket_lock_assert_owned(so
);
3259 entry
= &cfil_info
->cfi_entries
[kcunit
- 1];
3261 entrybuf
= &entry
->cfe_snd
;
3263 entrybuf
= &entry
->cfe_rcv
;
3265 /* Are we attached to the filter? */
3266 if (entry
->cfe_filter
== NULL
) {
3271 /* Dispatch to filters */
3272 cfil_queue_enqueue(&entrybuf
->cfe_ctl_q
, data
, datalen
);
3274 OSAddAtomic64(datalen
,
3275 &cfil_stats
.cfs_ctl_q_out_enqueued
);
3277 OSAddAtomic64(datalen
,
3278 &cfil_stats
.cfs_ctl_q_in_enqueued
);
3280 error
= cfil_data_service_ctl_q(so
, cfil_info
, kcunit
, outgoing
);
3282 CFIL_LOG(LOG_ERR
, "cfil_data_service_ctl_q() error %d",
3286 * We have to return EJUSTRETURN in all cases to avoid double free
3289 error
= EJUSTRETURN
;
3291 CFIL_INFO_VERIFY(cfil_info
);
3293 CFIL_LOG(LOG_INFO
, "return %d", error
);
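/*
 * Sketch of the caller-side convention implied by the EJUSTRETURN
 * return above (illustrative, simplified): once the data has been
 * enqueued for the filters, the socket layer must neither free nor
 * transmit the mbufs itself.
 */
#if 0
    error = cfil_sock_data_out(so, to, data, control, flags);
    if (error == EJUSTRETURN) {
        /* mbufs are now owned by the content filter subsystem */
        data = NULL;
        control = NULL;
        error = 0;
    }
#endif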
3298 * cfil_service_inject_queue() re-inject data that passed the
3302 cfil_service_inject_queue(struct socket
*so
, struct cfil_info
*cfil_info
, int outgoing
)
3305 unsigned int datalen
;
3309 struct cfi_buf
*cfi_buf
;
3310 struct cfil_queue
*inject_q
;
3311 int need_rwakeup
= 0;
3314 if (cfil_info
== NULL
)
3317 socket_lock_assert_owned(so
);
3320 cfi_buf
= &cfil_info
->cfi_snd
;
3321 cfil_info
->cfi_flags
&= ~CFIF_RETRY_INJECT_OUT
;
3323 cfi_buf
= &cfil_info
->cfi_rcv
;
3324 cfil_info
->cfi_flags
&= ~CFIF_RETRY_INJECT_IN
;
3326 inject_q
= &cfi_buf
->cfi_inject_q
;
3328 if (cfil_queue_empty(inject_q
))
3331 #if DATA_DEBUG | VERDICT_DEBUG
3332 CFIL_LOG(LOG_ERR
, "CFIL: SERVICE INJECT-Q: <so %llx> outgoing %d queue len %llu",
3333 (uint64_t)VM_KERNEL_ADDRPERM(so
), outgoing
, cfil_queue_len(inject_q
));
3336 while ((data
= cfil_queue_first(inject_q
)) != NULL
) {
3337 datalen
= cfil_data_length(data
, &mbcnt
, &mbnum
);
3340 CFIL_LOG(LOG_DEBUG
, "CFIL: SERVICE INJECT-Q: <%s>: <so %llx> data %llx datalen %u (mbcnt %u)",
3341 remote_addr_ptr
? "UNCONNECTED" : "CONNECTED",
3342 (uint64_t)VM_KERNEL_ADDRPERM(so
), (uint64_t)VM_KERNEL_ADDRPERM(data
), datalen
, mbcnt
);
3345 /* Remove data from queue and adjust stats */
3346 cfil_queue_remove(inject_q
, data
, datalen
);
3347 cfi_buf
->cfi_pending_first
+= datalen
;
3348 cfi_buf
->cfi_pending_mbcnt
-= mbcnt
;
3349 cfi_buf
->cfi_pending_mbnum
-= mbnum
;
3350 cfil_info_buf_verify(cfi_buf
);
3353 error
= sosend_reinject(so
, NULL
, data
, NULL
, 0);
3356 cfil_info_log(LOG_ERR
, cfil_info
, "CFIL: Error: sosend_reinject() failed");
3357 CFIL_LOG(LOG_ERR
, "### sosend() failed %d", error
);
3361 // At least one injection succeeded, need to wake up pending threads.
3364 data
->m_flags
|= M_SKIPCFIL
;
			 * NOTE: We currently only support TCP and UDP.
			 * For RAWIP, MPTCP and message TCP we'll
			 * need to call the appropriate sbappendxxx()
			 * or fix sock_inject_data_in()
			 */
3372 if (IS_UDP(so
) == TRUE
) {
3373 if (sbappendchain(&so
->so_rcv
, data
, 0))
3376 if (sbappendstream(&so
->so_rcv
, data
))
3382 OSAddAtomic64(datalen
,
3383 &cfil_stats
.cfs_inject_q_out_passed
);
3385 OSAddAtomic64(datalen
,
3386 &cfil_stats
.cfs_inject_q_in_passed
);
3391 #if DATA_DEBUG | VERDICT_DEBUG
3392 CFIL_LOG(LOG_ERR
, "CFIL: SERVICE INJECT-Q: <so %llx> injected %d",
3393 (uint64_t)VM_KERNEL_ADDRPERM(so
), count
);
	/* A single wakeup for several packets is more efficient */
3398 if (outgoing
== TRUE
)
3404 if (error
!= 0 && cfil_info
) {
3405 if (error
== ENOBUFS
)
3406 OSIncrementAtomic(&cfil_stats
.cfs_inject_q_nobufs
);
3407 if (error
== ENOMEM
)
3408 OSIncrementAtomic(&cfil_stats
.cfs_inject_q_nomem
);
3411 cfil_info
->cfi_flags
|= CFIF_RETRY_INJECT_OUT
;
3412 OSIncrementAtomic(&cfil_stats
.cfs_inject_q_out_fail
);
3414 cfil_info
->cfi_flags
|= CFIF_RETRY_INJECT_IN
;
3415 OSIncrementAtomic(&cfil_stats
.cfs_inject_q_in_fail
);
3422 if (cfil_info
&& (cfil_info
->cfi_flags
& CFIF_SHUT_WR
)) {
3423 cfil_sock_notify_shutdown(so
, SHUT_WR
);
3424 if (cfil_sock_data_pending(&so
->so_snd
) == 0)
3425 soshutdownlock_final(so
, SHUT_WR
);
3427 if (cfil_info
&& (cfil_info
->cfi_flags
& CFIF_CLOSE_WAIT
)) {
3428 if (cfil_filters_attached(so
) == 0) {
3429 CFIL_LOG(LOG_INFO
, "so %llx waking",
3430 (uint64_t)VM_KERNEL_ADDRPERM(so
));
3431 wakeup((caddr_t
)cfil_info
);
3435 CFIL_INFO_VERIFY(cfil_info
);
3441 cfil_service_pending_queue(struct socket
*so
, struct cfil_info
*cfil_info
, uint32_t kcunit
, int outgoing
)
3443 uint64_t passlen
, curlen
;
3445 unsigned int datalen
;
3447 struct cfil_entry
*entry
;
3448 struct cfe_buf
*entrybuf
;
3449 struct cfil_queue
*pending_q
;
3451 CFIL_LOG(LOG_INFO
, "so %llx kcunit %u outgoing %d",
3452 (uint64_t)VM_KERNEL_ADDRPERM(so
), kcunit
, outgoing
);
3454 socket_lock_assert_owned(so
);
3456 entry
= &cfil_info
->cfi_entries
[kcunit
- 1];
3458 entrybuf
= &entry
->cfe_snd
;
3460 entrybuf
= &entry
->cfe_rcv
;
3462 pending_q
= &entrybuf
->cfe_pending_q
;
3464 passlen
= entrybuf
->cfe_pass_offset
- pending_q
->q_start
;
3467 * Locate the chunks of data that we can pass to the next filter
3468 * A data chunk must be on mbuf boundaries
3471 while ((data
= cfil_queue_first(pending_q
)) != NULL
) {
3472 datalen
= cfil_data_length(data
, NULL
, NULL
);
3476 "CFIL: SERVICE PENDING-Q: data %llx datalen %u passlen %llu curlen %llu",
3477 (uint64_t)VM_KERNEL_ADDRPERM(data
), datalen
,
3481 if (curlen
+ datalen
> passlen
)
3484 cfil_queue_remove(pending_q
, data
, datalen
);
3489 kcunit
<= MAX_CONTENT_FILTER
;
3491 error
= cfil_data_filter(so
, cfil_info
, kcunit
, outgoing
,
3493 /* 0 means passed so we can continue */
3497 /* When data has passed all filters, re-inject */
3501 &cfil_info
->cfi_snd
.cfi_inject_q
,
3503 OSAddAtomic64(datalen
,
3504 &cfil_stats
.cfs_inject_q_out_enqueued
);
3507 &cfil_info
->cfi_rcv
.cfi_inject_q
,
3509 OSAddAtomic64(datalen
,
3510 &cfil_stats
.cfs_inject_q_in_enqueued
);
3515 CFIL_INFO_VERIFY(cfil_info
);
3521 cfil_update_data_offsets(struct socket
*so
, struct cfil_info
*cfil_info
, uint32_t kcunit
, int outgoing
,
3522 uint64_t pass_offset
, uint64_t peek_offset
)
3525 struct cfil_entry
*entry
= NULL
;
3526 struct cfe_buf
*entrybuf
;
3529 CFIL_LOG(LOG_INFO
, "pass %llu peek %llu", pass_offset
, peek_offset
);
3531 socket_lock_assert_owned(so
);
3533 if (cfil_info
== NULL
) {
3534 CFIL_LOG(LOG_ERR
, "so %llx cfil detached",
3535 (uint64_t)VM_KERNEL_ADDRPERM(so
));
3538 } else if (cfil_info
->cfi_flags
& CFIF_DROP
) {
3539 CFIL_LOG(LOG_ERR
, "so %llx drop set",
3540 (uint64_t)VM_KERNEL_ADDRPERM(so
));
3545 entry
= &cfil_info
->cfi_entries
[kcunit
- 1];
3547 entrybuf
= &entry
->cfe_snd
;
3549 entrybuf
= &entry
->cfe_rcv
;
3551 /* Record updated offsets for this content filter */
3552 if (pass_offset
> entrybuf
->cfe_pass_offset
) {
3553 entrybuf
->cfe_pass_offset
= pass_offset
;
3555 if (entrybuf
->cfe_peek_offset
< entrybuf
->cfe_pass_offset
)
3556 entrybuf
->cfe_peek_offset
= entrybuf
->cfe_pass_offset
;
3559 CFIL_LOG(LOG_INFO
, "pass_offset %llu <= cfe_pass_offset %llu",
3560 pass_offset
, entrybuf
->cfe_pass_offset
);
3562 /* Filter does not want or need to see data that's allowed to pass */
3563 if (peek_offset
> entrybuf
->cfe_pass_offset
&&
3564 peek_offset
> entrybuf
->cfe_peek_offset
) {
3565 entrybuf
->cfe_peek_offset
= peek_offset
;
3572 /* Move data held in control queue to pending queue if needed */
3573 error
= cfil_data_service_ctl_q(so
, cfil_info
, kcunit
, outgoing
);
3575 CFIL_LOG(LOG_ERR
, "cfil_data_service_ctl_q() error %d",
3579 error
= EJUSTRETURN
;
3583 * The filter is effectively detached when pass all from both sides
3584 * or when the socket is closed and no more data is waiting
3585 * to be delivered to the filter
3587 if (entry
!= NULL
&&
3588 ((entry
->cfe_snd
.cfe_pass_offset
== CFM_MAX_OFFSET
&&
3589 entry
->cfe_rcv
.cfe_pass_offset
== CFM_MAX_OFFSET
) ||
3590 ((cfil_info
->cfi_flags
& CFIF_CLOSE_WAIT
) &&
3591 cfil_queue_empty(&entry
->cfe_snd
.cfe_ctl_q
) &&
3592 cfil_queue_empty(&entry
->cfe_rcv
.cfe_ctl_q
)))) {
3593 entry
->cfe_flags
|= CFEF_CFIL_DETACHED
;
3595 cfil_info_log(LOG_ERR
, cfil_info
, outgoing
?
3596 "CFIL: LIFECYCLE: OUT - PASSED ALL - DETACH":
3597 "CFIL: LIFECYCLE: IN - PASSED ALL - DETACH");
3599 CFIL_LOG(LOG_INFO
, "so %llx detached %u",
3600 (uint64_t)VM_KERNEL_ADDRPERM(so
), kcunit
);
3601 if ((cfil_info
->cfi_flags
& CFIF_CLOSE_WAIT
) &&
3602 cfil_filters_attached(so
) == 0) {
3604 cfil_info_log(LOG_ERR
, cfil_info
, "CFIL: LIFECYCLE: WAKING");
3606 CFIL_LOG(LOG_INFO
, "so %llx waking",
3607 (uint64_t)VM_KERNEL_ADDRPERM(so
));
3608 wakeup((caddr_t
)cfil_info
);
3611 CFIL_INFO_VERIFY(cfil_info
);
3612 CFIL_LOG(LOG_INFO
, "return %d", error
);
3617 * Update pass offset for socket when no data is pending
3620 cfil_set_socket_pass_offset(struct socket
*so
, struct cfil_info
*cfil_info
, int outgoing
)
3622 struct cfi_buf
*cfi_buf
;
3623 struct cfil_entry
*entry
;
3624 struct cfe_buf
*entrybuf
;
3626 uint64_t pass_offset
= 0;
3628 if (cfil_info
== NULL
)
3631 CFIL_LOG(LOG_INFO
, "so %llx outgoing %d",
3632 (uint64_t)VM_KERNEL_ADDRPERM(so
), outgoing
);
3634 socket_lock_assert_owned(so
);
3637 cfi_buf
= &cfil_info
->cfi_snd
;
3639 cfi_buf
= &cfil_info
->cfi_rcv
;
3641 CFIL_LOG(LOG_DEBUG
, "CFIL: <so %llx, sockID %llu> outgoing %d cfi_pending_first %llu cfi_pending_last %llu",
3642 (uint64_t)VM_KERNEL_ADDRPERM(so
), cfil_info
->cfi_sock_id
, outgoing
,
3643 cfi_buf
->cfi_pending_first
, cfi_buf
->cfi_pending_last
);
3645 if (cfi_buf
->cfi_pending_last
- cfi_buf
->cfi_pending_first
== 0) {
3646 for (kcunit
= 1; kcunit
<= MAX_CONTENT_FILTER
; kcunit
++) {
3647 entry
= &cfil_info
->cfi_entries
[kcunit
- 1];
3649 /* Are we attached to a filter? */
3650 if (entry
->cfe_filter
== NULL
)
3654 entrybuf
= &entry
->cfe_snd
;
3656 entrybuf
= &entry
->cfe_rcv
;
3658 if (pass_offset
== 0 ||
3659 entrybuf
->cfe_pass_offset
< pass_offset
)
3660 pass_offset
= entrybuf
->cfe_pass_offset
;
3662 cfi_buf
->cfi_pass_offset
= pass_offset
;
3665 CFIL_LOG(LOG_DEBUG
, "CFIL: <so %llx, sockID %llu>, cfi_pass_offset %llu",
3666 (uint64_t)VM_KERNEL_ADDRPERM(so
), cfil_info
->cfi_sock_id
, cfi_buf
->cfi_pass_offset
);
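/*
 * Worked example for the loop above (hypothetical numbers): with two
 * attached filters whose cfe_pass_offset values are 5000 and 3200, and
 * no data pending, cfi_pass_offset becomes 3200 -- the socket only
 * treats bytes as passed once the most restrictive filter has passed
 * them.
 */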
int
cfil_action_data_pass(struct socket *so, struct cfil_info *cfil_info, uint32_t kcunit, int outgoing,
	uint64_t pass_offset, uint64_t peek_offset)
{
	errno_t error = 0;

	CFIL_LOG(LOG_INFO, "");

	socket_lock_assert_owned(so);

	error = cfil_acquire_sockbuf(so, cfil_info, outgoing);
	if (error != 0) {
		CFIL_LOG(LOG_INFO, "so %llx %s dropped",
			(uint64_t)VM_KERNEL_ADDRPERM(so),
			outgoing ? "out" : "in");
		goto release;
	}

	error = cfil_update_data_offsets(so, cfil_info, kcunit, outgoing,
		pass_offset, peek_offset);

	cfil_service_inject_queue(so, cfil_info, outgoing);

	cfil_set_socket_pass_offset(so, cfil_info, outgoing);
release:
	CFIL_INFO_VERIFY(cfil_info);
	cfil_release_sockbuf(so, outgoing);

	return (error);
}
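/*
 * Illustrative user-space sketch (simplified, assuming the
 * cfil_msg_action layout from <net/content_filter.h>): an agent that
 * wants to pass a flow in both directions for good can send a
 * CFM_OP_DATA_UPDATE action with CFM_MAX_OFFSET pass offsets, which
 * ultimately lands in cfil_action_data_pass() above.  kctl_fd and
 * sock_id are hypothetical values held by the agent.
 */
#if 0
    struct cfil_msg_action action;

    bzero(&action, sizeof(action));
    action.cfa_msghdr.cfm_len = sizeof(action);
    action.cfa_msghdr.cfm_version = CFM_VERSION_CURRENT;
    action.cfa_msghdr.cfm_type = CFM_TYPE_ACTION;
    action.cfa_msghdr.cfm_op = CFM_OP_DATA_UPDATE;
    action.cfa_msghdr.cfm_sock_id = sock_id;    /* from the attach event */
    action.cfa_out_pass_offset = CFM_MAX_OFFSET;
    action.cfa_in_pass_offset = CFM_MAX_OFFSET;

    (void)send(kctl_fd, &action, sizeof(action), 0);
#endif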
3704 cfil_flush_queues(struct socket
*so
, struct cfil_info
*cfil_info
)
3706 struct cfil_entry
*entry
;
3710 if ((so
->so_flags
& SOF_CONTENT_FILTER
) == 0 || cfil_info
== NULL
)
3713 socket_lock_assert_owned(so
);
3716 * Flush the output queues and ignore errors as long as
3719 (void) cfil_acquire_sockbuf(so
, cfil_info
, 1);
3720 if (cfil_info
!= NULL
) {
3722 for (kcunit
= 1; kcunit
<= MAX_CONTENT_FILTER
; kcunit
++) {
3723 entry
= &cfil_info
->cfi_entries
[kcunit
- 1];
3725 drained
+= cfil_queue_drain(&entry
->cfe_snd
.cfe_ctl_q
);
3726 drained
+= cfil_queue_drain(&entry
->cfe_snd
.cfe_pending_q
);
3728 drained
+= cfil_queue_drain(&cfil_info
->cfi_snd
.cfi_inject_q
);
3731 if (cfil_info
->cfi_flags
& CFIF_DROP
)
3733 &cfil_stats
.cfs_flush_out_drop
);
3736 &cfil_stats
.cfs_flush_out_close
);
3739 cfil_release_sockbuf(so
, 1);
3742 * Flush the input queues
3744 (void) cfil_acquire_sockbuf(so
, cfil_info
, 0);
3745 if (cfil_info
!= NULL
) {
3747 for (kcunit
= 1; kcunit
<= MAX_CONTENT_FILTER
; kcunit
++) {
3748 entry
= &cfil_info
->cfi_entries
[kcunit
- 1];
3750 drained
+= cfil_queue_drain(
3751 &entry
->cfe_rcv
.cfe_ctl_q
);
3752 drained
+= cfil_queue_drain(
3753 &entry
->cfe_rcv
.cfe_pending_q
);
3755 drained
+= cfil_queue_drain(&cfil_info
->cfi_rcv
.cfi_inject_q
);
3758 if (cfil_info
->cfi_flags
& CFIF_DROP
)
3760 &cfil_stats
.cfs_flush_in_drop
);
3763 &cfil_stats
.cfs_flush_in_close
);
3766 cfil_release_sockbuf(so
, 0);
3768 CFIL_INFO_VERIFY(cfil_info
);
3772 cfil_action_drop(struct socket
*so
, struct cfil_info
*cfil_info
, uint32_t kcunit
)
3775 struct cfil_entry
*entry
;
3778 if ((so
->so_flags
& SOF_CONTENT_FILTER
) == 0 || cfil_info
== NULL
)
3781 socket_lock_assert_owned(so
);
3783 entry
= &cfil_info
->cfi_entries
[kcunit
- 1];
3785 /* Are we attached to the filter? */
3786 if (entry
->cfe_filter
== NULL
)
3789 cfil_info
->cfi_flags
|= CFIF_DROP
;
3794 * Force the socket to be marked defunct
3795 * (forcing fixed along with rdar://19391339)
3797 if (so
->so_cfil_db
== NULL
) {
3798 error
= sosetdefunct(p
, so
,
3799 SHUTDOWN_SOCKET_LEVEL_CONTENT_FILTER
| SHUTDOWN_SOCKET_LEVEL_DISCONNECT_ALL
,
3802 /* Flush the socket buffer and disconnect */
3804 error
= sodefunct(p
, so
,
3805 SHUTDOWN_SOCKET_LEVEL_CONTENT_FILTER
| SHUTDOWN_SOCKET_LEVEL_DISCONNECT_ALL
);
3808 /* The filter is done, mark as detached */
3809 entry
->cfe_flags
|= CFEF_CFIL_DETACHED
;
3811 cfil_info_log(LOG_ERR
, cfil_info
, "CFIL: LIFECYCLE: DROP - DETACH");
3813 CFIL_LOG(LOG_INFO
, "so %llx detached %u",
3814 (uint64_t)VM_KERNEL_ADDRPERM(so
), kcunit
);
3816 /* Pending data needs to go */
3817 cfil_flush_queues(so
, cfil_info
);
3819 if (cfil_info
&& (cfil_info
->cfi_flags
& CFIF_CLOSE_WAIT
)) {
3820 if (cfil_filters_attached(so
) == 0) {
3821 CFIL_LOG(LOG_INFO
, "so %llx waking",
3822 (uint64_t)VM_KERNEL_ADDRPERM(so
));
3823 wakeup((caddr_t
)cfil_info
);
3831 cfil_action_bless_client(uint32_t kcunit
, struct cfil_msg_hdr
*msghdr
)
3834 struct cfil_info
*cfil_info
= NULL
;
3836 bool cfil_attached
= false;
3837 struct cfil_msg_bless_client
*blessmsg
= (struct cfil_msg_bless_client
*)msghdr
;
3839 // Search and lock socket
3840 struct socket
*so
= cfil_socket_from_client_uuid(blessmsg
->cfb_client_uuid
, &cfil_attached
);
3844 // The client gets a pass automatically
3845 cfil_info
= (so
->so_cfil_db
!= NULL
) ?
3846 cfil_db_get_cfil_info(so
->so_cfil_db
, msghdr
->cfm_sock_id
) : so
->so_cfil
;
3848 if (cfil_attached
) {
3850 if (cfil_info
!= NULL
) {
3851 CFIL_LOG(LOG_ERR
, "CFIL: VERDICT RECEIVED: BLESS %s <so %llx sockID %llu>",
3852 cfil_info
->cfi_hash_entry
? "UDP" : "TCP",
3853 (uint64_t)VM_KERNEL_ADDRPERM(so
),
3854 cfil_info
->cfi_sock_id
);
3857 (void)cfil_action_data_pass(so
, cfil_info
, kcunit
, 1, CFM_MAX_OFFSET
, CFM_MAX_OFFSET
);
3858 (void)cfil_action_data_pass(so
, cfil_info
, kcunit
, 0, CFM_MAX_OFFSET
, CFM_MAX_OFFSET
);
3860 so
->so_flags1
|= SOF1_CONTENT_FILTER_SKIP
;
3862 socket_unlock(so
, 1);
static int
cfil_update_entry_offsets(struct socket *so, struct cfil_info *cfil_info, int outgoing, unsigned int datalen)
{
	struct cfil_entry *entry;
	struct cfe_buf *entrybuf;
	uint32_t kcunit;

	CFIL_LOG(LOG_INFO, "so %llx outgoing %d datalen %u",
		(uint64_t)VM_KERNEL_ADDRPERM(so), outgoing, datalen);

	for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
		entry = &cfil_info->cfi_entries[kcunit - 1];

		/* Are we attached to the filter? */
		if (entry->cfe_filter == NULL)
			continue;

		if (outgoing)
			entrybuf = &entry->cfe_snd;
		else
			entrybuf = &entry->cfe_rcv;

		entrybuf->cfe_ctl_q.q_start += datalen;
		entrybuf->cfe_pass_offset = entrybuf->cfe_ctl_q.q_start;
		entrybuf->cfe_peeked = entrybuf->cfe_ctl_q.q_start;
		if (entrybuf->cfe_peek_offset < entrybuf->cfe_pass_offset)
			entrybuf->cfe_peek_offset = entrybuf->cfe_pass_offset;

		entrybuf->cfe_ctl_q.q_end += datalen;

		entrybuf->cfe_pending_q.q_start += datalen;
		entrybuf->cfe_pending_q.q_end += datalen;
	}
	CFIL_INFO_VERIFY(cfil_info);
	return (0);
}
3906 cfil_data_common(struct socket
*so
, struct cfil_info
*cfil_info
, int outgoing
, struct sockaddr
*to
,
3907 struct mbuf
*data
, struct mbuf
*control
, uint32_t flags
)
3909 #pragma unused(to, control, flags)
3911 unsigned int datalen
;
3915 struct cfi_buf
*cfi_buf
;
3916 struct mbuf
*chain
= NULL
;
3918 if (cfil_info
== NULL
) {
3919 CFIL_LOG(LOG_ERR
, "so %llx cfil detached",
3920 (uint64_t)VM_KERNEL_ADDRPERM(so
));
3923 } else if (cfil_info
->cfi_flags
& CFIF_DROP
) {
3924 CFIL_LOG(LOG_ERR
, "so %llx drop set",
3925 (uint64_t)VM_KERNEL_ADDRPERM(so
));
3930 datalen
= cfil_data_length(data
, &mbcnt
, &mbnum
);
3933 cfi_buf
= &cfil_info
->cfi_snd
;
3935 cfi_buf
= &cfil_info
->cfi_rcv
;
3937 cfi_buf
->cfi_pending_last
+= datalen
;
3938 cfi_buf
->cfi_pending_mbcnt
+= mbcnt
;
3939 cfi_buf
->cfi_pending_mbnum
+= mbnum
;
3942 if (cfi_buf
->cfi_pending_mbnum
> cfil_udp_gc_mbuf_num_max
||
3943 cfi_buf
->cfi_pending_mbcnt
> cfil_udp_gc_mbuf_cnt_max
) {
3944 cfi_buf
->cfi_tail_drop_cnt
++;
3945 cfi_buf
->cfi_pending_mbcnt
-= mbcnt
;
3946 cfi_buf
->cfi_pending_mbnum
-= mbnum
;
3951 cfil_info_buf_verify(cfi_buf
);
3954 CFIL_LOG(LOG_DEBUG
, "CFIL: QUEUEING DATA: <so %llx> %s: data %llx len %u flags 0x%x nextpkt %llx - cfi_pending_last %llu cfi_pending_mbcnt %u cfi_pass_offset %llu",
3955 (uint64_t)VM_KERNEL_ADDRPERM(so
),
3956 outgoing
? "OUT" : "IN",
3957 (uint64_t)VM_KERNEL_ADDRPERM(data
), datalen
, data
->m_flags
,
3958 (uint64_t)VM_KERNEL_ADDRPERM(data
->m_nextpkt
),
3959 cfi_buf
->cfi_pending_last
,
3960 cfi_buf
->cfi_pending_mbcnt
,
3961 cfi_buf
->cfi_pass_offset
);
3964 /* Fast path when below pass offset */
3965 if (cfi_buf
->cfi_pending_last
<= cfi_buf
->cfi_pass_offset
) {
3966 cfil_update_entry_offsets(so
, cfil_info
, outgoing
, datalen
);
3968 CFIL_LOG(LOG_DEBUG
, "CFIL: QUEUEING DATA: FAST PATH");
3971 for (kcunit
= 1; kcunit
<= MAX_CONTENT_FILTER
; kcunit
++) {
3972 // Is cfil attached to this filter?
3973 if (IS_ENTRY_ATTACHED(cfil_info
, kcunit
)) {
				 * Chain addr (incoming only TBD), control (optional) and data into one chain.
				 * This full chain will be reinjected into socket after receiving verdict.
3979 (void) cfil_udp_save_socket_state(cfil_info
, data
);
3980 chain
= sbconcat_mbufs(NULL
, outgoing
? NULL
: to
, data
, control
);
3981 if (chain
== NULL
) {
3986 error
= cfil_data_filter(so
, cfil_info
, kcunit
, outgoing
, data
,
3989 /* 0 means passed so continue with next filter */
3995 /* Move cursor if no filter claimed the data */
3997 cfi_buf
->cfi_pending_first
+= datalen
;
3998 cfi_buf
->cfi_pending_mbcnt
-= mbcnt
;
3999 cfi_buf
->cfi_pending_mbnum
-= mbnum
;
4000 cfil_info_buf_verify(cfi_buf
);
4003 CFIL_INFO_VERIFY(cfil_info
);
4009 * Callback from socket layer sosendxxx()
4012 cfil_sock_data_out(struct socket
*so
, struct sockaddr
*to
,
4013 struct mbuf
*data
, struct mbuf
*control
, uint32_t flags
)
4018 return (cfil_sock_udp_handle_data(TRUE
, so
, NULL
, to
, data
, control
, flags
));
4021 if ((so
->so_flags
& SOF_CONTENT_FILTER
) == 0 || so
->so_cfil
== NULL
)
4024 socket_lock_assert_owned(so
);
4026 if (so
->so_cfil
->cfi_flags
& CFIF_DROP
) {
4027 CFIL_LOG(LOG_ERR
, "so %llx drop set",
4028 (uint64_t)VM_KERNEL_ADDRPERM(so
));
4031 if (control
!= NULL
) {
4032 CFIL_LOG(LOG_ERR
, "so %llx control",
4033 (uint64_t)VM_KERNEL_ADDRPERM(so
));
4034 OSIncrementAtomic(&cfil_stats
.cfs_data_out_control
);
4036 if ((flags
& MSG_OOB
)) {
4037 CFIL_LOG(LOG_ERR
, "so %llx MSG_OOB",
4038 (uint64_t)VM_KERNEL_ADDRPERM(so
));
4039 OSIncrementAtomic(&cfil_stats
.cfs_data_out_oob
);
4041 if ((so
->so_snd
.sb_flags
& SB_LOCK
) == 0)
4042 panic("so %p SB_LOCK not set", so
);
4044 if (so
->so_snd
.sb_cfil_thread
!= NULL
)
4045 panic("%s sb_cfil_thread %p not NULL", __func__
,
4046 so
->so_snd
.sb_cfil_thread
);
4048 error
= cfil_data_common(so
, so
->so_cfil
, 1, to
, data
, control
, flags
);
4054 * Callback from socket layer sbappendxxx()
4057 cfil_sock_data_in(struct socket
*so
, struct sockaddr
*from
,
4058 struct mbuf
*data
, struct mbuf
*control
, uint32_t flags
)
4063 return (cfil_sock_udp_handle_data(FALSE
, so
, NULL
, from
, data
, control
, flags
));
4066 if ((so
->so_flags
& SOF_CONTENT_FILTER
) == 0 || so
->so_cfil
== NULL
)
4069 socket_lock_assert_owned(so
);
4071 if (so
->so_cfil
->cfi_flags
& CFIF_DROP
) {
4072 CFIL_LOG(LOG_ERR
, "so %llx drop set",
4073 (uint64_t)VM_KERNEL_ADDRPERM(so
));
4076 if (control
!= NULL
) {
4077 CFIL_LOG(LOG_ERR
, "so %llx control",
4078 (uint64_t)VM_KERNEL_ADDRPERM(so
));
4079 OSIncrementAtomic(&cfil_stats
.cfs_data_in_control
);
4081 if (data
->m_type
== MT_OOBDATA
) {
4082 CFIL_LOG(LOG_ERR
, "so %llx MSG_OOB",
4083 (uint64_t)VM_KERNEL_ADDRPERM(so
));
4084 OSIncrementAtomic(&cfil_stats
.cfs_data_in_oob
);
4086 error
= cfil_data_common(so
, so
->so_cfil
, 0, from
, data
, control
, flags
);
/*
 * Callback from socket layer soshutdownxxx()
 *
 * We may delay the shutdown write if there's outgoing data in process.
 *
 * There is no point in delaying the shutdown read because the process
 * indicated that it does not want to read anymore data.
 */
int
cfil_sock_shutdown(struct socket *so, int *how)
{
    int error = 0;

    if (IS_UDP(so)) {
        return (cfil_sock_udp_shutdown(so, how));
    }

    if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL)
        goto done;

    socket_lock_assert_owned(so);

    CFIL_LOG(LOG_INFO, "so %llx how %d",
        (uint64_t)VM_KERNEL_ADDRPERM(so), *how);

    /*
     * Check the state of the socket before the content filter
     */
    if (*how != SHUT_WR && (so->so_state & SS_CANTRCVMORE) != 0) {
        /* read already shut down */
        error = ENOTCONN;
        goto done;
    }
    if (*how != SHUT_RD && (so->so_state & SS_CANTSENDMORE) != 0) {
        /* write already shut down */
        error = ENOTCONN;
        goto done;
    }

    if ((so->so_cfil->cfi_flags & CFIF_DROP) != 0) {
        CFIL_LOG(LOG_ERR, "so %llx drop set",
            (uint64_t)VM_KERNEL_ADDRPERM(so));
        goto done;
    }

    /*
     * shutdown read: SHUT_RD or SHUT_RDWR
     */
    if (*how != SHUT_WR) {
        if (so->so_cfil->cfi_flags & CFIF_SHUT_RD) {
            error = ENOTCONN;
            goto done;
        }
        so->so_cfil->cfi_flags |= CFIF_SHUT_RD;
        cfil_sock_notify_shutdown(so, SHUT_RD);
    }
    /*
     * shutdown write: SHUT_WR or SHUT_RDWR
     */
    if (*how != SHUT_RD) {
        if (so->so_cfil->cfi_flags & CFIF_SHUT_WR) {
            error = ENOTCONN;
            goto done;
        }
        so->so_cfil->cfi_flags |= CFIF_SHUT_WR;
        cfil_sock_notify_shutdown(so, SHUT_WR);
        /*
         * When outgoing data is pending, we delay the shutdown at the
         * protocol level until the content filters give the final
         * verdict on the pending data.
         */
        if (cfil_sock_data_pending(&so->so_snd) != 0) {
            /*
             * When shutting down the read and write sides at once
             * we can proceed to the final shutdown of the read
             * side. Otherwise, we just return.
             */
            if (*how == SHUT_WR) {
                error = EJUSTRETURN;
            } else if (*how == SHUT_RDWR) {
                *how = SHUT_RD;
            }
        }
    }
done:
    return (error);
}
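/*
 * Illustration (added; not part of the original source): a minimal sketch of
 * how a caller in the socket layer might interpret the EJUSTRETURN convention
 * used above. The helper name is hypothetical; it only shows that EJUSTRETURN
 * means "the filter delayed the write-side shutdown, do not touch the protocol
 * yet", while any other non-zero error is propagated.
 *
 *	static int
 *	example_shutdown_path(struct socket *so, int how)
 *	{
 *		int error = cfil_sock_shutdown(so, &how);
 *
 *		if (error == EJUSTRETURN)
 *			return (0);     // deferred until filters pass the pending data
 *		if (error != 0)
 *			return (error); // e.g. ENOTCONN when that side is already shut
 *		// otherwise proceed with the protocol-level shutdown using the
 *		// possibly rewritten 'how' (SHUT_RDWR may have been narrowed to SHUT_RD)
 *		return (0);
 *	}
 */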
/*
 * This is called when the socket is closed and there is no more
 * opportunity for filtering
 */
void
cfil_sock_is_closed(struct socket *so)
{
    errno_t error = 0;
    int kcunit;

    if (IS_UDP(so)) {
        cfil_sock_udp_is_closed(so);
        return;
    }

    if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL)
        return;

    CFIL_LOG(LOG_INFO, "so %llx", (uint64_t)VM_KERNEL_ADDRPERM(so));

    socket_lock_assert_owned(so);

    for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
        /* Let the filters know of the closing */
        error = cfil_dispatch_closed_event(so, so->so_cfil, kcunit);
    }

    /* Last chance to push passed data out */
    error = cfil_acquire_sockbuf(so, so->so_cfil, 1);
    if (error == 0)
        cfil_service_inject_queue(so, so->so_cfil, 1);
    cfil_release_sockbuf(so, 1);

    so->so_cfil->cfi_flags |= CFIF_SOCK_CLOSED;

    /* Pending data needs to go */
    cfil_flush_queues(so, so->so_cfil);

    CFIL_INFO_VERIFY(so->so_cfil);
}
/*
 * This is called when the socket is disconnected so let the filters
 * know about the disconnection and that no more data will come
 *
 * The how parameter has the same values as soshutdown()
 */
void
cfil_sock_notify_shutdown(struct socket *so, int how)
{
    errno_t error = 0;
    int kcunit;

    if (IS_UDP(so)) {
        cfil_sock_udp_notify_shutdown(so, how, 0, 0);
        return;
    }

    if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL)
        return;

    CFIL_LOG(LOG_INFO, "so %llx how %d",
        (uint64_t)VM_KERNEL_ADDRPERM(so), how);

    socket_lock_assert_owned(so);

    for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
        /* Disconnect incoming side */
        if (how != SHUT_WR)
            error = cfil_dispatch_disconnect_event(so, so->so_cfil, kcunit, 0);
        /* Disconnect outgoing side */
        if (how != SHUT_RD)
            error = cfil_dispatch_disconnect_event(so, so->so_cfil, kcunit, 1);
    }
}
static int
cfil_filters_attached(struct socket *so)
{
    struct cfil_entry *entry;
    uint32_t kcunit;
    int attached = 0;

    if (IS_UDP(so))
        return cfil_filters_udp_attached(so, FALSE);

    if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL)
        return (0);

    socket_lock_assert_owned(so);

    for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
        entry = &so->so_cfil->cfi_entries[kcunit - 1];

        /* Are we attached to the filter? */
        if (entry->cfe_filter == NULL)
            continue;
        if ((entry->cfe_flags & CFEF_SENT_SOCK_ATTACHED) == 0)
            continue;
        if ((entry->cfe_flags & CFEF_CFIL_DETACHED) != 0)
            continue;
        attached = 1;
        break;
    }

    return (attached);
}
/*
 * This is called when the socket is closed and we are waiting for
 * the filters to give the final pass or drop
 */
void
cfil_sock_close_wait(struct socket *so)
{
    lck_mtx_t *mutex_held;
    struct timespec ts;
    int error;

    if (IS_UDP(so)) {
        cfil_sock_udp_close_wait(so);
        return;
    }

    if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL)
        return;

    CFIL_LOG(LOG_INFO, "so %llx", (uint64_t)VM_KERNEL_ADDRPERM(so));

    if (so->so_proto->pr_getlock != NULL)
        mutex_held = (*so->so_proto->pr_getlock)(so, PR_F_WILLUNLOCK);
    else
        mutex_held = so->so_proto->pr_domain->dom_mtx;
    LCK_MTX_ASSERT(mutex_held, LCK_MTX_ASSERT_OWNED);

    while (cfil_filters_attached(so)) {
        /*
         * Notify the filters we are going away so they can detach
         */
        cfil_sock_notify_shutdown(so, SHUT_RDWR);

        /*
         * Make sure we need to wait after the filters are notified
         * of the disconnection
         */
        if (cfil_filters_attached(so) == 0)
            break;

        CFIL_LOG(LOG_INFO, "so %llx waiting",
            (uint64_t)VM_KERNEL_ADDRPERM(so));

        ts.tv_sec = cfil_close_wait_timeout / 1000;
        ts.tv_nsec = (cfil_close_wait_timeout % 1000) *
            NSEC_PER_USEC * 1000;

        OSIncrementAtomic(&cfil_stats.cfs_close_wait);
        so->so_cfil->cfi_flags |= CFIF_CLOSE_WAIT;
        error = msleep((caddr_t)so->so_cfil, mutex_held,
            PSOCK | PCATCH, "cfil_sock_close_wait", &ts);
        so->so_cfil->cfi_flags &= ~CFIF_CLOSE_WAIT;

        CFIL_LOG(LOG_NOTICE, "so %llx timed out %d",
            (uint64_t)VM_KERNEL_ADDRPERM(so), (error != 0));

        /*
         * Force close in case of timeout
         */
        if (error != 0) {
            OSIncrementAtomic(&cfil_stats.cfs_close_wait_timeout);
            break;
        }
    }
}
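/*
 * Worked example (added for clarity; not in the original source): the wait
 * above converts cfil_close_wait_timeout, expressed in milliseconds, into a
 * struct timespec. Assuming a hypothetical timeout of 1500 ms:
 *
 *	ts.tv_sec  = 1500 / 1000;                          // 1 second
 *	ts.tv_nsec = (1500 % 1000) * NSEC_PER_USEC * 1000; // 500 * 1000 * 1000 = 500,000,000 ns
 *
 * so msleep() blocks for at most 1.5 seconds per loop iteration before the
 * timeout path force-closes the connection.
 */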
/*
 * Returns the size of the data held by the content filter by using
 */
int32_t
cfil_sock_data_pending(struct sockbuf *sb)
{
    struct socket *so = sb->sb_so;
    uint64_t pending = 0;

    if (IS_UDP(so))
        return (cfil_sock_udp_data_pending(sb, FALSE));

    if ((so->so_flags & SOF_CONTENT_FILTER) != 0 && so->so_cfil != NULL) {
        struct cfi_buf *cfi_buf;

        socket_lock_assert_owned(so);

        if ((sb->sb_flags & SB_RECV) == 0)
            cfi_buf = &so->so_cfil->cfi_snd;
        else
            cfi_buf = &so->so_cfil->cfi_rcv;

        pending = cfi_buf->cfi_pending_last -
            cfi_buf->cfi_pending_first;

        /*
         * If we are limited by the "chars of mbufs used" roughly
         * adjust so we won't overcommit
         */
        if (pending > (uint64_t)cfi_buf->cfi_pending_mbcnt)
            pending = cfi_buf->cfi_pending_mbcnt;
    }

    VERIFY(pending < INT32_MAX);

    return (int32_t)(pending);
}
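/*
 * Numeric example (added; illustrative only): if the filter holds byte
 * offsets [cfi_pending_first, cfi_pending_last) = [1000, 6000), the nominal
 * pending count is 5000 bytes, but if only cfi_pending_mbcnt = 4096 chars of
 * mbuf storage are actually in use, the function reports 4096 so the socket
 * buffer accounting is not overcommitted.
 */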
/*
 * Return the socket buffer space used by data being held by content filters
 * so processes won't clog the socket buffer
 */
int32_t
cfil_sock_data_space(struct sockbuf *sb)
{
    struct socket *so = sb->sb_so;
    uint64_t pending = 0;

    if (IS_UDP(so))
        return (cfil_sock_udp_data_pending(sb, TRUE));

    if ((so->so_flags & SOF_CONTENT_FILTER) != 0 && so->so_cfil != NULL &&
        so->so_snd.sb_cfil_thread != current_thread()) {
        struct cfi_buf *cfi_buf;

        socket_lock_assert_owned(so);

        if ((sb->sb_flags & SB_RECV) == 0)
            cfi_buf = &so->so_cfil->cfi_snd;
        else
            cfi_buf = &so->so_cfil->cfi_rcv;

        pending = cfi_buf->cfi_pending_last -
            cfi_buf->cfi_pending_first;

        /*
         * If we are limited by the "chars of mbufs used" roughly
         * adjust so we won't overcommit
         */
        if ((uint64_t)cfi_buf->cfi_pending_mbcnt > pending)
            pending = cfi_buf->cfi_pending_mbcnt;
    }

    VERIFY(pending < INT32_MAX);

    return (int32_t)(pending);
}
/*
 * A callback from the socket and protocol layer when data becomes
 * available in the socket buffer to give a chance for the content filter
 * to re-inject data that was held back
 */
void
cfil_sock_buf_update(struct sockbuf *sb)
{
    int outgoing;
    int error;
    struct socket *so = sb->sb_so;

    if (IS_UDP(so)) {
        cfil_sock_udp_buf_update(sb);
        return;
    }

    if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || so->so_cfil == NULL)
        return;

    socket_lock_assert_owned(so);

    if ((sb->sb_flags & SB_RECV) == 0) {
        if ((so->so_cfil->cfi_flags & CFIF_RETRY_INJECT_OUT) == 0)
            return;
        outgoing = 1;
        OSIncrementAtomic(&cfil_stats.cfs_inject_q_out_retry);
    } else {
        if ((so->so_cfil->cfi_flags & CFIF_RETRY_INJECT_IN) == 0)
            return;
        outgoing = 0;
        OSIncrementAtomic(&cfil_stats.cfs_inject_q_in_retry);
    }

    CFIL_LOG(LOG_NOTICE, "so %llx outgoing %d",
        (uint64_t)VM_KERNEL_ADDRPERM(so), outgoing);

    error = cfil_acquire_sockbuf(so, so->so_cfil, outgoing);
    if (error == 0)
        cfil_service_inject_queue(so, so->so_cfil, outgoing);
    cfil_release_sockbuf(so, outgoing);
}
static int
sysctl_cfil_filter_list(struct sysctl_oid *oidp, void *arg1, int arg2,
    struct sysctl_req *req)
{
#pragma unused(oidp, arg1, arg2)
    int error = 0;
    size_t len = 0;
    u_int32_t i;

    /* Read only */
    if (req->newptr != USER_ADDR_NULL)
        return (EPERM);

    cfil_rw_lock_shared(&cfil_lck_rw);

    for (i = 0; content_filters != NULL && i < MAX_CONTENT_FILTER; i++) {
        struct cfil_filter_stat filter_stat;
        struct content_filter *cfc = content_filters[i];

        if (cfc == NULL)
            continue;

        /* If just asking for the size */
        if (req->oldptr == USER_ADDR_NULL) {
            len += sizeof(struct cfil_filter_stat);
            continue;
        }

        bzero(&filter_stat, sizeof(struct cfil_filter_stat));
        filter_stat.cfs_len = sizeof(struct cfil_filter_stat);
        filter_stat.cfs_filter_id = cfc->cf_kcunit;
        filter_stat.cfs_flags = cfc->cf_flags;
        filter_stat.cfs_sock_count = cfc->cf_sock_count;
        filter_stat.cfs_necp_control_unit = cfc->cf_necp_control_unit;

        error = SYSCTL_OUT(req, &filter_stat,
            sizeof (struct cfil_filter_stat));
        if (error != 0)
            break;
    }
    /* If just asking for the size */
    if (req->oldptr == USER_ADDR_NULL)
        req->oldidx = len;

    cfil_rw_unlock_shared(&cfil_lck_rw);

    if (req->oldptr != USER_ADDR_NULL) {
        for (i = 1; content_filters != NULL && i <= MAX_CONTENT_FILTER; i++) {
            cfil_filter_show(i);
        }
    }

    return (error);
}
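/*
 * Usage sketch (added; not part of the original file): assuming this handler
 * is registered under a "net.cfil.filter_list" sysctl name (the registration
 * lives elsewhere in this file, so the exact name is an assumption), a user
 * space tool could size and fetch the stats with the usual two-call pattern:
 *
 *	size_t len = 0;
 *	// first call: oldp == NULL, the handler reports the needed length
 *	if (sysctlbyname("net.cfil.filter_list", NULL, &len, NULL, 0) == 0 && len > 0) {
 *		struct cfil_filter_stat *stats = malloc(len);
 *		if (stats != NULL &&
 *		    sysctlbyname("net.cfil.filter_list", stats, &len, NULL, 0) == 0) {
 *			// one struct cfil_filter_stat per active content filter
 *		}
 *	}
 *	// writes are rejected above with EPERM (req->newptr check)
 */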
static int sysctl_cfil_sock_list(struct sysctl_oid *oidp, void *arg1, int arg2,
    struct sysctl_req *req)
{
#pragma unused(oidp, arg1, arg2)
    int error = 0;
    u_int32_t i;
    struct cfil_info *cfi;

    /* Read only */
    if (req->newptr != USER_ADDR_NULL)
        return (EPERM);

    cfil_rw_lock_shared(&cfil_lck_rw);

    /*
     * If just asking for the size,
     */
    if (req->oldptr == USER_ADDR_NULL) {
        req->oldidx = cfil_sock_attached_count *
            sizeof(struct cfil_sock_stat);
        /* Bump the length in case new sockets gets attached */
        req->oldidx += req->oldidx >> 3;
        goto done;
    }

    TAILQ_FOREACH(cfi, &cfil_sock_head, cfi_link) {
        struct cfil_entry *entry;
        struct cfil_sock_stat stat;
        struct socket *so = cfi->cfi_so;

        bzero(&stat, sizeof(struct cfil_sock_stat));
        stat.cfs_len = sizeof(struct cfil_sock_stat);
        stat.cfs_sock_id = cfi->cfi_sock_id;
        stat.cfs_flags = cfi->cfi_flags;

        stat.cfs_pid = so->last_pid;
        memcpy(stat.cfs_uuid, so->last_uuid, sizeof(uuid_t));
        if (so->so_flags & SOF_DELEGATED) {
            stat.cfs_e_pid = so->e_pid;
            memcpy(stat.cfs_e_uuid, so->e_uuid, sizeof(uuid_t));
        } else {
            stat.cfs_e_pid = so->last_pid;
            memcpy(stat.cfs_e_uuid, so->last_uuid, sizeof(uuid_t));
        }

        stat.cfs_sock_family = so->so_proto->pr_domain->dom_family;
        stat.cfs_sock_type = so->so_proto->pr_type;
        stat.cfs_sock_protocol = so->so_proto->pr_protocol;

        stat.cfs_snd.cbs_pending_first =
            cfi->cfi_snd.cfi_pending_first;
        stat.cfs_snd.cbs_pending_last =
            cfi->cfi_snd.cfi_pending_last;
        stat.cfs_snd.cbs_inject_q_len =
            cfil_queue_len(&cfi->cfi_snd.cfi_inject_q);
        stat.cfs_snd.cbs_pass_offset =
            cfi->cfi_snd.cfi_pass_offset;

        stat.cfs_rcv.cbs_pending_first =
            cfi->cfi_rcv.cfi_pending_first;
        stat.cfs_rcv.cbs_pending_last =
            cfi->cfi_rcv.cfi_pending_last;
        stat.cfs_rcv.cbs_inject_q_len =
            cfil_queue_len(&cfi->cfi_rcv.cfi_inject_q);
        stat.cfs_rcv.cbs_pass_offset =
            cfi->cfi_rcv.cfi_pass_offset;

        for (i = 0; i < MAX_CONTENT_FILTER; i++) {
            struct cfil_entry_stat *estat;
            struct cfe_buf *ebuf;
            struct cfe_buf_stat *sbuf;

            entry = &cfi->cfi_entries[i];

            estat = &stat.ces_entries[i];

            estat->ces_len = sizeof(struct cfil_entry_stat);
            estat->ces_filter_id = entry->cfe_filter ?
                entry->cfe_filter->cf_kcunit : 0;
            estat->ces_flags = entry->cfe_flags;
            estat->ces_necp_control_unit =
                entry->cfe_necp_control_unit;

            estat->ces_last_event.tv_sec =
                (int64_t)entry->cfe_last_event.tv_sec;
            estat->ces_last_event.tv_usec =
                (int64_t)entry->cfe_last_event.tv_usec;

            estat->ces_last_action.tv_sec =
                (int64_t)entry->cfe_last_action.tv_sec;
            estat->ces_last_action.tv_usec =
                (int64_t)entry->cfe_last_action.tv_usec;

            ebuf = &entry->cfe_snd;
            sbuf = &estat->ces_snd;
            sbuf->cbs_pending_first =
                cfil_queue_offset_first(&ebuf->cfe_pending_q);
            sbuf->cbs_pending_last =
                cfil_queue_offset_last(&ebuf->cfe_pending_q);
            sbuf->cbs_ctl_first =
                cfil_queue_offset_first(&ebuf->cfe_ctl_q);
            sbuf->cbs_ctl_last =
                cfil_queue_offset_last(&ebuf->cfe_ctl_q);
            sbuf->cbs_pass_offset = ebuf->cfe_pass_offset;
            sbuf->cbs_peek_offset = ebuf->cfe_peek_offset;
            sbuf->cbs_peeked = ebuf->cfe_peeked;

            ebuf = &entry->cfe_rcv;
            sbuf = &estat->ces_rcv;
            sbuf->cbs_pending_first =
                cfil_queue_offset_first(&ebuf->cfe_pending_q);
            sbuf->cbs_pending_last =
                cfil_queue_offset_last(&ebuf->cfe_pending_q);
            sbuf->cbs_ctl_first =
                cfil_queue_offset_first(&ebuf->cfe_ctl_q);
            sbuf->cbs_ctl_last =
                cfil_queue_offset_last(&ebuf->cfe_ctl_q);
            sbuf->cbs_pass_offset = ebuf->cfe_pass_offset;
            sbuf->cbs_peek_offset = ebuf->cfe_peek_offset;
            sbuf->cbs_peeked = ebuf->cfe_peeked;
        }
        error = SYSCTL_OUT(req, &stat,
            sizeof (struct cfil_sock_stat));
        if (error != 0)
            break;
    }
done:
    cfil_rw_unlock_shared(&cfil_lck_rw);

    if (req->oldptr != USER_ADDR_NULL) {
        cfil_info_show();
    }

    return (error);
}
/*
 * UDP Socket Support
 */
static void
cfil_hash_entry_log(int level, struct socket *so, struct cfil_hash_entry *entry, uint64_t sockId, const char* msg)
{
    char local[MAX_IPv6_STR_LEN+6];
    char remote[MAX_IPv6_STR_LEN+6];
    const void *addr;

    // No sock or not UDP, no-op
    if (so == NULL || entry == NULL) {
        return;
    }

    local[0] = remote[0] = 0x0;

    switch (entry->cfentry_family) {
    case AF_INET6:
        addr = &entry->cfentry_laddr.addr6;
        inet_ntop(AF_INET6, addr, local, sizeof(local));
        addr = &entry->cfentry_faddr.addr6;
        inet_ntop(AF_INET6, addr, remote, sizeof(local));
        break;
    case AF_INET:
        addr = &entry->cfentry_laddr.addr46.ia46_addr4.s_addr;
        inet_ntop(AF_INET, addr, local, sizeof(local));
        addr = &entry->cfentry_faddr.addr46.ia46_addr4.s_addr;
        inet_ntop(AF_INET, addr, remote, sizeof(local));
        break;
    default:
        return;
    }

    CFIL_LOG(level, "<%s>: <UDP so %llx, entry %p, sockID %llu> lport %d fport %d laddr %s faddr %s",
        msg,
        (uint64_t)VM_KERNEL_ADDRPERM(so), entry, sockId,
        ntohs(entry->cfentry_lport), ntohs(entry->cfentry_fport), local, remote);
}
static void
cfil_inp_log(int level, struct socket *so, const char* msg)
{
    struct inpcb *inp = NULL;
    char local[MAX_IPv6_STR_LEN+6];
    char remote[MAX_IPv6_STR_LEN+6];
    const void *addr;

    if (so == NULL) {
        return;
    }

    inp = sotoinpcb(so);
    if (inp == NULL) {
        return;
    }

    local[0] = remote[0] = 0x0;

    if (inp->inp_vflag & INP_IPV6) {
        addr = &inp->in6p_laddr.s6_addr32;
        inet_ntop(AF_INET6, addr, local, sizeof(local));
        addr = &inp->in6p_faddr.s6_addr32;
        inet_ntop(AF_INET6, addr, remote, sizeof(local));
    } else {
        addr = &inp->inp_laddr.s_addr;
        inet_ntop(AF_INET, addr, local, sizeof(local));
        addr = &inp->inp_faddr.s_addr;
        inet_ntop(AF_INET, addr, remote, sizeof(local));
    }

    if (so->so_cfil != NULL)
        CFIL_LOG(level, "<%s>: <%s so %llx - flags 0x%x 0x%x, sockID %llu> lport %d fport %d laddr %s faddr %s",
            msg, IS_UDP(so) ? "UDP" : "TCP",
            (uint64_t)VM_KERNEL_ADDRPERM(so), inp->inp_flags, inp->inp_socket->so_flags, so->so_cfil->cfi_sock_id,
            ntohs(inp->inp_lport), ntohs(inp->inp_fport), local, remote);
    else
        CFIL_LOG(level, "<%s>: <%s so %llx - flags 0x%x 0x%x> lport %d fport %d laddr %s faddr %s",
            msg, IS_UDP(so) ? "UDP" : "TCP",
            (uint64_t)VM_KERNEL_ADDRPERM(so), inp->inp_flags, inp->inp_socket->so_flags,
            ntohs(inp->inp_lport), ntohs(inp->inp_fport), local, remote);
}
static void
cfil_info_log(int level, struct cfil_info *cfil_info, const char* msg)
{
    if (cfil_info == NULL)
        return;

    if (cfil_info->cfi_hash_entry != NULL)
        cfil_hash_entry_log(level, cfil_info->cfi_so, cfil_info->cfi_hash_entry, cfil_info->cfi_sock_id, msg);
    else
        cfil_inp_log(level, cfil_info->cfi_so, msg);
}
int
cfil_db_init(struct socket *so)
{
    int error = 0;
    struct cfil_db *db = NULL;

    CFIL_LOG(LOG_INFO, "");

    db = zalloc(cfil_db_zone);
    if (db == NULL) {
        error = ENOMEM;
        goto done;
    }
    bzero(db, sizeof(struct cfil_db));
    db->cfdb_so = so;
    db->cfdb_hashbase = hashinit(CFILHASHSIZE, M_CFIL, &db->cfdb_hashmask);
    if (db->cfdb_hashbase == NULL) {
        zfree(cfil_db_zone, db);
        db = NULL;
        error = ENOMEM;
        goto done;
    }

    so->so_cfil_db = db;

done:
    return (error);
}
void
cfil_db_free(struct socket *so)
{
    struct cfil_hash_entry *entry = NULL;
    struct cfil_hash_entry *temp_entry = NULL;
    struct cfilhashhead *cfilhash = NULL;
    struct cfil_db *db = NULL;

    CFIL_LOG(LOG_INFO, "");

    if (so == NULL || so->so_cfil_db == NULL) {
        return;
    }
    db = so->so_cfil_db;

    CFIL_LOG(LOG_ERR, "CFIL: LIFECYCLE: <so %llx, db %p> freeing db (count == %d)",
        (uint64_t)VM_KERNEL_ADDRPERM(so), db, db->cfdb_count);

    for (int i = 0; i < CFILHASHSIZE; i++) {
        cfilhash = &db->cfdb_hashbase[i];
        LIST_FOREACH_SAFE(entry, cfilhash, cfentry_link, temp_entry) {
            if (entry->cfentry_cfil != NULL) {
                cfil_info_log(LOG_ERR, entry->cfentry_cfil, "CFIL: LIFECYCLE: DB FREE CLEAN UP");
                cfil_info_free(entry->cfentry_cfil);
                OSIncrementAtomic(&cfil_stats.cfs_sock_detached);
                entry->cfentry_cfil = NULL;
            }

            cfil_db_delete_entry(db, entry);
            if (so->so_flags & SOF_CONTENT_FILTER) {
                if (db->cfdb_count == 0)
                    so->so_flags &= ~SOF_CONTENT_FILTER;
                VERIFY(so->so_usecount > 0);
                so->so_usecount--;
            }
        }
    }

    // Make sure all entries are cleaned up!
    VERIFY(db->cfdb_count == 0);
    CFIL_LOG(LOG_ERR, "CFIL: LIFECYCLE: so usecount %d", so->so_usecount);

    FREE(db->cfdb_hashbase, M_CFIL);
    zfree(cfil_db_zone, db);
    so->so_cfil_db = NULL;
}
static void
fill_cfil_hash_entry_from_address(struct cfil_hash_entry *entry, bool isLocal, struct sockaddr *addr)
{
    struct sockaddr_in *sin = NULL;
    struct sockaddr_in6 *sin6 = NULL;

    if (entry == NULL || addr == NULL) {
        return;
    }

    switch (addr->sa_family) {
    case AF_INET:
        sin = satosin(addr);
        if (sin->sin_len != sizeof(*sin)) {
            return;
        }
        if (isLocal == TRUE) {
            entry->cfentry_lport = sin->sin_port;
            entry->cfentry_laddr.addr46.ia46_addr4.s_addr = sin->sin_addr.s_addr;
        } else {
            entry->cfentry_fport = sin->sin_port;
            entry->cfentry_faddr.addr46.ia46_addr4.s_addr = sin->sin_addr.s_addr;
        }
        entry->cfentry_family = AF_INET;
        return;
    case AF_INET6:
        sin6 = satosin6(addr);
        if (sin6->sin6_len != sizeof(*sin6)) {
            return;
        }
        if (isLocal == TRUE) {
            entry->cfentry_lport = sin6->sin6_port;
            entry->cfentry_laddr.addr6 = sin6->sin6_addr;
        } else {
            entry->cfentry_fport = sin6->sin6_port;
            entry->cfentry_faddr.addr6 = sin6->sin6_addr;
        }
        entry->cfentry_family = AF_INET6;
        return;
    default:
        return;
    }
}
static void
fill_cfil_hash_entry_from_inp(struct cfil_hash_entry *entry, bool isLocal, struct inpcb *inp)
{
    if (entry == NULL || inp == NULL) {
        return;
    }

    if (inp->inp_vflag & INP_IPV4) {
        if (isLocal == TRUE) {
            entry->cfentry_lport = inp->inp_lport;
            entry->cfentry_laddr.addr46.ia46_addr4.s_addr = inp->inp_laddr.s_addr;
        } else {
            entry->cfentry_fport = inp->inp_fport;
            entry->cfentry_faddr.addr46.ia46_addr4.s_addr = inp->inp_faddr.s_addr;
        }
        entry->cfentry_family = AF_INET;
    } else if (inp->inp_vflag & INP_IPV6) {
        if (isLocal == TRUE) {
            entry->cfentry_lport = inp->inp_lport;
            entry->cfentry_laddr.addr6 = inp->in6p_laddr;
        } else {
            entry->cfentry_fport = inp->inp_fport;
            entry->cfentry_faddr.addr6 = inp->in6p_faddr;
        }
        entry->cfentry_family = AF_INET6;
    }
}
static bool
check_port(struct sockaddr *addr, u_short port)
{
    struct sockaddr_in *sin = NULL;
    struct sockaddr_in6 *sin6 = NULL;

    if (addr == NULL || port == 0) {
        return FALSE;
    }

    switch (addr->sa_family) {
    case AF_INET:
        sin = satosin(addr);
        if (sin->sin_len != sizeof(*sin)) {
            return FALSE;
        }
        if (port == ntohs(sin->sin_port)) {
            return TRUE;
        }
        break;
    case AF_INET6:
        sin6 = satosin6(addr);
        if (sin6->sin6_len != sizeof(*sin6)) {
            return FALSE;
        }
        if (port == ntohs(sin6->sin6_port)) {
            return TRUE;
        }
        break;
    default:
        break;
    }
    return FALSE;
}
struct cfil_hash_entry *
cfil_db_lookup_entry_with_sockid(struct cfil_db *db, u_int64_t sock_id)
{
    struct cfilhashhead *cfilhash = NULL;
    u_int32_t flowhash = (u_int32_t)(sock_id & 0x0ffffffff);
    struct cfil_hash_entry *nextentry;

    if (db == NULL || db->cfdb_hashbase == NULL || sock_id == 0) {
        return NULL;
    }

    flowhash &= db->cfdb_hashmask;
    cfilhash = &db->cfdb_hashbase[flowhash];

    LIST_FOREACH(nextentry, cfilhash, cfentry_link) {
        if (nextentry->cfentry_cfil != NULL &&
            nextentry->cfentry_cfil->cfi_sock_id == sock_id) {
            CFIL_LOG(LOG_DEBUG, "CFIL: UDP <so %llx> matched <id %llu, hash %u>",
                (uint64_t)VM_KERNEL_ADDRPERM(db->cfdb_so), nextentry->cfentry_cfil->cfi_sock_id, flowhash);
            cfil_hash_entry_log(LOG_DEBUG, db->cfdb_so, nextentry, 0, "CFIL: UDP found entry");
            return nextentry;
        }
    }

    CFIL_LOG(LOG_DEBUG, "CFIL: UDP <so %llx> NOT matched <id %llu, hash %u>",
        (uint64_t)VM_KERNEL_ADDRPERM(db->cfdb_so), sock_id, flowhash);
    return NULL;
}
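/*
 * Added note (illustrative; not from the original source): the UDP flow
 * sock_id carries the flow hash in its low 32 bits, which is why the lookup
 * above can recover the bucket without re-hashing the addresses:
 *
 *	u_int32_t flowhash = (u_int32_t)(sock_id & 0x0ffffffff); // low 32 bits
 *	u_int32_t bucket   = flowhash & db->cfdb_hashmask;        // power-of-two mask
 *	// db->cfdb_hashbase[bucket] is the only chain that can hold the entry,
 *	// and the full 64-bit cfi_sock_id disambiguates entries within it.
 */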
struct cfil_hash_entry *
cfil_db_lookup_entry(struct cfil_db *db, struct sockaddr *local, struct sockaddr *remote)
{
    struct cfil_hash_entry matchentry;
    struct cfil_hash_entry *nextentry = NULL;
    struct inpcb *inp = sotoinpcb(db->cfdb_so);
    u_int32_t hashkey_faddr = 0, hashkey_laddr = 0;
    int inp_hash_element = 0;
    struct cfilhashhead *cfilhash = NULL;

    CFIL_LOG(LOG_INFO, "");

    if (inp == NULL) {
        goto done;
    }

    if (local != NULL) {
        fill_cfil_hash_entry_from_address(&matchentry, TRUE, local);
    } else {
        fill_cfil_hash_entry_from_inp(&matchentry, TRUE, inp);
    }
    if (remote != NULL) {
        fill_cfil_hash_entry_from_address(&matchentry, FALSE, remote);
    } else {
        fill_cfil_hash_entry_from_inp(&matchentry, FALSE, inp);
    }

    if (inp->inp_vflag & INP_IPV6) {
        hashkey_faddr = matchentry.cfentry_faddr.addr6.s6_addr32[3];
        hashkey_laddr = matchentry.cfentry_laddr.addr6.s6_addr32[3];
    } else {
        hashkey_faddr = matchentry.cfentry_faddr.addr46.ia46_addr4.s_addr;
        hashkey_laddr = matchentry.cfentry_laddr.addr46.ia46_addr4.s_addr;
    }

    inp_hash_element = CFIL_HASH(hashkey_laddr, hashkey_faddr,
        matchentry.cfentry_lport, matchentry.cfentry_fport);
    inp_hash_element &= db->cfdb_hashmask;

    cfilhash = &db->cfdb_hashbase[inp_hash_element];

    LIST_FOREACH(nextentry, cfilhash, cfentry_link) {
        if ((inp->inp_vflag & INP_IPV6) &&
            nextentry->cfentry_lport == matchentry.cfentry_lport &&
            nextentry->cfentry_fport == matchentry.cfentry_fport &&
            IN6_ARE_ADDR_EQUAL(&nextentry->cfentry_laddr.addr6, &matchentry.cfentry_laddr.addr6) &&
            IN6_ARE_ADDR_EQUAL(&nextentry->cfentry_faddr.addr6, &matchentry.cfentry_faddr.addr6)) {
            cfil_hash_entry_log(LOG_DEBUG, db->cfdb_so, &matchentry, 0, "CFIL LOOKUP ENTRY: UDP V6 found entry");
            return nextentry;
        } else if (nextentry->cfentry_lport == matchentry.cfentry_lport &&
            nextentry->cfentry_fport == matchentry.cfentry_fport &&
            nextentry->cfentry_laddr.addr46.ia46_addr4.s_addr == matchentry.cfentry_laddr.addr46.ia46_addr4.s_addr &&
            nextentry->cfentry_faddr.addr46.ia46_addr4.s_addr == matchentry.cfentry_faddr.addr46.ia46_addr4.s_addr) {
            cfil_hash_entry_log(LOG_DEBUG, db->cfdb_so, &matchentry, 0, "CFIL LOOKUP ENTRY: UDP V4 found entry");
            return nextentry;
        }
    }

done:
    cfil_hash_entry_log(LOG_DEBUG, db->cfdb_so, &matchentry, 0, "CFIL LOOKUP ENTRY: UDP no entry found");
    return NULL;
}
void
cfil_db_delete_entry(struct cfil_db *db, struct cfil_hash_entry *hash_entry)
{
    if (hash_entry == NULL)
        return;

    LIST_REMOVE(hash_entry, cfentry_link);
    zfree(cfil_hash_entry_zone, hash_entry);
    db->cfdb_count--;
    if (db->cfdb_only_entry == hash_entry)
        db->cfdb_only_entry = NULL;
}
struct cfil_hash_entry *
cfil_db_add_entry(struct cfil_db *db, struct sockaddr *local, struct sockaddr *remote)
{
    struct cfil_hash_entry *entry = NULL;
    struct inpcb *inp = sotoinpcb(db->cfdb_so);
    u_int32_t hashkey_faddr = 0, hashkey_laddr = 0;
    int inp_hash_element = 0;
    struct cfilhashhead *cfilhash = NULL;

    CFIL_LOG(LOG_INFO, "");

    if (inp == NULL) {
        goto done;
    }

    entry = zalloc(cfil_hash_entry_zone);
    if (entry == NULL) {
        goto done;
    }
    bzero(entry, sizeof(struct cfil_hash_entry));

    if (local != NULL) {
        fill_cfil_hash_entry_from_address(entry, TRUE, local);
    } else {
        fill_cfil_hash_entry_from_inp(entry, TRUE, inp);
    }
    if (remote != NULL) {
        fill_cfil_hash_entry_from_address(entry, FALSE, remote);
    } else {
        fill_cfil_hash_entry_from_inp(entry, FALSE, inp);
    }
    entry->cfentry_lastused = net_uptime();

    if (inp->inp_vflag & INP_IPV6) {
        hashkey_faddr = entry->cfentry_faddr.addr6.s6_addr32[3];
        hashkey_laddr = entry->cfentry_laddr.addr6.s6_addr32[3];
    } else {
        hashkey_faddr = entry->cfentry_faddr.addr46.ia46_addr4.s_addr;
        hashkey_laddr = entry->cfentry_laddr.addr46.ia46_addr4.s_addr;
    }
    entry->cfentry_flowhash = CFIL_HASH(hashkey_laddr, hashkey_faddr,
        entry->cfentry_lport, entry->cfentry_fport);
    inp_hash_element = entry->cfentry_flowhash & db->cfdb_hashmask;

    cfilhash = &db->cfdb_hashbase[inp_hash_element];

    LIST_INSERT_HEAD(cfilhash, entry, cfentry_link);
    db->cfdb_count++;
    db->cfdb_only_entry = entry;
    cfil_hash_entry_log(LOG_DEBUG, db->cfdb_so, entry, 0, "CFIL: cfil_db_add_entry: ADDED");

done:
    CFIL_LOG(LOG_DEBUG, "CFIL: UDP <so %llx> total count %d", (uint64_t)VM_KERNEL_ADDRPERM(db->cfdb_so), db->cfdb_count);
    return entry;
}
struct cfil_info *
cfil_db_get_cfil_info(struct cfil_db *db, cfil_sock_id_t id)
{
    struct cfil_hash_entry *hash_entry = NULL;

    CFIL_LOG(LOG_INFO, "");

    if (db == NULL || id == 0) {
        CFIL_LOG(LOG_DEBUG, "CFIL: UDP <so %llx> NULL DB <id %llu>",
            (uint64_t)VM_KERNEL_ADDRPERM(db->cfdb_so), id);
        return NULL;
    }

    // This is an optimization for connected UDP socket which only has one flow.
    // No need to do the hash lookup.
    if (db->cfdb_count == 1) {
        if (db->cfdb_only_entry && db->cfdb_only_entry->cfentry_cfil &&
            db->cfdb_only_entry->cfentry_cfil->cfi_sock_id == id) {
            return (db->cfdb_only_entry->cfentry_cfil);
        }
    }

    hash_entry = cfil_db_lookup_entry_with_sockid(db, id);
    return (hash_entry != NULL ? hash_entry->cfentry_cfil : NULL);
}
struct cfil_hash_entry *
cfil_sock_udp_get_flow(struct socket *so, uint32_t filter_control_unit, bool outgoing, struct sockaddr *local, struct sockaddr *remote)
{
#pragma unused(so, filter_control_unit, outgoing, local, remote)
    struct cfil_hash_entry *hash_entry = NULL;
    int error = 0;

    socket_lock_assert_owned(so);

    // If new socket, allocate cfil db
    if (so->so_cfil_db == NULL) {
        if (cfil_db_init(so) != 0) {
            return (NULL);
        }
    }

    // See if flow already exists.
    hash_entry = cfil_db_lookup_entry(so->so_cfil_db, local, remote);
    if (hash_entry != NULL) {
        return (hash_entry);
    }

    hash_entry = cfil_db_add_entry(so->so_cfil_db, local, remote);
    if (hash_entry == NULL) {
        OSIncrementAtomic(&cfil_stats.cfs_sock_attach_no_mem);
        CFIL_LOG(LOG_ERR, "CFIL: UDP failed to add entry");
        return (NULL);
    }

    if (cfil_info_alloc(so, hash_entry) == NULL ||
        hash_entry->cfentry_cfil == NULL) {
        cfil_db_delete_entry(so->so_cfil_db, hash_entry);
        CFIL_LOG(LOG_ERR, "CFIL: UDP failed to alloc cfil_info");
        OSIncrementAtomic(&cfil_stats.cfs_sock_attach_no_mem);
        return (NULL);
    }

    cfil_info_log(LOG_ERR, hash_entry->cfentry_cfil, "CFIL: LIFECYCLE: ADDED");

    if (cfil_info_attach_unit(so, filter_control_unit, hash_entry->cfentry_cfil) == 0) {
        CFIL_LOG(LOG_ERR, "CFIL: UDP cfil_info_attach_unit(%u) failed",
            filter_control_unit);
        OSIncrementAtomic(&cfil_stats.cfs_sock_attach_failed);
        return (NULL);
    }
    CFIL_LOG(LOG_DEBUG, "CFIL: UDP <so %llx> filter_control_unit %u sockID %llu attached",
        (uint64_t)VM_KERNEL_ADDRPERM(so),
        filter_control_unit, hash_entry->cfentry_cfil->cfi_sock_id);

    so->so_flags |= SOF_CONTENT_FILTER;
    OSIncrementAtomic(&cfil_stats.cfs_sock_attached);

    /* Hold a reference on the socket for each flow */
    so->so_usecount++;

    error = cfil_dispatch_attach_event(so, hash_entry->cfentry_cfil, filter_control_unit);
    /* We can recover from flow control or out of memory errors */
    if (error != 0 && error != ENOBUFS && error != ENOMEM)
        return (NULL);

    CFIL_INFO_VERIFY(hash_entry->cfentry_cfil);
    return (hash_entry);
}
errno_t
cfil_sock_udp_handle_data(bool outgoing, struct socket *so,
    struct sockaddr *local, struct sockaddr *remote,
    struct mbuf *data, struct mbuf *control, uint32_t flags)
{
#pragma unused(outgoing, so, local, remote, data, control, flags)
    errno_t error = 0;
    uint32_t filter_control_unit;
    struct cfil_hash_entry *hash_entry = NULL;
    struct cfil_info *cfil_info = NULL;

    socket_lock_assert_owned(so);

    if (cfil_active_count == 0) {
        CFIL_LOG(LOG_DEBUG, "CFIL: UDP no active filter");
        OSIncrementAtomic(&cfil_stats.cfs_sock_attach_in_vain);
        return (error);
    }

    filter_control_unit = necp_socket_get_content_filter_control_unit(so);
    if (filter_control_unit == 0) {
        CFIL_LOG(LOG_DEBUG, "CFIL: UDP failed to get control unit");
        return (error);
    }

    if ((filter_control_unit & NECP_MASK_USERSPACE_ONLY) != 0) {
        CFIL_LOG(LOG_DEBUG, "CFIL: UDP user space only");
        OSIncrementAtomic(&cfil_stats.cfs_sock_userspace_only);
        return (error);
    }

    hash_entry = cfil_sock_udp_get_flow(so, filter_control_unit, outgoing, local, remote);
    if (hash_entry == NULL || hash_entry->cfentry_cfil == NULL) {
        CFIL_LOG(LOG_ERR, "CFIL: Failed to create UDP flow");
        return (EPIPE);
    }
    // Update last used timestamp, this is for flow Idle TO
    hash_entry->cfentry_lastused = net_uptime();
    cfil_info = hash_entry->cfentry_cfil;

    if (cfil_info->cfi_flags & CFIF_DROP) {
        cfil_hash_entry_log(LOG_DEBUG, so, hash_entry, 0, "CFIL: UDP DROP");
        return (EPIPE);
    }
    if (control != NULL) {
        OSIncrementAtomic(&cfil_stats.cfs_data_in_control);
    }
    if (data->m_type == MT_OOBDATA) {
        CFIL_LOG(LOG_ERR, "so %llx MSG_OOB",
            (uint64_t)VM_KERNEL_ADDRPERM(so));
        OSIncrementAtomic(&cfil_stats.cfs_data_in_oob);
    }

    error = cfil_data_common(so, cfil_info, outgoing, remote, data, control, flags);

    return (error);
}
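/*
 * Added summary (not in the original source): unlike TCP, where a single
 * so_cfil tracks the whole connection, every UDP datagram is first mapped to
 * a flow entry keyed by its address 4-tuple before being run through
 * cfil_data_common(). A rough sketch of the per-datagram sequence above:
 *
 *	// 1. bail out cheaply when no filter applies
 *	//    (no active filter, no NECP control unit, or user-space-only unit)
 *	// 2. hash_entry = cfil_sock_udp_get_flow(so, unit, outgoing, local, remote);
 *	//    creates the cfil_db / hash entry / cfil_info on first use
 *	// 3. refresh hash_entry->cfentry_lastused so the idle GC keeps the flow
 *	// 4. honor CFIF_DROP with EPIPE, then hand the mbuf chain to
 *	//    cfil_data_common() for the usual pass/peek accounting
 */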
/*
 * Go through all UDP flows for specified socket and returns TRUE if
 * any flow is still attached. If need_wait is TRUE, wait on first
 * attached flow.
 */
static int
cfil_filters_udp_attached(struct socket *so, bool need_wait)
{
    struct timespec ts;
    lck_mtx_t *mutex_held;
    struct cfilhashhead *cfilhash = NULL;
    struct cfil_db *db = NULL;
    struct cfil_hash_entry *hash_entry = NULL;
    struct cfil_hash_entry *temp_hash_entry = NULL;
    struct cfil_info *cfil_info = NULL;
    struct cfil_entry *entry = NULL;
    errno_t error = 0;
    int kcunit;
    int attached = 0;

    socket_lock_assert_owned(so);

    if ((so->so_flags & SOF_CONTENT_FILTER) != 0 && so->so_cfil_db != NULL) {

        if (so->so_proto->pr_getlock != NULL)
            mutex_held = (*so->so_proto->pr_getlock)(so, PR_F_WILLUNLOCK);
        else
            mutex_held = so->so_proto->pr_domain->dom_mtx;
        LCK_MTX_ASSERT(mutex_held, LCK_MTX_ASSERT_OWNED);

        db = so->so_cfil_db;

        for (int i = 0; i < CFILHASHSIZE; i++) {
            cfilhash = &db->cfdb_hashbase[i];

            LIST_FOREACH_SAFE(hash_entry, cfilhash, cfentry_link, temp_hash_entry) {

                if (hash_entry->cfentry_cfil != NULL) {

                    cfil_info = hash_entry->cfentry_cfil;
                    for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
                        entry = &cfil_info->cfi_entries[kcunit - 1];

                        /* Are we attached to the filter? */
                        if (entry->cfe_filter == NULL) {
                            continue;
                        }

                        if ((entry->cfe_flags & CFEF_SENT_SOCK_ATTACHED) == 0)
                            continue;
                        if ((entry->cfe_flags & CFEF_CFIL_DETACHED) != 0)
                            continue;

                        attached = 1;

                        if (need_wait == TRUE) {
                            cfil_info_log(LOG_ERR, cfil_info, "CFIL: LIFECYCLE: WAIT FOR FLOW TO FINISH");

                            ts.tv_sec = cfil_close_wait_timeout / 1000;
                            ts.tv_nsec = (cfil_close_wait_timeout % 1000) *
                                NSEC_PER_USEC * 1000;

                            OSIncrementAtomic(&cfil_stats.cfs_close_wait);
                            cfil_info->cfi_flags |= CFIF_CLOSE_WAIT;
                            error = msleep((caddr_t)cfil_info, mutex_held,
                                PSOCK | PCATCH, "cfil_filters_udp_attached", &ts);
                            cfil_info->cfi_flags &= ~CFIF_CLOSE_WAIT;

                            cfil_info_log(LOG_ERR, cfil_info, "CFIL: LIFECYCLE: WAIT FOR FLOW DONE");

                            /*
                             * Force close in case of timeout
                             */
                            if (error != 0) {
                                OSIncrementAtomic(&cfil_stats.cfs_close_wait_timeout);
                                cfil_info_log(LOG_ERR, cfil_info, "CFIL: LIFECYCLE: WAIT FOR FLOW TIMED OUT, FORCE DETACH");
                                entry->cfe_flags |= CFEF_CFIL_DETACHED;
                            }
                        }
                        goto done;
                    }
                }
            }
        }
    }

done:
    return (attached);
}
int32_t
cfil_sock_udp_data_pending(struct sockbuf *sb, bool check_thread)
{
    struct socket *so = sb->sb_so;
    struct cfi_buf *cfi_buf;
    uint64_t pending = 0;
    uint64_t total_pending = 0;
    struct cfilhashhead *cfilhash = NULL;
    struct cfil_db *db = NULL;
    struct cfil_hash_entry *hash_entry = NULL;
    struct cfil_hash_entry *temp_hash_entry = NULL;

    socket_lock_assert_owned(so);

    if ((so->so_flags & SOF_CONTENT_FILTER) != 0 && so->so_cfil_db != NULL &&
        (check_thread == FALSE || so->so_snd.sb_cfil_thread != current_thread())) {

        db = so->so_cfil_db;

        for (int i = 0; i < CFILHASHSIZE; i++) {
            cfilhash = &db->cfdb_hashbase[i];

            LIST_FOREACH_SAFE(hash_entry, cfilhash, cfentry_link, temp_hash_entry) {

                if (hash_entry->cfentry_cfil != NULL) {
                    if ((sb->sb_flags & SB_RECV) == 0)
                        cfi_buf = &hash_entry->cfentry_cfil->cfi_snd;
                    else
                        cfi_buf = &hash_entry->cfentry_cfil->cfi_rcv;

                    pending = cfi_buf->cfi_pending_last - cfi_buf->cfi_pending_first;
                    /*
                     * If we are limited by the "chars of mbufs used" roughly
                     * adjust so we won't overcommit
                     */
                    if ((uint64_t)cfi_buf->cfi_pending_mbcnt > pending)
                        pending = cfi_buf->cfi_pending_mbcnt;

                    total_pending += pending;
                }
            }
        }

        VERIFY(total_pending < INT32_MAX);
        CFIL_LOG(LOG_DEBUG, "CFIL: <so %llx> total pending %llu <check_thread %d>",
            (uint64_t)VM_KERNEL_ADDRPERM(so),
            total_pending, check_thread);
    }

    return (int32_t)(total_pending);
}
int
cfil_sock_udp_notify_shutdown(struct socket *so, int how, int drop_flag, int shut_flag)
{
    struct cfil_info *cfil_info = NULL;
    struct cfilhashhead *cfilhash = NULL;
    struct cfil_db *db = NULL;
    struct cfil_hash_entry *hash_entry = NULL;
    struct cfil_hash_entry *temp_hash_entry = NULL;
    errno_t error = 0;
    int done_count = 0;
    int kcunit;

    socket_lock_assert_owned(so);

    if ((so->so_flags & SOF_CONTENT_FILTER) != 0 && so->so_cfil_db != NULL) {

        db = so->so_cfil_db;

        for (int i = 0; i < CFILHASHSIZE; i++) {
            cfilhash = &db->cfdb_hashbase[i];

            LIST_FOREACH_SAFE(hash_entry, cfilhash, cfentry_link, temp_hash_entry) {

                if (hash_entry->cfentry_cfil != NULL) {
                    cfil_info = hash_entry->cfentry_cfil;

                    // This flow is marked as DROP
                    if (cfil_info->cfi_flags & drop_flag) {
                        done_count++;
                        continue;
                    }

                    // This flow has been shut already, skip
                    if (cfil_info->cfi_flags & shut_flag) {
                        continue;
                    }
                    // Mark flow as shut
                    cfil_info->cfi_flags |= shut_flag;
                    done_count++;

                    for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
                        /* Disconnect incoming side */
                        if (how != SHUT_WR) {
                            error = cfil_dispatch_disconnect_event(so, cfil_info, kcunit, 0);
                        }
                        /* Disconnect outgoing side */
                        if (how != SHUT_RD) {
                            error = cfil_dispatch_disconnect_event(so, cfil_info, kcunit, 1);
                        }
                    }
                }
            }
        }
    }

    if (done_count == 0) {
        error = ENOTCONN;
    }
    return (error);
}
int
cfil_sock_udp_shutdown(struct socket *so, int *how)
{
    int error = 0;

    if ((so->so_flags & SOF_CONTENT_FILTER) == 0 || (so->so_cfil_db == NULL))
        goto done;

    socket_lock_assert_owned(so);

    CFIL_LOG(LOG_INFO, "so %llx how %d",
        (uint64_t)VM_KERNEL_ADDRPERM(so), *how);

    /*
     * Check the state of the socket before the content filter
     */
    if (*how != SHUT_WR && (so->so_state & SS_CANTRCVMORE) != 0) {
        /* read already shut down */
        error = ENOTCONN;
        goto done;
    }
    if (*how != SHUT_RD && (so->so_state & SS_CANTSENDMORE) != 0) {
        /* write already shut down */
        error = ENOTCONN;
        goto done;
    }

    /*
     * shutdown read: SHUT_RD or SHUT_RDWR
     */
    if (*how != SHUT_WR) {
        error = cfil_sock_udp_notify_shutdown(so, SHUT_RD, CFIF_DROP, CFIF_SHUT_RD);
        if (error != 0)
            goto done;
    }
    /*
     * shutdown write: SHUT_WR or SHUT_RDWR
     */
    if (*how != SHUT_RD) {
        error = cfil_sock_udp_notify_shutdown(so, SHUT_WR, CFIF_DROP, CFIF_SHUT_WR);
        if (error != 0)
            goto done;

        /*
         * When outgoing data is pending, we delay the shutdown at the
         * protocol level until the content filters give the final
         * verdict on the pending data.
         */
        if (cfil_sock_data_pending(&so->so_snd) != 0) {
            /*
             * When shutting down the read and write sides at once
             * we can proceed to the final shutdown of the read
             * side. Otherwise, we just return.
             */
            if (*how == SHUT_WR) {
                error = EJUSTRETURN;
            } else if (*how == SHUT_RDWR) {
                *how = SHUT_RD;
            }
        }
    }
done:
    return (error);
}
void
cfil_sock_udp_close_wait(struct socket *so)
{
    socket_lock_assert_owned(so);

    while (cfil_filters_udp_attached(so, FALSE)) {
        /*
         * Notify the filters we are going away so they can detach
         */
        cfil_sock_udp_notify_shutdown(so, SHUT_RDWR, 0, 0);

        /*
         * Make sure we need to wait after the filters are notified
         * of the disconnection
         */
        if (cfil_filters_udp_attached(so, TRUE) == 0)
            break;
    }
}
void
cfil_sock_udp_is_closed(struct socket *so)
{
    struct cfil_info *cfil_info = NULL;
    struct cfilhashhead *cfilhash = NULL;
    struct cfil_db *db = NULL;
    struct cfil_hash_entry *hash_entry = NULL;
    struct cfil_hash_entry *temp_hash_entry = NULL;
    errno_t error = 0;
    int kcunit;

    socket_lock_assert_owned(so);

    if ((so->so_flags & SOF_CONTENT_FILTER) != 0 && so->so_cfil_db != NULL) {

        db = so->so_cfil_db;

        for (int i = 0; i < CFILHASHSIZE; i++) {
            cfilhash = &db->cfdb_hashbase[i];

            LIST_FOREACH_SAFE(hash_entry, cfilhash, cfentry_link, temp_hash_entry) {
                if (hash_entry->cfentry_cfil != NULL) {

                    cfil_info = hash_entry->cfentry_cfil;

                    for (kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
                        /* Let the filters know of the closing */
                        error = cfil_dispatch_closed_event(so, cfil_info, kcunit);
                    }

                    /* Last chance to push passed data out */
                    error = cfil_acquire_sockbuf(so, cfil_info, 1);
                    if (error == 0)
                        cfil_service_inject_queue(so, cfil_info, 1);
                    cfil_release_sockbuf(so, 1);

                    cfil_info->cfi_flags |= CFIF_SOCK_CLOSED;

                    /* Pending data needs to go */
                    cfil_flush_queues(so, cfil_info);

                    CFIL_INFO_VERIFY(cfil_info);
                }
            }
        }
    }
}
void
cfil_sock_udp_buf_update(struct sockbuf *sb)
{
    struct cfil_info *cfil_info = NULL;
    struct cfilhashhead *cfilhash = NULL;
    struct cfil_db *db = NULL;
    struct cfil_hash_entry *hash_entry = NULL;
    struct cfil_hash_entry *temp_hash_entry = NULL;
    errno_t error = 0;
    int outgoing;
    struct socket *so = sb->sb_so;

    socket_lock_assert_owned(so);

    if ((so->so_flags & SOF_CONTENT_FILTER) != 0 && so->so_cfil_db != NULL) {

        db = so->so_cfil_db;

        for (int i = 0; i < CFILHASHSIZE; i++) {
            cfilhash = &db->cfdb_hashbase[i];

            LIST_FOREACH_SAFE(hash_entry, cfilhash, cfentry_link, temp_hash_entry) {
                if (hash_entry->cfentry_cfil != NULL) {

                    cfil_info = hash_entry->cfentry_cfil;

                    if ((sb->sb_flags & SB_RECV) == 0) {
                        if ((cfil_info->cfi_flags & CFIF_RETRY_INJECT_OUT) == 0)
                            return;
                        outgoing = 1;
                        OSIncrementAtomic(&cfil_stats.cfs_inject_q_out_retry);
                    } else {
                        if ((cfil_info->cfi_flags & CFIF_RETRY_INJECT_IN) == 0)
                            return;
                        outgoing = 0;
                        OSIncrementAtomic(&cfil_stats.cfs_inject_q_in_retry);
                    }

                    CFIL_LOG(LOG_NOTICE, "so %llx outgoing %d",
                        (uint64_t)VM_KERNEL_ADDRPERM(so), outgoing);

                    error = cfil_acquire_sockbuf(so, cfil_info, outgoing);
                    if (error == 0)
                        cfil_service_inject_queue(so, cfil_info, outgoing);
                    cfil_release_sockbuf(so, outgoing);
                }
            }
        }
    }
}
void
cfil_filter_show(u_int32_t kcunit)
{
    struct content_filter *cfc = NULL;
    struct cfil_entry *entry;
    int count = 0;

    if (content_filters == NULL) {
        return;
    }
    if (kcunit > MAX_CONTENT_FILTER) {
        return;
    }

    cfil_rw_lock_shared(&cfil_lck_rw);

    if (content_filters[kcunit - 1] == NULL) {
        cfil_rw_unlock_shared(&cfil_lck_rw);
        return;
    }
    cfc = content_filters[kcunit - 1];

    CFIL_LOG(LOG_ERR, "CFIL: FILTER SHOW: Filter <unit %d, entry count %d> flags <%lx>:",
        kcunit, cfc->cf_sock_count, (unsigned long)cfc->cf_flags);
    if (cfc->cf_flags & CFF_DETACHING)
        CFIL_LOG(LOG_ERR, "CFIL: FILTER SHOW: - DETACHING");
    if (cfc->cf_flags & CFF_ACTIVE)
        CFIL_LOG(LOG_ERR, "CFIL: FILTER SHOW: - ACTIVE");
    if (cfc->cf_flags & CFF_FLOW_CONTROLLED)
        CFIL_LOG(LOG_ERR, "CFIL: FILTER SHOW: - FLOW CONTROLLED");

    TAILQ_FOREACH(entry, &cfc->cf_sock_entries, cfe_link) {

        if (entry->cfe_cfil_info && entry->cfe_cfil_info->cfi_so) {
            struct cfil_info *cfil_info = entry->cfe_cfil_info;

            count++;

            if (entry->cfe_flags & CFEF_CFIL_DETACHED)
                cfil_info_log(LOG_ERR, cfil_info, "CFIL: FILTER SHOW: - DETACHED");
            else
                cfil_info_log(LOG_ERR, cfil_info, "CFIL: FILTER SHOW: - ATTACHED");
        }
    }

    CFIL_LOG(LOG_ERR, "CFIL: FILTER SHOW: Filter - total entries shown: %d", count);

    cfil_rw_unlock_shared(&cfil_lck_rw);
}
void
cfil_info_show(void)
{
    struct cfil_info *cfil_info;
    int count = 0;

    cfil_rw_lock_shared(&cfil_lck_rw);

    CFIL_LOG(LOG_ERR, "CFIL: INFO SHOW: count %d", cfil_sock_attached_count);

    TAILQ_FOREACH(cfil_info, &cfil_sock_head, cfi_link) {

        count++;

        cfil_info_log(LOG_ERR, cfil_info, "CFIL: INFO SHOW");

        if (cfil_info->cfi_flags & CFIF_DROP)
            CFIL_LOG(LOG_ERR, "CFIL: INFO FLAG - DROP");
        if (cfil_info->cfi_flags & CFIF_CLOSE_WAIT)
            CFIL_LOG(LOG_ERR, "CFIL: INFO FLAG - CLOSE_WAIT");
        if (cfil_info->cfi_flags & CFIF_SOCK_CLOSED)
            CFIL_LOG(LOG_ERR, "CFIL: INFO FLAG - SOCK_CLOSED");
        if (cfil_info->cfi_flags & CFIF_RETRY_INJECT_IN)
            CFIL_LOG(LOG_ERR, "CFIL: INFO FLAG - RETRY_INJECT_IN");
        if (cfil_info->cfi_flags & CFIF_RETRY_INJECT_OUT)
            CFIL_LOG(LOG_ERR, "CFIL: INFO FLAG - RETRY_INJECT_OUT");
        if (cfil_info->cfi_flags & CFIF_SHUT_WR)
            CFIL_LOG(LOG_ERR, "CFIL: INFO FLAG - SHUT_WR");
        if (cfil_info->cfi_flags & CFIF_SHUT_RD)
            CFIL_LOG(LOG_ERR, "CFIL: INFO FLAG - SHUT_RD");
    }

    CFIL_LOG(LOG_ERR, "CFIL: INFO SHOW: total cfil_info shown: %d", count);

    cfil_rw_unlock_shared(&cfil_lck_rw);
}
bool
cfil_info_idle_timed_out(struct cfil_info *cfil_info, int timeout, u_int32_t current_time)
{
    if (cfil_info && cfil_info->cfi_hash_entry &&
        (current_time - cfil_info->cfi_hash_entry->cfentry_lastused >= (u_int32_t)timeout)) {
        cfil_info_log(LOG_ERR, cfil_info, "CFIL: flow IDLE timeout expired");
        return true;
    }
    return false;
}
bool
cfil_info_action_timed_out(struct cfil_info *cfil_info, int timeout)
{
    struct cfil_entry *entry;
    struct timeval current_tv;
    struct timeval diff_time;

    if (cfil_info == NULL)
        return false;

    /*
     * If we have queued up more data than passed offset and we haven't received
     * an action from user space for a while (the user space filter might have crashed),
     * return action timed out.
     */
    if (cfil_info->cfi_snd.cfi_pending_last > cfil_info->cfi_snd.cfi_pass_offset ||
        cfil_info->cfi_rcv.cfi_pending_last > cfil_info->cfi_rcv.cfi_pass_offset) {

        microuptime(&current_tv);

        for (int kcunit = 1; kcunit <= MAX_CONTENT_FILTER; kcunit++) {
            entry = &cfil_info->cfi_entries[kcunit - 1];

            if (entry->cfe_filter == NULL)
                continue;

            if (cfil_info->cfi_snd.cfi_pending_last > entry->cfe_snd.cfe_pass_offset ||
                cfil_info->cfi_rcv.cfi_pending_last > entry->cfe_rcv.cfe_pass_offset) {
                // haven't gotten an action from this filter, check timeout
                timersub(&current_tv, &entry->cfe_last_action, &diff_time);
                if (diff_time.tv_sec >= timeout) {
                    cfil_info_log(LOG_ERR, cfil_info, "CFIL: flow ACTION timeout expired");
                    return true;
                }
            }
        }
    }
    return false;
}
bool
cfil_info_buffer_threshold_exceeded(struct cfil_info *cfil_info)
{
    if (cfil_info == NULL)
        return false;

    /*
     * Clean up flow if it exceeded queue thresholds
     */
    if (cfil_info->cfi_snd.cfi_tail_drop_cnt ||
        cfil_info->cfi_rcv.cfi_tail_drop_cnt) {
        CFIL_LOG(LOG_ERR, "CFIL: queue threshold exceeded: mbuf max <count: %d bytes: %d> tail drop count <OUT: %d IN: %d>",
            cfil_udp_gc_mbuf_num_max,
            cfil_udp_gc_mbuf_cnt_max,
            cfil_info->cfi_snd.cfi_tail_drop_cnt,
            cfil_info->cfi_rcv.cfi_tail_drop_cnt);
        cfil_info_log(LOG_ERR, cfil_info, "CFIL: queue threshold exceeded");
        return true;
    }

    return false;
}
static void
cfil_udp_gc_thread_sleep(bool forever)
{
    if (forever) {
        (void) assert_wait((event_t) &cfil_sock_udp_attached_count,
            THREAD_INTERRUPTIBLE);
    } else {
        uint64_t deadline = 0;
        nanoseconds_to_absolutetime(UDP_FLOW_GC_RUN_INTERVAL_NSEC, &deadline);
        clock_absolutetime_interval_to_deadline(deadline, &deadline);

        (void) assert_wait_deadline(&cfil_sock_udp_attached_count,
            THREAD_INTERRUPTIBLE, deadline);
    }
}
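/*
 * Added note: the non-"forever" case arms a relative deadline. A minimal
 * sketch of the two-step conversion used above:
 *
 *	uint64_t deadline = 0;
 *	nanoseconds_to_absolutetime(UDP_FLOW_GC_RUN_INTERVAL_NSEC, &deadline);
 *	    // nanoseconds -> mach absolute-time units
 *	clock_absolutetime_interval_to_deadline(deadline, &deadline);
 *	    // interval -> absolute wake-up time (now + interval)
 *
 * assert_wait_deadline() then parks the GC thread until either the deadline
 * fires or a wakeup is posted on &cfil_sock_udp_attached_count.
 */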
static void
cfil_udp_gc_thread_func(void *v, wait_result_t w)
{
#pragma unused(v, w)

    ASSERT(cfil_udp_gc_thread == current_thread());
    thread_set_thread_name(current_thread(), "CFIL_UPD_GC");

    // Kick off gc shortly
    cfil_udp_gc_thread_sleep(false);
    thread_block_parameter((thread_continue_t) cfil_info_udp_expire, NULL);
    /* NOTREACHED */
}
static void
cfil_info_udp_expire(void *v, wait_result_t w)
{
#pragma unused(v, w)

    static uint64_t expired_array[UDP_FLOW_GC_MAX_COUNT];
    static uint32_t expired_count = 0;

    struct cfil_info *cfil_info;
    struct cfil_hash_entry *hash_entry;
    struct cfil_db *db;
    struct socket *so = NULL;
    u_int32_t current_time = 0;

    current_time = net_uptime();

    // Get all expired UDP flow ids
    cfil_rw_lock_shared(&cfil_lck_rw);

    if (cfil_sock_udp_attached_count == 0) {
        cfil_rw_unlock_shared(&cfil_lck_rw);
        goto go_sleep;
    }

    TAILQ_FOREACH(cfil_info, &cfil_sock_head, cfi_link) {
        if (expired_count >= UDP_FLOW_GC_MAX_COUNT)
            break;

        if (IS_UDP(cfil_info->cfi_so)) {
            if (cfil_info_idle_timed_out(cfil_info, UDP_FLOW_GC_IDLE_TO, current_time) ||
                cfil_info_action_timed_out(cfil_info, UDP_FLOW_GC_ACTION_TO) ||
                cfil_info_buffer_threshold_exceeded(cfil_info)) {
                expired_array[expired_count] = cfil_info->cfi_sock_id;
                expired_count++;
            }
        }
    }
    cfil_rw_unlock_shared(&cfil_lck_rw);

    if (expired_count == 0)
        goto go_sleep;

    for (uint32_t i = 0; i < expired_count; i++) {

        // Search for socket (UDP only and lock so)
        so = cfil_socket_from_sock_id(expired_array[i], true);
        if (so == NULL) {
            continue;
        }

        cfil_info = cfil_db_get_cfil_info(so->so_cfil_db, expired_array[i]);
        if (cfil_info == NULL) {
            goto unlock;
        }

        db = so->so_cfil_db;
        hash_entry = cfil_info->cfi_hash_entry;

        if (db == NULL || hash_entry == NULL) {
            goto unlock;
        }

#if GC_DEBUG || LIFECYCLE_DEBUG
        cfil_info_log(LOG_ERR, cfil_info, "CFIL: LIFECYCLE: GC CLEAN UP");
#endif

        cfil_db_delete_entry(db, hash_entry);
        cfil_info_free(cfil_info);
        OSIncrementAtomic(&cfil_stats.cfs_sock_detached);

        if (so->so_flags & SOF_CONTENT_FILTER) {
            if (db->cfdb_count == 0)
                so->so_flags &= ~SOF_CONTENT_FILTER;
            VERIFY(so->so_usecount > 0);
            so->so_usecount--;
        }

unlock:
        socket_unlock(so, 1);
    }

    CFIL_LOG(LOG_ERR, "CFIL: UDP flow idle timeout check: expired %d idle flows", expired_count);
    expired_count = 0;

go_sleep:

    // Sleep forever (until woken up) if no more UDP flow to clean
    cfil_rw_lock_shared(&cfil_lck_rw);
    cfil_udp_gc_thread_sleep(cfil_sock_udp_attached_count == 0 ? true : false);
    cfil_rw_unlock_shared(&cfil_lck_rw);
    thread_block_parameter((thread_continue_t)cfil_info_udp_expire, NULL);
    /* NOTREACHED */
}
static void
cfil_udp_save_socket_state(struct cfil_info *cfil_info, struct mbuf *m)
{
    struct m_tag *tag = NULL;
    struct cfil_tag *ctag = NULL;
    struct cfil_hash_entry *hash_entry = NULL;

    if (cfil_info == NULL || cfil_info->cfi_so == NULL ||
        cfil_info->cfi_hash_entry == NULL || m == NULL || !(m->m_flags & M_PKTHDR)) {
        return;
    }

    /* Allocate a tag */
    tag = m_tag_create(KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_CFIL_UDP,
        sizeof(struct cfil_tag), M_DONTWAIT, m);

    if (tag) {
        ctag = (struct cfil_tag *)(tag + 1);
        ctag->cfil_so_state_change_cnt = cfil_info->cfi_so->so_state_change_cnt;
        ctag->cfil_so_options = cfil_info->cfi_so->so_options;

        hash_entry = cfil_info->cfi_hash_entry;
        if (hash_entry->cfentry_family == AF_INET6) {
            fill_ip6_sockaddr_4_6(&ctag->cfil_faddr,
                &hash_entry->cfentry_faddr.addr6,
                hash_entry->cfentry_fport);
        } else if (hash_entry->cfentry_family == AF_INET) {
            fill_ip_sockaddr_4_6(&ctag->cfil_faddr,
                hash_entry->cfentry_faddr.addr46.ia46_addr4,
                hash_entry->cfentry_fport);
        }
        m_tag_prepend(m, tag);
    }
    return;
}
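/*
 * Added usage sketch (hypothetical caller; not from the original source): the
 * mbuf tag attached above lets a held datagram be re-injected later with the
 * socket state it had when it was first intercepted. A consumer would
 * typically do:
 *
 *	uint32_t state_cnt = 0;
 *	short so_options = 0;
 *	struct sockaddr *faddr = NULL;
 *	struct m_tag *tag = cfil_udp_get_socket_state(m, &state_cnt, &so_options, &faddr);
 *	if (tag != NULL) {
 *		// compare state_cnt with so->so_state_change_cnt, use faddr as the
 *		// destination, then free the unlinked tag with m_tag_free(tag)
 *	}
 */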
struct m_tag *
cfil_udp_get_socket_state(struct mbuf *m, uint32_t *state_change_cnt, short *options,
    struct sockaddr **faddr)
{
    struct m_tag *tag = NULL;
    struct cfil_tag *ctag = NULL;

    tag = m_tag_locate(m, KERNEL_MODULE_TAG_ID, KERNEL_TAG_TYPE_CFIL_UDP, NULL);
    if (tag) {
        ctag = (struct cfil_tag *)(tag + 1);
        if (state_change_cnt)
            *state_change_cnt = ctag->cfil_so_state_change_cnt;
        if (options)
            *options = ctag->cfil_so_options;
        if (faddr)
            *faddr = (struct sockaddr *) &ctag->cfil_faddr;

        /*
         * Unlink tag and hand it over to caller.
         * Note that caller will be responsible to free it.
         */
        m_tag_unlink(m, tag);