]> git.saurik.com Git - apple/xnu.git/blobdiff - bsd/security/audit/audit_pipe.c
xnu-1456.1.26.tar.gz
[apple/xnu.git] / bsd / security / audit / audit_pipe.c
diff --git a/bsd/security/audit/audit_pipe.c b/bsd/security/audit/audit_pipe.c
new file mode 100644 (file)
index 0000000..9f64bba
--- /dev/null
@@ -0,0 +1,1140 @@
+/*-
+ * Copyright (c) 2006 Robert N. M. Watson
+ * Copyright (c) 2008-2009 Apple, Inc.
+ * All rights reserved.
+ *
+ * This software was developed by Robert Watson for the TrustedBSD Project.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <stdarg.h>
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/fcntl.h>
+#include <sys/conf.h>
+#include <sys/poll.h>
+#include <sys/user.h>
+#include <sys/signalvar.h>
+#include <miscfs/devfs/devfs.h>
+
+#include <bsm/audit.h>
+#include <security/audit/audit.h>
+#include <security/audit/audit_ioctl.h>
+#include <security/audit/audit_bsd.h>
+#include <security/audit/audit_private.h>
+
+#if CONFIG_AUDIT
+/*
+ * Implementation of a clonable special device providing a live stream of BSM
+ * audit data.  Consumers receive a "tee" of the system audit trail by
+ * default, but may also define alternative event selections using ioctls.
+ * This interface provides unreliable but timely access to audit events.
+ * Consumers should be very careful to avoid introducing event cycles.
+ */
+
+/*
+ * Memory types.
+ */
+static MALLOC_DEFINE(M_AUDIT_PIPE, "audit_pipe", "Audit pipes");
+static MALLOC_DEFINE(M_AUDIT_PIPE_ENTRY, "audit_pipeent",
+    "Audit pipe entries and buffers");
+static MALLOC_DEFINE(M_AUDIT_PIPE_PRESELECT, "audit_pipe_presel",
+    "Audit pipe preselection structure");
+
+/*
+ * Audit pipe buffer parameters.
+ */
+#define        AUDIT_PIPE_QLIMIT_DEFAULT       (128)
+#define        AUDIT_PIPE_QLIMIT_MIN           (1)
+#define        AUDIT_PIPE_QLIMIT_MAX           (1024)
+
+/*
+ * Description of an entry in an audit_pipe.
+ */
+struct audit_pipe_entry {
+       void                            *ape_record;
+       u_int                            ape_record_len;
+       TAILQ_ENTRY(audit_pipe_entry)    ape_queue;
+};
+
+/*
+ * Audit pipes allow processes to express "interest" in the set of records
+ * that are delivered via the pipe.  They do this in a similar manner to the
+ * mechanism for audit trail configuration, by expressing two global masks,
+ * and optionally expressing per-auid masks.  The following data structure is
+ * the per-auid mask description.  The global state is stored in the audit
+ * pipe data structure.
+ *
+ * We may want to consider a more space/time-efficient data structure once
+ * usage patterns for per-auid specifications are clear.
+ */
+struct audit_pipe_preselect {
+       au_id_t                                  app_auid;
+       au_mask_t                                app_mask;
+       TAILQ_ENTRY(audit_pipe_preselect)        app_list;
+};
+
+/*
+ * Description of an individual audit_pipe.  Consists largely of a bounded
+ * length queue.
+ */
+#define        AUDIT_PIPE_ASYNC        0x00000001
+#define        AUDIT_PIPE_NBIO         0x00000002
+struct audit_pipe {
+       int                              ap_open;       /* Device open? */
+       u_int                            ap_flags;
+
+       struct selinfo                   ap_selinfo;
+       pid_t                            ap_sigio;
+
+       /*
+        * Per-pipe mutex protecting most fields in this data structure.
+        */
+       struct mtx                       ap_mtx;
+
+       /*
+        * Per-pipe sleep lock serializing user-generated reads and flushes.
+        * uiomove() is called to copy out the current head record's data
+        * while the record remains in the queue, so we prevent other threads
+        * from removing it using this lock.
+        */
+       struct slck                      ap_sx;
+
+       /*
+        * Condition variable to signal when data has been delivered to a
+        * pipe.
+        */
+       struct cv                        ap_cv;
+
+       /*
+        * Various queue-related variables:  qlen and qlimit are a count of
+        * records in the queue; qbyteslen is the number of bytes of data
+        * across all records, and qoffset is the amount read so far of the
+        * first record in the queue.  The number of bytes available for
+        * reading in the queue is qbyteslen - qoffset.
+        */
+       u_int                            ap_qlen;
+       u_int                            ap_qlimit;
+       u_int                            ap_qbyteslen;
+       u_int                            ap_qoffset;
+
+       /*
+        * Per-pipe operation statistics.
+        */
+       u_int64_t                        ap_inserts;    /* Records added. */
+       u_int64_t                        ap_reads;      /* Records read. */
+       u_int64_t                        ap_drops;      /* Records dropped. */
+
+       /*
+        * Fields relating to pipe interest: global masks for unmatched
+        * processes (attributable, non-attributable), and a list of specific
+        * interest specifications by auid.
+        */
+       int                              ap_preselect_mode;
+       au_mask_t                        ap_preselect_flags;
+       au_mask_t                        ap_preselect_naflags;
+       TAILQ_HEAD(, audit_pipe_preselect)      ap_preselect_list;
+
+       /*
+        * Current pending record list.  Protected by a combination of ap_mtx
+        * and ap_sx.  Note particularly that *both* locks are required to
+        * remove a record from the head of the queue, as an in-progress read
+        * may sleep while copying and therefore cannot hold ap_mtx.
+        */
+       TAILQ_HEAD(, audit_pipe_entry)   ap_queue;
+
+       /*
+        * Global pipe list.
+        */
+       TAILQ_ENTRY(audit_pipe)          ap_list;
+};
+
+#define        AUDIT_PIPE_LOCK(ap)             mtx_lock(&(ap)->ap_mtx)
+#define        AUDIT_PIPE_LOCK_ASSERT(ap)      mtx_assert(&(ap)->ap_mtx, MA_OWNED)
+#define        AUDIT_PIPE_LOCK_DESTROY(ap)     mtx_destroy(&(ap)->ap_mtx)
+#define        AUDIT_PIPE_LOCK_INIT(ap)        mtx_init(&(ap)->ap_mtx, \
+                                           "audit_pipe_mtx", NULL, MTX_DEF)
+#define        AUDIT_PIPE_UNLOCK(ap)           mtx_unlock(&(ap)->ap_mtx)
+#define        AUDIT_PIPE_MTX(ap)              (&(ap)->ap_mtx)
+
+#define        AUDIT_PIPE_SX_LOCK_DESTROY(ap)  slck_destroy(&(ap)->ap_sx)
+#define        AUDIT_PIPE_SX_LOCK_INIT(ap)     slck_init(&(ap)->ap_sx, "audit_pipe_sx")
+#define        AUDIT_PIPE_SX_XLOCK_ASSERT(ap)  slck_assert(&(ap)->ap_sx, SA_XLOCKED)
+#define        AUDIT_PIPE_SX_XLOCK_SIG(ap)     slck_lock_sig(&(ap)->ap_sx)
+#define        AUDIT_PIPE_SX_XUNLOCK(ap)       slck_unlock(&(ap)->ap_sx)
+
+
+/*
+ * Global list of audit pipes, rwlock to protect it.  Individual record
+ * queues on pipes are protected by per-pipe locks; these locks synchronize
+ * between threads walking the list to deliver to individual pipes and add/
+ * remove of pipes, and are mostly acquired for read.
+ */
+static TAILQ_HEAD(, audit_pipe)         audit_pipe_list;
+static struct rwlock            audit_pipe_lock;
+
+#define        AUDIT_PIPE_LIST_LOCK_INIT()     rw_init(&audit_pipe_lock, \
+                                           "audit_pipe_list_lock")
+#define        AUDIT_PIPE_LIST_RLOCK()         rw_rlock(&audit_pipe_lock)
+#define        AUDIT_PIPE_LIST_RUNLOCK()       rw_runlock(&audit_pipe_lock)
+#define        AUDIT_PIPE_LIST_WLOCK()         rw_wlock(&audit_pipe_lock)
+#define        AUDIT_PIPE_LIST_WLOCK_ASSERT()  rw_assert(&audit_pipe_lock, \
+                                           RA_WLOCKED)
+#define        AUDIT_PIPE_LIST_WUNLOCK()       rw_wunlock(&audit_pipe_lock)
+
+/*
+ * Cloning related variables and constants.
+ */
+#define        AUDIT_PIPE_NAME         "auditpipe"
+#define        MAX_AUDIT_PIPES         32
+static int audit_pipe_major;
+
+/*
+ * dev_t doesn't have a pointer for "softc" data.  So we have to keep track of
+ * it with the following global array (indexed by the minor number).
+ *
+ * XXX We may want to dynamically grow this as needed.
+ */
+static struct audit_pipe       *audit_pipe_dtab[MAX_AUDIT_PIPES];
+
+
+/*
+ * Special device methods and definition.
+ */
+static open_close_fcn_t                audit_pipe_open;
+static open_close_fcn_t                audit_pipe_close;
+static read_write_fcn_t                audit_pipe_read;
+static ioctl_fcn_t             audit_pipe_ioctl;
+static select_fcn_t            audit_pipe_poll;
+
+static struct cdevsw audit_pipe_cdevsw = {
+       .d_open      =          audit_pipe_open,
+       .d_close     =          audit_pipe_close,
+       .d_read      =          audit_pipe_read,
+       .d_write     =          eno_rdwrt,
+       .d_ioctl     =          audit_pipe_ioctl,
+       .d_stop      =          eno_stop,
+       .d_reset     =          eno_reset,
+       .d_ttys      =          NULL,
+       .d_select    =          audit_pipe_poll,
+       .d_mmap      =          eno_mmap,
+       .d_strategy  =          eno_strat,
+       .d_type      =          0
+};
+
+/*
+ * Some global statistics on audit pipes.
+ */
+static int             audit_pipe_count;       /* Current number of pipes. */
+static u_int64_t       audit_pipe_ever;        /* Pipes ever allocated. */
+static u_int64_t       audit_pipe_records;     /* Records seen. */
+static u_int64_t       audit_pipe_drops;       /* Global record drop count. */
+
+/*
+ * Free an audit pipe entry.
+ */
+static void
+audit_pipe_entry_free(struct audit_pipe_entry *ape)
+{
+
+       free(ape->ape_record, M_AUDIT_PIPE_ENTRY);
+       free(ape, M_AUDIT_PIPE_ENTRY);
+}
+
+/*
+ * Find an audit pipe preselection specification for an auid, if any.
+ */
+static struct audit_pipe_preselect *
+audit_pipe_preselect_find(struct audit_pipe *ap, au_id_t auid)
+{
+       struct audit_pipe_preselect *app;
+
+       AUDIT_PIPE_LOCK_ASSERT(ap);
+
+       TAILQ_FOREACH(app, &ap->ap_preselect_list, app_list) {
+               if (app->app_auid == auid)
+                       return (app);
+       }
+       return (NULL);
+}
+
+/*
+ * Query the per-pipe mask for a specific auid.
+ */
+static int
+audit_pipe_preselect_get(struct audit_pipe *ap, au_id_t auid,
+    au_mask_t *maskp)
+{
+       struct audit_pipe_preselect *app;
+       int error;
+
+       AUDIT_PIPE_LOCK(ap);
+       app = audit_pipe_preselect_find(ap, auid);
+       if (app != NULL) {
+               *maskp = app->app_mask;
+               error = 0;
+       } else
+               error = ENOENT;
+       AUDIT_PIPE_UNLOCK(ap);
+       return (error);
+}
+
+/*
+ * Set the per-pipe mask for a specific auid.  Add a new entry if needed;
+ * otherwise, update the current entry.
+ */
+static void
+audit_pipe_preselect_set(struct audit_pipe *ap, au_id_t auid, au_mask_t mask)
+{
+       struct audit_pipe_preselect *app, *app_new;
+
+       /*
+        * Pessimistically assume that the auid doesn't already have a mask
+        * set, and allocate.  We will free it if it is unneeded.
+        */
+       app_new = malloc(sizeof(*app_new), M_AUDIT_PIPE_PRESELECT, M_WAITOK);
+       AUDIT_PIPE_LOCK(ap);
+       app = audit_pipe_preselect_find(ap, auid);
+       if (app == NULL) {
+               app = app_new;
+               app_new = NULL;
+               app->app_auid = auid;
+               TAILQ_INSERT_TAIL(&ap->ap_preselect_list, app, app_list);
+       }
+       app->app_mask = mask;
+       AUDIT_PIPE_UNLOCK(ap);
+       if (app_new != NULL)
+               free(app_new, M_AUDIT_PIPE_PRESELECT);
+}
+
+/*
+ * Delete a per-auid mask on an audit pipe.
+ */
+static int
+audit_pipe_preselect_delete(struct audit_pipe *ap, au_id_t auid)
+{
+       struct audit_pipe_preselect *app;
+       int error;
+
+       AUDIT_PIPE_LOCK(ap);
+       app = audit_pipe_preselect_find(ap, auid);
+       if (app != NULL) {
+               TAILQ_REMOVE(&ap->ap_preselect_list, app, app_list);
+               error = 0;
+       } else
+               error = ENOENT;
+       AUDIT_PIPE_UNLOCK(ap);
+       if (app != NULL)
+               free(app, M_AUDIT_PIPE_PRESELECT);
+       return (error);
+}
+
+/*
+ * Delete all per-auid masks on an audit pipe.
+ */
+static void
+audit_pipe_preselect_flush_locked(struct audit_pipe *ap)
+{
+       struct audit_pipe_preselect *app;
+
+       AUDIT_PIPE_LOCK_ASSERT(ap);
+
+       while ((app = TAILQ_FIRST(&ap->ap_preselect_list)) != NULL) {
+               TAILQ_REMOVE(&ap->ap_preselect_list, app, app_list);
+               free(app, M_AUDIT_PIPE_PRESELECT);
+       }
+}
+
+static void
+audit_pipe_preselect_flush(struct audit_pipe *ap)
+{
+
+       AUDIT_PIPE_LOCK(ap);
+       audit_pipe_preselect_flush_locked(ap);
+       AUDIT_PIPE_UNLOCK(ap);
+}
+
+/*-
+ * Determine whether a specific audit pipe matches a record with these
+ * properties.  Algorithm is as follows:
+ *
+ * - If the pipe is configured to track the default trail configuration, then
+ *   use the results of global preselection matching.
+ * - If not, search for a specifically configured auid entry matching the
+ *   event.  If an entry is found, use that.
+ * - Otherwise, use the default flags or naflags configured for the pipe.
+ */
+static int
+audit_pipe_preselect_check(struct audit_pipe *ap, au_id_t auid,
+    au_event_t event, au_class_t class, int sorf, int trail_preselect)
+{
+       struct audit_pipe_preselect *app;
+
+       AUDIT_PIPE_LOCK_ASSERT(ap);
+
+       switch (ap->ap_preselect_mode) {
+       case AUDITPIPE_PRESELECT_MODE_TRAIL:
+               return (trail_preselect);
+
+       case AUDITPIPE_PRESELECT_MODE_LOCAL:
+               app = audit_pipe_preselect_find(ap, auid);
+               if (app == NULL) {
+                       if (auid == (uid_t)AU_DEFAUDITID)
+                               return (au_preselect(event, class,
+                                   &ap->ap_preselect_naflags, sorf));
+                       else
+                               return (au_preselect(event, class,
+                                   &ap->ap_preselect_flags, sorf));
+               } else
+                       return (au_preselect(event, class, &app->app_mask,
+                           sorf));
+
+       default:
+               panic("audit_pipe_preselect_check: mode %d",
+                   ap->ap_preselect_mode);
+       }
+
+       return (0);
+}
+
+/*
+ * Determine whether there exists a pipe interested in a record with specific
+ * properties.
+ */
+int
+audit_pipe_preselect(au_id_t auid, au_event_t event, au_class_t class,
+    int sorf, int trail_preselect)
+{
+       struct audit_pipe *ap;
+
+       /* Lockless read to avoid acquiring the global lock if not needed. */
+       if (TAILQ_EMPTY(&audit_pipe_list))
+               return (0);
+
+       AUDIT_PIPE_LIST_RLOCK();
+       TAILQ_FOREACH(ap, &audit_pipe_list, ap_list) {
+                AUDIT_PIPE_LOCK(ap);
+               if (audit_pipe_preselect_check(ap, auid, event, class, sorf,
+                   trail_preselect)) {
+                       AUDIT_PIPE_UNLOCK(ap);
+                       AUDIT_PIPE_LIST_RUNLOCK();
+                       return (1);
+               }
+               AUDIT_PIPE_UNLOCK(ap);
+       }
+       AUDIT_PIPE_LIST_RUNLOCK();
+       return (0);
+}
+
+/*
+ * Append individual record to a queue -- allocate queue-local buffer, and
+ * add to the queue.  If the queue is full or we can't allocate memory, drop
+ * the newest record.
+ */
+static void
+audit_pipe_append(struct audit_pipe *ap, void *record, u_int record_len)
+{
+       struct audit_pipe_entry *ape;
+
+       AUDIT_PIPE_LOCK_ASSERT(ap);
+
+       if (ap->ap_qlen >= ap->ap_qlimit) {
+               ap->ap_drops++;
+               audit_pipe_drops++;
+               return;
+       }
+
+       ape = malloc(sizeof(*ape), M_AUDIT_PIPE_ENTRY, M_NOWAIT | M_ZERO);
+       if (ape == NULL) {
+               ap->ap_drops++;
+               audit_pipe_drops++;
+               return;
+       }
+
+       ape->ape_record = malloc(record_len, M_AUDIT_PIPE_ENTRY, M_NOWAIT);
+       if (ape->ape_record == NULL) {
+               free(ape, M_AUDIT_PIPE_ENTRY);
+               ap->ap_drops++;
+               audit_pipe_drops++;
+               return;
+       }
+
+       bcopy(record, ape->ape_record, record_len);
+       ape->ape_record_len = record_len;
+
+       TAILQ_INSERT_TAIL(&ap->ap_queue, ape, ape_queue);
+       ap->ap_inserts++;
+       ap->ap_qlen++;
+       ap->ap_qbyteslen += ape->ape_record_len;
+       selwakeup(&ap->ap_selinfo);
+       if (ap->ap_flags & AUDIT_PIPE_ASYNC)
+               pgsigio(ap->ap_sigio, SIGIO);
+#if 0  /* XXX - fix select */
+       selwakeuppri(&ap->ap_selinfo, PSOCK);
+       KNOTE_LOCKED(&ap->ap_selinfo.si_note, 0);
+       if (ap->ap_flags & AUDIT_PIPE_ASYNC)
+               pgsigio(&ap->ap_sigio, SIGIO, 0);
+#endif
+       cv_broadcast(&ap->ap_cv);
+}
+
+/*
+ * audit_pipe_submit(): audit_worker submits audit records via this
+ * interface, which arranges for them to be delivered to pipe queues.
+ */
+void
+audit_pipe_submit(au_id_t auid, au_event_t event, au_class_t class, int sorf,
+    int trail_select, void *record, u_int record_len)
+{
+       struct audit_pipe *ap;
+
+       /*
+        * Lockless read to avoid lock overhead if pipes are not in use.
+        */
+       if (TAILQ_FIRST(&audit_pipe_list) == NULL)
+               return;
+
+       AUDIT_PIPE_LIST_RLOCK();
+       TAILQ_FOREACH(ap, &audit_pipe_list, ap_list) {
+               AUDIT_PIPE_LOCK(ap);
+               if (audit_pipe_preselect_check(ap, auid, event, class, sorf,
+                   trail_select))
+                       audit_pipe_append(ap, record, record_len);
+               AUDIT_PIPE_UNLOCK(ap);
+       }
+       AUDIT_PIPE_LIST_RUNLOCK();
+
+       /* Unlocked increment. */
+       audit_pipe_records++;
+}
+
+/*
+ * audit_pipe_submit_user(): the same as audit_pipe_submit(), except that
+ * since we don't currently have selection information available, it is
+ * delivered to the pipe unconditionally.
+ *
+ * XXXRW: This is a bug.  The BSM check routine for submitting a user record
+ * should parse that information and return it.
+ */
+void
+audit_pipe_submit_user(void *record, u_int record_len)
+{
+       struct audit_pipe *ap;
+
+       /*
+        * Lockless read to avoid lock overhead if pipes are not in use.
+        */
+       if (TAILQ_FIRST(&audit_pipe_list) == NULL)
+               return;
+
+       AUDIT_PIPE_LIST_RLOCK();
+       TAILQ_FOREACH(ap, &audit_pipe_list, ap_list) {
+               AUDIT_PIPE_LOCK(ap);
+               audit_pipe_append(ap, record, record_len);
+               AUDIT_PIPE_UNLOCK(ap);
+       }
+       AUDIT_PIPE_LIST_RUNLOCK();
+
+       /* Unlocked increment. */
+       audit_pipe_records++;
+}
+
+/*
+ * Allocate a new audit pipe.  Connects the pipe, on success, to the global
+ * list and updates statistics.
+ */
+static struct audit_pipe *
+audit_pipe_alloc(void)
+{
+       struct audit_pipe *ap;
+
+       AUDIT_PIPE_LIST_WLOCK_ASSERT();
+
+       ap = malloc(sizeof(*ap), M_AUDIT_PIPE, M_NOWAIT | M_ZERO);
+       if (ap == NULL)
+               return (NULL);
+
+       ap->ap_qlimit = AUDIT_PIPE_QLIMIT_DEFAULT;
+       TAILQ_INIT(&ap->ap_queue);
+#ifndef  __APPLE__
+       knlist_init(&ap->ap_selinfo.si_note, AUDIT_PIPE_MTX(ap), NULL, NULL,
+           NULL);
+#endif
+       AUDIT_PIPE_LOCK_INIT(ap);
+       AUDIT_PIPE_SX_LOCK_INIT(ap);
+       cv_init(&ap->ap_cv, "audit_pipe");
+
+       /*
+        * Default flags, naflags, and auid-specific preselection settings to
+        * 0.  Initialize the mode to the global trail so that if praudit(1)
+        * is run on /dev/auditpipe, it sees events associated with the
+        * default trail.  Pipe-aware application can clear the flag, set
+        * custom masks, and flush the pipe as needed.
+        */
+       bzero(&ap->ap_preselect_flags, sizeof(ap->ap_preselect_flags));
+       bzero(&ap->ap_preselect_naflags, sizeof(ap->ap_preselect_naflags));
+       TAILQ_INIT(&ap->ap_preselect_list);
+       ap->ap_preselect_mode = AUDITPIPE_PRESELECT_MODE_TRAIL;
+
+       /*
+        * Add to global list and update global statistics.
+        */
+       TAILQ_INSERT_HEAD(&audit_pipe_list, ap, ap_list);
+       audit_pipe_count++;
+       audit_pipe_ever++;
+
+       return (ap);
+}
+
+/*
+ * Flush all records currently present in an audit pipe; assume mutex is held.
+ */
+static void
+audit_pipe_flush(struct audit_pipe *ap)
+{
+       struct audit_pipe_entry *ape;
+
+       AUDIT_PIPE_LOCK_ASSERT(ap);
+
+       while ((ape = TAILQ_FIRST(&ap->ap_queue)) != NULL) {
+               TAILQ_REMOVE(&ap->ap_queue, ape, ape_queue);
+               ap->ap_qbyteslen -= ape->ape_record_len;
+               audit_pipe_entry_free(ape);
+               ap->ap_qlen--;
+       }
+       ap->ap_qoffset = 0;
+
+       KASSERT(ap->ap_qlen == 0, ("audit_pipe_free: ap_qbyteslen"));
+       KASSERT(ap->ap_qbyteslen == 0, ("audit_pipe_flush: ap_qbyteslen"));
+}
+
+/*
+ * Free an audit pipe; this means freeing all preselection state and all
+ * records in the pipe.  Assumes global write lock and pipe mutex are held to
+ * revent any new records from being inserted during the free, and that the
+ * audit pipe is still on the global list.
+ */
+static void
+audit_pipe_free(struct audit_pipe *ap)
+{
+
+       AUDIT_PIPE_LIST_WLOCK_ASSERT();
+       AUDIT_PIPE_LOCK_ASSERT(ap);
+
+       audit_pipe_preselect_flush_locked(ap);
+       audit_pipe_flush(ap);
+       cv_destroy(&ap->ap_cv);
+       AUDIT_PIPE_SX_LOCK_DESTROY(ap);
+       AUDIT_PIPE_LOCK_DESTROY(ap);
+#ifndef  __APPLE__
+       knlist_destroy(&ap->ap_selinfo.si_note);
+#endif
+       TAILQ_REMOVE(&audit_pipe_list, ap, ap_list);
+       free(ap, M_AUDIT_PIPE);
+       audit_pipe_count--;
+}
+
+/*
+ * Audit pipe clone routine -- provides a new minor number, or to return (-1),
+ * if one can't be provided.  Called with DEVFS_LOCK held.
+ */
+static int
+audit_pipe_clone(__unused dev_t dev, int action)
+{
+       int i;
+
+       if (action == DEVFS_CLONE_ALLOC) {
+               for(i = 0; i < MAX_AUDIT_PIPES; i++)
+                       if (audit_pipe_dtab[i] == NULL)
+                               return (i);
+
+               /*
+                * XXX Should really return -1 here but that seems to hang
+                * things in devfs.  Instead return 0 and let _open() tell
+                * userland the bad news.
+                */
+               return (0);
+       }
+
+       return (-1);
+}
+
+/*
+ * Audit pipe open method.  Explicit privilege check isn't used as this
+ * allows file permissions on the special device to be used to grant audit
+ * review access.  Those file permissions should be managed carefully.
+ */
+static int
+audit_pipe_open(dev_t dev, __unused int flags,  __unused int devtype,
+    __unused proc_t p)
+{
+       struct audit_pipe *ap;
+       int u;
+
+       u = minor(dev);
+       if (u < 0 || u > MAX_AUDIT_PIPES)
+               return (ENXIO);
+
+       AUDIT_PIPE_LIST_WLOCK();
+       ap = audit_pipe_dtab[u];
+       if (ap == NULL) {
+               ap = audit_pipe_alloc();
+               if (ap == NULL) {
+                       AUDIT_PIPE_LIST_WUNLOCK();
+                       return (ENOMEM);
+               }
+               audit_pipe_dtab[u] = ap;
+       } else {
+               KASSERT(ap->ap_open, ("audit_pipe_open: ap && !ap_open"));
+               AUDIT_PIPE_LIST_WUNLOCK();
+               return (EBUSY);
+       }
+       ap->ap_open = 1;
+       AUDIT_PIPE_LIST_WUNLOCK();
+#ifndef __APPLE__
+       fsetown(td->td_proc->p_pid, &ap->ap_sigio);
+#endif
+       return (0);
+}
+
+/*
+ * Close audit pipe, tear down all records, etc.
+ */
+static int
+audit_pipe_close(dev_t dev, __unused int flags, __unused int devtype,
+    __unused proc_t p)
+{
+       struct audit_pipe *ap;
+       int u;
+
+       u = minor(dev);
+       ap = audit_pipe_dtab[u];
+       KASSERT(ap != NULL, ("audit_pipe_close: ap == NULL"));
+       KASSERT(ap->ap_open, ("audit_pipe_close: !ap_open"));
+
+#ifndef __APPLE__
+       funsetown(&ap->ap_sigio);
+#endif
+       AUDIT_PIPE_LIST_WLOCK();
+       AUDIT_PIPE_LOCK(ap);
+       ap->ap_open = 0;
+       audit_pipe_free(ap);
+       audit_pipe_dtab[u] = NULL;
+       AUDIT_PIPE_LIST_WUNLOCK();
+       return (0);
+}
+
+/*
+ * Audit pipe ioctl() routine.  Handle file descriptor and audit pipe layer
+ * commands.
+ */
+static int
+audit_pipe_ioctl(dev_t dev, u_long cmd, caddr_t data,
+    __unused int flag, __unused proc_t p)
+{
+       struct auditpipe_ioctl_preselect *aip;
+       struct audit_pipe *ap;
+       au_mask_t *maskp;
+       int error, mode;
+       au_id_t auid;
+
+       ap = audit_pipe_dtab[minor(dev)];
+       KASSERT(ap != NULL, ("audit_pipe_ioctl: ap == NULL"));
+
+       /*
+        * Audit pipe ioctls: first come standard device node ioctls, then
+        * manipulation of pipe settings, and finally, statistics query
+        * ioctls.
+        */
+       switch (cmd) {
+       case FIONBIO:
+               AUDIT_PIPE_LOCK(ap);
+               if (*(int *)data)
+                       ap->ap_flags |= AUDIT_PIPE_NBIO;
+               else
+                       ap->ap_flags &= ~AUDIT_PIPE_NBIO;
+               AUDIT_PIPE_UNLOCK(ap);
+               error = 0;
+               break;
+
+       case FIONREAD:
+               AUDIT_PIPE_LOCK(ap);
+               *(int *)data = ap->ap_qbyteslen - ap->ap_qoffset;
+               AUDIT_PIPE_UNLOCK(ap);
+               error = 0;
+               break;
+
+       case FIOASYNC:
+               AUDIT_PIPE_LOCK(ap);
+               if (*(int *)data)
+                       ap->ap_flags |= AUDIT_PIPE_ASYNC;
+               else
+                       ap->ap_flags &= ~AUDIT_PIPE_ASYNC;
+               AUDIT_PIPE_UNLOCK(ap);
+               error = 0;
+               break;
+
+#ifndef __APPLE__
+       case FIOSETOWN:
+               error = fsetown(*(int *)data, &ap->ap_sigio);
+               break;
+
+       case FIOGETOWN:
+               *(int *)data = fgetown(&ap->ap_sigio);
+               error = 0;
+               break;
+#endif /* !__APPLE__ */
+
+       case AUDITPIPE_GET_QLEN:
+               *(u_int *)data = ap->ap_qlen;
+               error = 0;
+               break;
+
+       case AUDITPIPE_GET_QLIMIT:
+               *(u_int *)data = ap->ap_qlimit;
+               error = 0;
+               break;
+
+       case AUDITPIPE_SET_QLIMIT:
+               /* Lockless integer write. */
+               if (*(u_int *)data >= AUDIT_PIPE_QLIMIT_MIN ||
+                   *(u_int *)data <= AUDIT_PIPE_QLIMIT_MAX) {
+                       ap->ap_qlimit = *(u_int *)data;
+                       error = 0;
+               } else
+                       error = EINVAL;
+               break;
+
+       case AUDITPIPE_GET_QLIMIT_MIN:
+               *(u_int *)data = AUDIT_PIPE_QLIMIT_MIN;
+               error = 0;
+               break;
+
+       case AUDITPIPE_GET_QLIMIT_MAX:
+               *(u_int *)data = AUDIT_PIPE_QLIMIT_MAX;
+               error = 0;
+               break;
+
+       case AUDITPIPE_GET_PRESELECT_FLAGS:
+               AUDIT_PIPE_LOCK(ap);
+               maskp = (au_mask_t *)data;
+               *maskp = ap->ap_preselect_flags;
+               AUDIT_PIPE_UNLOCK(ap);
+               error = 0;
+               break;
+
+       case AUDITPIPE_SET_PRESELECT_FLAGS:
+               AUDIT_PIPE_LOCK(ap);
+               maskp = (au_mask_t *)data;
+               ap->ap_preselect_flags = *maskp;
+               AUDIT_CHECK_IF_KEVENTS_MASK(ap->ap_preselect_flags);
+               AUDIT_PIPE_UNLOCK(ap);
+               error = 0;
+               break;
+
+       case AUDITPIPE_GET_PRESELECT_NAFLAGS:
+               AUDIT_PIPE_LOCK(ap);
+               maskp = (au_mask_t *)data;
+               *maskp = ap->ap_preselect_naflags;
+               AUDIT_PIPE_UNLOCK(ap);
+               error = 0;
+               break;
+
+       case AUDITPIPE_SET_PRESELECT_NAFLAGS:
+               AUDIT_PIPE_LOCK(ap);
+               maskp = (au_mask_t *)data;
+               ap->ap_preselect_naflags = *maskp;
+               AUDIT_CHECK_IF_KEVENTS_MASK(ap->ap_preselect_naflags);
+               AUDIT_PIPE_UNLOCK(ap);
+               error = 0;
+               break;
+
+       case AUDITPIPE_GET_PRESELECT_AUID:
+               aip = (struct auditpipe_ioctl_preselect *)data;
+               error = audit_pipe_preselect_get(ap, aip->aip_auid,
+                   &aip->aip_mask);
+               break;
+
+       case AUDITPIPE_SET_PRESELECT_AUID:
+               aip = (struct auditpipe_ioctl_preselect *)data;
+               audit_pipe_preselect_set(ap, aip->aip_auid, aip->aip_mask);
+               error = 0;
+               break;
+
+       case AUDITPIPE_DELETE_PRESELECT_AUID:
+               auid = *(au_id_t *)data;
+               error = audit_pipe_preselect_delete(ap, auid);
+               break;
+
+       case AUDITPIPE_FLUSH_PRESELECT_AUID:
+               audit_pipe_preselect_flush(ap);
+               error = 0;
+               break;
+
+       case AUDITPIPE_GET_PRESELECT_MODE:
+               AUDIT_PIPE_LOCK(ap);
+               *(int *)data = ap->ap_preselect_mode;
+               AUDIT_PIPE_UNLOCK(ap);
+               error = 0;
+               break;
+
+       case AUDITPIPE_SET_PRESELECT_MODE:
+               mode = *(int *)data;
+               switch (mode) {
+               case AUDITPIPE_PRESELECT_MODE_TRAIL:
+               case AUDITPIPE_PRESELECT_MODE_LOCAL:
+                       AUDIT_PIPE_LOCK(ap);
+                       ap->ap_preselect_mode = mode;
+                       AUDIT_PIPE_UNLOCK(ap);
+                       error = 0;
+                       break;
+
+               default:
+                       error = EINVAL;
+               }
+               break;
+
+       case AUDITPIPE_FLUSH:
+               if (AUDIT_PIPE_SX_XLOCK_SIG(ap) != 0)
+                       return (EINTR);
+               AUDIT_PIPE_LOCK(ap);
+               audit_pipe_flush(ap);
+               AUDIT_PIPE_UNLOCK(ap);
+               AUDIT_PIPE_SX_XUNLOCK(ap);
+               error = 0;
+               break;
+
+       case AUDITPIPE_GET_MAXAUDITDATA:
+               *(u_int *)data = MAXAUDITDATA;
+               error = 0;
+               break;
+
+       case AUDITPIPE_GET_INSERTS:
+               *(u_int *)data = ap->ap_inserts;
+               error = 0;
+               break;
+
+       case AUDITPIPE_GET_READS:
+               *(u_int *)data = ap->ap_reads;
+               error = 0;
+               break;
+
+       case AUDITPIPE_GET_DROPS:
+               *(u_int *)data = ap->ap_drops;
+               error = 0;
+               break;
+
+       case AUDITPIPE_GET_TRUNCATES:
+               *(u_int *)data = 0;
+               error = 0;
+               break;
+
+       default:
+               error = ENOTTY;
+       }
+       return (error);
+}
+
+/*
+ * Audit pipe read.  Read one or more partial or complete records to user
+ * memory.
+ */
+static int
+audit_pipe_read(dev_t dev, struct uio *uio, __unused int flag)
+{
+       struct audit_pipe_entry *ape;
+       struct audit_pipe *ap;
+       u_int toread;
+       int error;
+
+       ap = audit_pipe_dtab[minor(dev)];
+       KASSERT(ap != NULL, ("audit_pipe_read: ap == NULL"));
+
+       /*
+        * We hold an sleep lock over read and flush because we rely on the
+        * stability of a record in the queue during uiomove(9).
+        */
+       if (AUDIT_PIPE_SX_XLOCK_SIG(ap) != 0)
+               return (EINTR);
+       AUDIT_PIPE_LOCK(ap);
+       while (TAILQ_EMPTY(&ap->ap_queue)) {
+               if (ap->ap_flags & AUDIT_PIPE_NBIO) {
+                       AUDIT_PIPE_UNLOCK(ap);
+                       AUDIT_PIPE_SX_XUNLOCK(ap);
+                       return (EAGAIN);
+               }
+               error = cv_wait_sig(&ap->ap_cv, AUDIT_PIPE_MTX(ap));
+               if (error) {
+                       AUDIT_PIPE_UNLOCK(ap);
+                       AUDIT_PIPE_SX_XUNLOCK(ap);
+                       return (error);
+               }
+       }
+
+       /*
+        * Copy as many remaining bytes from the current record to userspace
+        * as we can.  Keep processing records until we run out of records in
+        * the queue, or until the user buffer runs out of space.
+        *
+        * Note: we rely on the sleep lock to maintain ape's stability here.
+        */
+       ap->ap_reads++;
+       while ((ape = TAILQ_FIRST(&ap->ap_queue)) != NULL &&
+           uio_resid(uio) > 0) {
+               AUDIT_PIPE_LOCK_ASSERT(ap);
+
+               KASSERT(ape->ape_record_len > ap->ap_qoffset,
+                   ("audit_pipe_read: record_len > qoffset (1)"));
+               toread = MIN(ape->ape_record_len - ap->ap_qoffset,
+                   uio_resid(uio));
+               AUDIT_PIPE_UNLOCK(ap);
+               error = uiomove((char *)ape->ape_record + ap->ap_qoffset,
+                   toread, uio);
+               if (error) {
+                       AUDIT_PIPE_SX_XUNLOCK(ap);
+                       return (error);
+               }
+
+               /*
+                * If the copy succeeded, update book-keeping, and if no
+                * bytes remain in the current record, free it.
+                */
+               AUDIT_PIPE_LOCK(ap);
+               KASSERT(TAILQ_FIRST(&ap->ap_queue) == ape,
+                   ("audit_pipe_read: queue out of sync after uiomove"));
+               ap->ap_qoffset += toread;
+               KASSERT(ape->ape_record_len >= ap->ap_qoffset,
+                   ("audit_pipe_read: record_len >= qoffset (2)"));
+               if (ap->ap_qoffset == ape->ape_record_len) {
+                       TAILQ_REMOVE(&ap->ap_queue, ape, ape_queue);
+                       ap->ap_qbyteslen -= ape->ape_record_len;
+                       audit_pipe_entry_free(ape);
+                       ap->ap_qlen--;
+                       ap->ap_qoffset = 0;
+               }
+       }
+       AUDIT_PIPE_UNLOCK(ap);
+       AUDIT_PIPE_SX_XUNLOCK(ap);
+       return (0);
+}
+
+/*
+ * Audit pipe poll.
+ */
+static int
+audit_pipe_poll(dev_t dev, int events, void *wql, struct proc *p)
+{
+       struct audit_pipe *ap;
+       int revents;
+
+       revents = 0;
+       ap = audit_pipe_dtab[minor(dev)];
+       KASSERT(ap != NULL, ("audit_pipe_poll: ap == NULL"));
+
+       if (events & (POLLIN | POLLRDNORM)) {
+               AUDIT_PIPE_LOCK(ap);
+               if (TAILQ_FIRST(&ap->ap_queue) != NULL)
+                       revents |= events & (POLLIN | POLLRDNORM);
+               else
+                       selrecord(p, &ap->ap_selinfo, wql);
+               AUDIT_PIPE_UNLOCK(ap);
+       }
+       return (revents);
+}
+
+#ifndef __APPLE__
+/*
+ * Return true if there are records available for reading on the pipe.
+ */
+static int
+audit_pipe_kqread(struct knote *kn, long hint)
+{
+       struct audit_pipe *ap;
+
+       ap = (struct audit_pipe *)kn->kn_hook;
+       KASSERT(ap != NULL, ("audit_pipe_kqread: ap == NULL"));
+       AUDIT_PIPE_LOCK_ASSERT(ap);
+
+       if (ap->ap_qlen != 0) {
+               kn->kn_data = ap->ap_qbyteslen - ap->ap_qoffset;
+               return (1);
+       } else {
+               kn->kn_data = 0;
+               return (0);
+       }
+}
+
+/*
+ * Detach kqueue state from audit pipe.
+ */
+static void
+audit_pipe_kqdetach(struct knote *kn)
+{
+       struct audit_pipe *ap;
+
+       ap = (struct audit_pipe *)kn->kn_hook;
+       KASSERT(ap != NULL, ("audit_pipe_kqdetach: ap == NULL"));
+
+       AUDIT_PIPE_LOCK(ap);
+       knlist_remove(&ap->ap_selinfo.si_note, kn, 1);
+       AUDIT_PIPE_UNLOCK(ap);
+}
+#endif /* !__APPLE__ */
+
+static void *devnode;
+
+int
+audit_pipe_init(void)
+{
+       dev_t dev;
+
+       TAILQ_INIT(&audit_pipe_list);
+       AUDIT_PIPE_LIST_LOCK_INIT();
+
+       audit_pipe_major = cdevsw_add(-1, &audit_pipe_cdevsw);
+       if (audit_pipe_major < 0)
+         return (KERN_FAILURE);
+
+       dev = makedev(audit_pipe_major, 0);
+       devnode = devfs_make_node_clone(dev, DEVFS_CHAR, UID_ROOT, GID_WHEEL,
+           0600, audit_pipe_clone, "auditpipe", 0);
+
+       if (devnode == NULL)
+               return (KERN_FAILURE);
+
+       return (KERN_SUCCESS);
+}
+
+int
+audit_pipe_shutdown(void)
+{
+
+       /* unwind everything */
+       devfs_remove(devnode);
+       (void) cdevsw_remove(audit_pipe_major, &audit_pipe_cdevsw);
+
+       return (KERN_SUCCESS);
+}
+
+#endif /* CONFIG_AUDIT */