+ /*
+ * All right, we have everything we need, so let's make that call.
+ *
+ * We keep special track of the return value from the file system:
+ * EAGAIN is an acceptable error condition that shouldn't keep us
+ * from copying out any results...
+ */
+
+ fserror = VNOP_SEARCHFS(vp,
+ searchparams1,
+ searchparams2,
+ &searchblock.searchattrs,
+ (u_long)searchblock.maxmatches,
+ &timelimit,
+ returnattrs,
+ &nummatches,
+ (u_long)uap->scriptcode,
+ (u_long)uap->options,
+ auio,
+ (struct searchstate *) &state->ss_fsstate,
+ ctx);
+
+ /*
+ * If it's a union mount we need to be called again
+ * to search the mounted-on filesystem.
+ */
+ if ((vp->v_mount->mnt_flag & MNT_UNION) && fserror == 0) {
+ state->ss_union_flags = SRCHFS_START;
+ state->ss_union_layer++; // search next layer down
+ fserror = EAGAIN;
+ }
+
+saveandexit:
+
+ vnode_put(vp);
+
+ /* Now copy out the stuff that needs copying out. That means the number of matches and the
+ search state. Everything was already put into the return buffer by the vop call. */
+
+ if ((error = copyout((caddr_t) state, uap->state, sizeof(struct searchstate))) != 0)
+ goto freeandexit;
+
+ if ((error = suulong(uap->nummatches, (uint64_t)nummatches)) != 0)
+ goto freeandexit;
+
+ error = fserror;
+
+freeandexit:
+
+ FREE(searchparams1,M_TEMP);
+
+ return(error);
+
+
+} /* end of searchfs system call */
+
+#else /* CONFIG_SEARCHFS */
+
+/*
+ * searchfs - stub compiled when CONFIG_SEARCHFS is not enabled.
+ *
+ * Every argument is ignored and the call always fails with ENOTSUP.
+ */
+int
+searchfs(__unused proc_t p, __unused struct searchfs_args *uap, __unused int32_t *retval)
+{
+	return ENOTSUP;
+}
+
+#endif /* CONFIG_SEARCHFS */
+
+
+/*
+ * Lock attributes, lock group and mutexes used by the namespace handler
+ * code. All of them are set up by nspace_handler_init().
+ */
+lck_grp_attr_t * nspace_group_attr;
+lck_attr_t * nspace_lock_attr;
+lck_grp_t * nspace_mutex_group;
+
+// taken around every scan or update of nspace_items[] in the code visible here
+lck_mtx_t nspace_handler_lock;
+lck_mtx_t nspace_handler_exclusion_lock;
+
+// reset to 0 by nspace_proc_exit() when the exiting process was the snapshot handler
+time_t snapshot_timestamp=0;
+// when non-zero, snapshot events are not skipped for vnodes on MNTK_VIRTUALDEV mounts
+int nspace_allow_virtual_devs=0;
+
+void nspace_handler_init(void);
+
+/*
+ * One pending namespace/snapshot event. Entries live in the nspace_items[]
+ * table; the code visible here touches them with nspace_handler_lock held.
+ */
+typedef struct nspace_item_info {
+ struct vnode *vp; // vnode the event is for; the address of this field is the sleep/wakeup channel for waiters
+ void *arg; // caller-supplied argument stored by resolve_nspace_item_ext() (NULL when none)
+ uint64_t op; // operation that raised the event; matched together with vp to find an existing item
+ uint32_t vid; // vnode_vid() of vp at the time the item was queued
+ uint32_t flags; // NSPACE_ITEM_* state and event-type bits; 0 marks a free slot
+ uint32_t token; // handed back to the waiter as its error when the item is cancelled
+ uint32_t refcount; // number of threads waiting on this item; the slot is recycled when it drops to 0
+} nspace_item_info;
+
+// number of slots in the nspace_items[] table
+#define MAX_NSPACE_ITEMS 128
+nspace_item_info nspace_items[MAX_NSPACE_ITEMS];
+uint32_t nspace_item_idx=0; // also used as the sleep/wakeup rendezvous address
+// the address of nspace_token_id is the channel threads sleep on while the table is full
+uint32_t nspace_token_id=0;
+uint32_t nspace_handler_timeout = 15; // seconds
+
+// Per-item state bits stored in nspace_item_info.flags.
+#define NSPACE_ITEM_NEW 0x0001
+#define NSPACE_ITEM_PROCESSING 0x0002
+#define NSPACE_ITEM_DEAD 0x0004
+#define NSPACE_ITEM_CANCELLED 0x0008
+#define NSPACE_ITEM_DONE 0x0010
+// if set when a waiter's sleep times out, the waiter clears it and sleeps again instead of failing
+#define NSPACE_ITEM_RESET_TIMER 0x0020
+
+// Event-type bits, kept in the same flags word as the state bits.
+#define NSPACE_ITEM_NSPACE_EVENT 0x0040
+#define NSPACE_ITEM_SNAPSHOT_EVENT 0x0080
+
+// mask selecting only the event-type bits
+#define NSPACE_ITEM_ALL_EVENT_TYPES (NSPACE_ITEM_NSPACE_EVENT | NSPACE_ITEM_SNAPSHOT_EVENT)
+
+//#pragma optimization_level 0
+
+/*
+ * Kinds of namespace handler. The values are used as indices into
+ * nspace_handlers[], and NSPACE_HANDLER_COUNT is the number of kinds.
+ */
+typedef enum {
+ NSPACE_HANDLER_NSPACE = 0,
+ NSPACE_HANDLER_SNAPSHOT = 1,
+
+ NSPACE_HANDLER_COUNT,
+} nspace_type_t;
+
+/* Registration state for one kind of namespace handler. */
+typedef struct {
+ uint64_t handler_tid; // zeroed by nspace_proc_exit(); presumably the handler's thread id -- TODO confirm
+ struct proc *handler_proc; // process registered as the handler; NULL when there is none
+ int handler_busy; // not referenced in the code visible here
+} nspace_handler_t;
+
+// one entry per nspace_type_t value
+nspace_handler_t nspace_handlers[NSPACE_HANDLER_COUNT];
+
+/* namespace fsctl functions (forward declarations of file-local static helpers) */
+static int nspace_flags_matches_handler(uint32_t event_flags, nspace_type_t nspace_type);
+static int nspace_item_flags_for_type(nspace_type_t nspace_type);
+static int nspace_open_flags_for_type(nspace_type_t nspace_type);
+static nspace_type_t nspace_type_for_op(uint64_t op);
+static int nspace_is_special_process(struct proc *proc);
+static int vn_open_with_vp(vnode_t vp, int fmode, vfs_context_t ctx);
+static int wait_for_namespace_event(namespace_handler_data *nhd, nspace_type_t nspace_type);
+static int validate_namespace_args (int is64bit, int size);
+static int process_namespace_fsctl(nspace_type_t nspace_type, int is64bit, u_int size, caddr_t data);
+
+
+/*
+ * Report whether an item's flags carry the event type served by a given
+ * kind of handler.
+ *
+ * event_flags: NSPACE_ITEM_* flags word of an item
+ * nspace_type: handler kind to test against
+ *
+ * Returns non-zero when, of the event-type bits, exactly the one that
+ * belongs to nspace_type is set, and 0 otherwise. An unknown nspace_type
+ * is logged and yields 0.
+ */
+static inline int nspace_flags_matches_handler(uint32_t event_flags, nspace_type_t nspace_type)
+{
+	uint32_t event_type = event_flags & NSPACE_ITEM_ALL_EVENT_TYPES;
+
+	switch(nspace_type) {
+	case NSPACE_HANDLER_NSPACE:
+		return event_type == NSPACE_ITEM_NSPACE_EVENT;
+	case NSPACE_HANDLER_SNAPSHOT:
+		return event_type == NSPACE_ITEM_SNAPSHOT_EVENT;
+	default:
+		// %u expects an unsigned argument, so convert to unsigned rather than int
+		printf("nspace_flags_matches_handler: invalid type %u\n", (unsigned int)nspace_type);
+		return 0;
+	}
+}
+
+/*
+ * Map a handler kind to the NSPACE_ITEM_* event-type bit used in item flags.
+ *
+ * Returns NSPACE_ITEM_NSPACE_EVENT or NSPACE_ITEM_SNAPSHOT_EVENT. An unknown
+ * nspace_type is logged and yields 0 (no event bit).
+ */
+static inline int nspace_item_flags_for_type(nspace_type_t nspace_type)
+{
+	switch(nspace_type) {
+	case NSPACE_HANDLER_NSPACE:
+		return NSPACE_ITEM_NSPACE_EVENT;
+	case NSPACE_HANDLER_SNAPSHOT:
+		return NSPACE_ITEM_SNAPSHOT_EVENT;
+	default:
+		// %u expects an unsigned argument, so convert to unsigned rather than int
+		printf("nspace_item_flags_for_type: invalid type %u\n", (unsigned int)nspace_type);
+		return 0;
+	}
+}
+
+/*
+ * Map a handler kind to a set of open flags.
+ *
+ * Returns FREAD | FWRITE | O_EVTONLY for the namespace handler and
+ * FREAD | O_EVTONLY for the snapshot handler. An unknown nspace_type is
+ * logged and yields 0.
+ */
+static inline int nspace_open_flags_for_type(nspace_type_t nspace_type)
+{
+	switch(nspace_type) {
+	case NSPACE_HANDLER_NSPACE:
+		return FREAD | FWRITE | O_EVTONLY;
+	case NSPACE_HANDLER_SNAPSHOT:
+		return FREAD | O_EVTONLY;
+	default:
+		// %u expects an unsigned argument, so convert to unsigned rather than int
+		printf("nspace_open_flags_for_type: invalid type %u\n", (unsigned int)nspace_type);
+		return 0;
+	}
+}
+
+/*
+ * Derive the handler kind from the event-type bits of an operation word.
+ *
+ * Only the bits selected by NAMESPACE_HANDLER_EVENT_TYPE_MASK are examined.
+ * An unrecognized value is logged and treated as NSPACE_HANDLER_NSPACE.
+ */
+static inline nspace_type_t nspace_type_for_op(uint64_t op)
+{
+	uint64_t event_type = op & NAMESPACE_HANDLER_EVENT_TYPE_MASK;
+
+	if (event_type == NAMESPACE_HANDLER_NSPACE_EVENT) {
+		return NSPACE_HANDLER_NSPACE;
+	}
+	if (event_type == NAMESPACE_HANDLER_SNAPSHOT_EVENT) {
+		return NSPACE_HANDLER_SNAPSHOT;
+	}
+
+	printf("nspace_type_for_op: invalid op mask %llx\n", event_type);
+	return NSPACE_HANDLER_NSPACE;
+}
+
+/*
+ * Tell whether proc is currently registered as a handler of any kind.
+ *
+ * Returns 1 when proc equals the handler_proc of some nspace_handlers[]
+ * entry, 0 otherwise.
+ */
+static inline int nspace_is_special_process(struct proc *proc)
+{
+	int type;
+	int is_handler = 0;
+
+	// stop scanning as soon as one registration matches
+	for (type = 0; type < NSPACE_HANDLER_COUNT && !is_handler; type++) {
+		is_handler = (nspace_handlers[type].handler_proc == proc);
+	}
+
+	return is_handler;
+}
+
+/*
+ * nspace_handler_init - one-time setup for the namespace handler code.
+ *
+ * Allocates the lock attribute, group attribute and lock group, initializes
+ * both handler mutexes with them, and zeroes the whole nspace_items[] table.
+ */
+void
+nspace_handler_init(void)
+{
+	// the lock group is created from the group attribute, so that one comes first
+	nspace_group_attr = lck_grp_attr_alloc_init();
+	nspace_mutex_group = lck_grp_alloc_init("nspace-mutex", nspace_group_attr);
+	nspace_lock_attr = lck_attr_alloc_init();
+
+	lck_mtx_init(&nspace_handler_lock, nspace_mutex_group, nspace_lock_attr);
+	lck_mtx_init(&nspace_handler_exclusion_lock, nspace_mutex_group, nspace_lock_attr);
+
+	// every slot starts out free (flags == 0)
+	memset(nspace_items, 0, sizeof(nspace_items));
+}
+
+/*
+ * nspace_proc_exit - clean up after a process that may have been a handler.
+ *
+ * Clears every nspace_handlers[] registration owned by p. If p owned none,
+ * nothing else happens. Otherwise every NEW or PROCESSING item whose event
+ * type was served by p is marked NSPACE_ITEM_DONE and its waiters are woken.
+ */
+void
+nspace_proc_exit(struct proc *p)
+{
+	int type, slot;
+	int dead_events = 0;	// event-type bits whose handler was p
+
+	for (type = 0; type < NSPACE_HANDLER_COUNT; type++) {
+		nspace_handler_t *handler = &nspace_handlers[type];
+
+		if (handler->handler_proc != p) {
+			continue;
+		}
+		dead_events |= nspace_item_flags_for_type(type);
+		handler->handler_tid = 0;
+		handler->handler_proc = NULL;
+	}
+
+	if (dead_events == 0) {
+		// p was not a handler of any kind
+		return;
+	}
+
+	if (dead_events & NSPACE_ITEM_SNAPSHOT_EVENT) {
+		// this process was the snapshot handler, so zero snapshot_timestamp
+		snapshot_timestamp = 0;
+	}
+
+	//
+	// release everyone who is waiting on a handler that just went away
+	//
+	lck_mtx_lock(&nspace_handler_lock);
+	for (slot = 0; slot < MAX_NSPACE_ITEMS; slot++) {
+		nspace_item_info *item = &nspace_items[slot];
+
+		if ((item->flags & (NSPACE_ITEM_NEW | NSPACE_ITEM_PROCESSING)) == 0) {
+			continue;	// not waiting on a handler
+		}
+		if ((item->flags & dead_events) == 0) {
+			continue;	// served by a handler that is still registered
+		}
+
+		if (item->vp != NULL && (item->vp->v_flag & VNEEDSSNAPSHOT)) {
+			vnode_lock_spin(item->vp);
+			item->vp->v_flag &= ~VNEEDSSNAPSHOT;
+			vnode_unlock(item->vp);
+		}
+
+		item->vp = NULL;
+		item->vid = 0;
+		item->flags = NSPACE_ITEM_DONE;
+		item->token = 0;
+
+		// waiters sleep on the address of the slot's vp field
+		wakeup((caddr_t)&item->vp);
+	}
+
+	wakeup((caddr_t)&nspace_item_idx);
+	lck_mtx_unlock(&nspace_handler_lock);
+}
+
+
+/*
+ * resolve_nspace_item - resolve_nspace_item_ext() without an extra argument.
+ *
+ * Passes vp and op through unchanged with a NULL arg and returns whatever
+ * resolve_nspace_item_ext() returns.
+ */
+int
+resolve_nspace_item(struct vnode *vp, uint64_t op)
+{
+	void *no_arg = NULL;
+
+	return resolve_nspace_item_ext(vp, op, no_arg);
+}
+
+/*
+ * resolve_nspace_item_ext - queue a namespace/snapshot event for vp and
+ * block until it is resolved.
+ *
+ * The event is recorded in a slot of nspace_items[] (or an existing slot
+ * with the same vp and op is shared), the handler side is woken through
+ * nspace_item_idx, and the caller then sleeps on the slot.
+ *
+ * vp:  vnode the event is for; only VREG, VDIR and VLNK vnodes are queued
+ * op:  operation word; its event-type bits select the handler kind
+ * arg: stored in the item, except that NSPACE_REARM_NO_ARG is stored as
+ *      NULL; for snapshot events any non-NULL arg also sets VNEEDSSNAPSHOT
+ *      on vp
+ *
+ * Returns:
+ *   0          vnode type is not eligible, the snapshot event is on a
+ *              MNTK_VIRTUALDEV mount (and nspace_allow_virtual_devs is 0),
+ *              no handler process is registered for the event type, or the
+ *              item was marked NSPACE_ITEM_DONE
+ *   EDEADLK    the calling process is itself a registered handler
+ *   item token the item was marked NSPACE_ITEM_CANCELLED
+ *   ETIMEDOUT  the wait timed out and NSPACE_ITEM_RESET_TIMER was not set
+ *   other      whatever msleep() returned (both sleeps use PCATCH)
+ */
+int
+resolve_nspace_item_ext(struct vnode *vp, uint64_t op, void *arg)
+{
+ int i, error, keep_waiting;
+ struct timespec ts;
+ nspace_type_t nspace_type = nspace_type_for_op(op);
+
+ // only allow namespace events on regular files, directories and symlinks.
+ if (vp->v_type != VREG && vp->v_type != VDIR && vp->v_type != VLNK) {
+ return 0;
+ }
+
+ //
+ // if this is a snapshot event and the vnode is on a
+ // disk image just pretend nothing happened since any
+ // change to the disk image will cause the disk image
+ // itself to get backed up and this avoids multi-way
+ // deadlocks between the snapshot handler and the ever
+ // popular diskimages-helper process. the variable
+ // nspace_allow_virtual_devs allows this behavior to
+ // be overridden (for use by the Mobile TimeMachine
+ // testing infrastructure which uses disk images)
+ //
+ if ( (op & NAMESPACE_HANDLER_SNAPSHOT_EVENT)
+ && (vp->v_mount != NULL)
+ && (vp->v_mount->mnt_kern_flag & MNTK_VIRTUALDEV)
+ && !nspace_allow_virtual_devs) {
+
+ return 0;
+ }
+
+ // if (thread_tid(current_thread()) == namespace_handler_tid) {
+ // no handler is registered for this event type, so there is nobody to wait for
+ if (nspace_handlers[nspace_type].handler_proc == NULL) {
+ return 0;
+ }
+
+ // a handler process must not block waiting on a handler
+ if (nspace_is_special_process(current_proc())) {
+ return EDEADLK;
+ }
+
+ lck_mtx_lock(&nspace_handler_lock);
+
+retry:
+ // first look for an item that is already queued for the same vnode and op
+ for(i=0; i < MAX_NSPACE_ITEMS; i++) {
+ if (vp == nspace_items[i].vp && op == nspace_items[i].op) {
+ break;
+ }
+ }
+
+ if (i >= MAX_NSPACE_ITEMS) {
+ // not queued yet: look for a free slot (flags == 0)
+ for(i=0; i < MAX_NSPACE_ITEMS; i++) {
+ if (nspace_items[i].flags == 0) {
+ break;
+ }
+ }
+ } else {
+ // share the existing item; one more waiter
+ nspace_items[i].refcount++;
+ }
+
+ if (i >= MAX_NSPACE_ITEMS) {
+ // table is full: sleep until a waiter releases a slot (it does wakeup(&nspace_token_id))
+ ts.tv_sec = nspace_handler_timeout;
+ ts.tv_nsec = 0;
+
+ error = msleep((caddr_t)&nspace_token_id, &nspace_handler_lock, PVFS|PCATCH, "nspace-no-space", &ts);
+ if (error == 0) {
+ // an entry got free'd up, go see if we can get a slot
+ goto retry;
+ } else {
+ lck_mtx_unlock(&nspace_handler_lock);
+ return error;
+ }
+ }
+
+ //
+ // if it didn't already exist, add it. if it did exist
+ // we'll get woken up when someone does a wakeup() on
+ // the slot in the nspace_items table.
+ //
+ if (vp != nspace_items[i].vp) {
+ nspace_items[i].vp = vp;
+ nspace_items[i].arg = (arg == NSPACE_REARM_NO_ARG) ? NULL : arg; // arg is {NULL, true, uio *} - only pass uio thru to the user
+ nspace_items[i].op = op;
+ nspace_items[i].vid = vnode_vid(vp);
+ nspace_items[i].flags = NSPACE_ITEM_NEW;
+ nspace_items[i].flags |= nspace_item_flags_for_type(nspace_type);
+ if (nspace_items[i].flags & NSPACE_ITEM_SNAPSHOT_EVENT) {
+ if (arg) {
+ vnode_lock_spin(vp);
+ vp->v_flag |= VNEEDSSNAPSHOT;
+ vnode_unlock(vp);
+ }
+ }
+
+ nspace_items[i].token = 0;
+ nspace_items[i].refcount = 1;
+
+ // announce the new item on the nspace_item_idx rendezvous address
+ wakeup((caddr_t)&nspace_item_idx);
+ }
+
+ //
+ // Now go to sleep until the handler does a wakeup on this
+ // slot in the nspace_items table (or we timeout).
+ //
+ keep_waiting = 1;
+ while(keep_waiting) {
+ ts.tv_sec = nspace_handler_timeout;
+ ts.tv_nsec = 0;
+ error = msleep((caddr_t)&(nspace_items[i].vp), &nspace_handler_lock, PVFS|PCATCH, "namespace-done", &ts);
+
+ // the item's state takes precedence over whatever msleep() returned
+ if (nspace_items[i].flags & NSPACE_ITEM_DONE) {
+ error = 0;
+ } else if (nspace_items[i].flags & NSPACE_ITEM_CANCELLED) {
+ // for a cancelled item the token field carries the error to return
+ error = nspace_items[i].token;
+ } else if (error == EWOULDBLOCK || error == ETIMEDOUT) {
+ if (nspace_items[i].flags & NSPACE_ITEM_RESET_TIMER) {
+ // a timer reset was requested: consume the request and wait another full timeout
+ nspace_items[i].flags &= ~NSPACE_ITEM_RESET_TIMER;
+ continue;
+ } else {
+ error = ETIMEDOUT;
+ }
+ } else if (error == 0) {
+ // hmmm, why did we get woken up?
+ // NOTE(review): this case only logs; the wait still ends and 0 is returned
+ printf("woken up for token %d but it's not done, cancelled or timedout and error == 0.\n",
+ nspace_items[i].token);
+ }
+
+ // last waiter out recycles the slot
+ if (--nspace_items[i].refcount == 0) {
+ nspace_items[i].vp = NULL; // clear this so that no one will match on it again
+ nspace_items[i].arg = NULL;
+ nspace_items[i].token = 0; // clear this so that the handler will not find it anymore
+ nspace_items[i].flags = 0; // this clears it for re-use
+ }
+ // wake threads sleeping in the table-full path above
+ wakeup(&nspace_token_id);
+ keep_waiting = 0;
+ }
+
+ lck_mtx_unlock(&nspace_handler_lock);
+
+ return error;
+}
+
+
+/*
+ * get_nspace_item_status - report the flags of the item queued for a vnode.
+ *
+ * Scans nspace_items[] under nspace_handler_lock for the first entry whose
+ * vp equals the given vnode.
+ *
+ * vp:     vnode to look up
+ * status: receives the item's flags word; left untouched when nothing matches
+ *
+ * Returns 0 on a match, ENOENT when no item refers to vp.
+ */
+int
+get_nspace_item_status(struct vnode *vp, int32_t *status)
+{
+	int slot;
+	int result = ENOENT;
+
+	lck_mtx_lock(&nspace_handler_lock);
+	for (slot = 0; slot < MAX_NSPACE_ITEMS; slot++) {
+		if (nspace_items[slot].vp != vp) {
+			continue;
+		}
+		*status = nspace_items[slot].flags;
+		result = 0;
+		break;
+	}
+	lck_mtx_unlock(&nspace_handler_lock);
+
+	return result;
+}
+
+
+#if 0
+/*
+ * build_volfs_path - format a "/.vol/<fsid>/<fileid>" path for a vnode.
+ * (Currently compiled out by the surrounding #if 0.)
+ *
+ * vp:   vnode whose va_fsid and va_fileid are fetched with vnode_getattr()
+ * path: output buffer
+ * len:  in: size passed to snprintf(); out: snprintf()'s return value plus one
+ *
+ * Returns 0 on success. If vnode_getattr() fails, a placeholder path is
+ * written instead and -1 is returned.
+ */
+static int
+build_volfs_path(struct vnode *vp, char *path, int *len)
+{
+	struct vnode_attr va;
+
+	VATTR_INIT(&va);
+	VATTR_WANTED(&va, va_fsid);
+	VATTR_WANTED(&va, va_fileid);
+
+	if (vnode_getattr(vp, &va, vfs_context_kernel()) == 0) {
+		*len = snprintf(path, *len, "/.vol/%d/%lld", (dev_t)va.va_fsid, va.va_fileid) + 1;
+		return 0;
+	}
+
+	// attributes unavailable: hand back a path that cannot exist
+	*len = snprintf(path, *len, "/non/existent/path/because/vnode_getattr/failed") + 1;
+	return -1;
+}
+#endif
+
+//
+// Note: this function does NOT check permissions on all of the
+// parent directories leading to this vnode. It should only be
+// called on behalf of a root process. Otherwise a process may
+// get access to a file because the file itself is readable even
+// though its parent directories would prevent access.
+//
+static int
+vn_open_with_vp(vnode_t vp, int fmode, vfs_context_t ctx)
+{
+ int error, action;
+
+ if ((error = suser(kauth_cred_get(), &(current_proc()->p_acflag)))) {
+ return error;
+ }
+
+#if CONFIG_MACF
+ error = mac_vnode_check_open(ctx, vp, fmode);
+ if (error)
+ return error;
+#endif
+
+ /* compute action to be authorized */
+ action = 0;
+ if (fmode & FREAD) {
+ action |= KAUTH_VNODE_READ_DATA;
+ }
+ if (fmode & (FWRITE | O_TRUNC)) {
+ /*
+ * If we are writing, appending, and not truncating,
+ * indicate that we are appending so that if the
+ * UF_APPEND or SF_APPEND bits are set, we do not deny
+ * the open.
+ */
+ if ((fmode & O_APPEND) && !(fmode & O_TRUNC)) {
+ action |= KAUTH_VNODE_APPEND_DATA;
+ } else {
+ action |= KAUTH_VNODE_WRITE_DATA;
+ }
+ }
+
+ if ((error = vnode_authorize(vp, NULL, action, ctx)) != 0)
+ return error;
+
+
+ //
+ // if the vnode is tagged VOPENEVT and the current process
+ // has the P_CHECKOPENEVT flag set, then we or in the O_EVTONLY
+ // flag to the open mode so that this open won't count against
+ // the vnode when carbon delete() does a vnode_isinuse() to see
+ // if a file is currently in use. this allows spotlight
+ // importers to not interfere with carbon apps that depend on
+ // the no-delete-if-busy semantics of carbon delete().
+ //
+ if ((vp->v_flag & VOPENEVT) && (current_proc()->p_flag & P_CHECKOPENEVT)) {
+ fmode |= O_EVTONLY;
+ }
+
+ if ( (error = VNOP_OPEN(vp, fmode, ctx)) ) {
+ return error;
+ }
+ if ( (error = vnode_ref_ext(vp, fmode, 0)) ) {
+ VNOP_CLOSE(vp, fmode, ctx);
+ return error;
+ }
+
+ /* Call out to allow 3rd party notification of open.
+ * Ignore result of kauth_authorize_fileop call.
+ */
+#if CONFIG_MACF
+ mac_vnode_notify_open(ctx, vp, fmode);
+#endif
+ kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_OPEN,
+ (uintptr_t)vp, 0);
+