+
+ /*
+ * Place mp on top of vnode, ref the vnode, call checkdirs(),
+ * and increment the name cache's mount generation
+ */
+
+ IMGSRC_DEBUG("About to call place_mount_and_checkdirs().\n");
+ error = place_mount_and_checkdirs(mp, vp, ctx);
+ if (error != 0) {
+ goto out2;
+ }
+
+ placed = TRUE;
+
+ strlcpy(old_mntonname, mp->mnt_vfsstat.f_mntonname, MAXPATHLEN);
+ strlcpy(mp->mnt_vfsstat.f_mntonname, cnp->cn_pnbuf, MAXPATHLEN);
+
+ /* Forbid future moves */
+ mount_lock(mp);
+ mp->mnt_kern_flag |= MNTK_HAS_MOVED;
+ mount_unlock(mp);
+
+ /* Finally, add to mount list, completely ready to go */
+ if (mount_list_add(mp) != 0) {
+ /*
+ * The system is shutting down trying to umount
+ * everything, so fail with a plausible errno.
+ */
+ error = EBUSY;
+ goto out3;
+ }
+
+ mount_end_update(mp);
+ vnode_put(rvp);
+ FREE(old_mntonname, M_TEMP);
+
+ vfs_notify_mount(pvp);
+
+ return 0;
+out3:
+ strlcpy(mp->mnt_vfsstat.f_mntonname, old_mntonname, MAXPATHLEN);
+
+ mount_lock(mp);
+ mp->mnt_kern_flag &= ~(MNTK_HAS_MOVED);
+ mount_unlock(mp);
+
+out2:
+ /*
+ * Placing the mp on the vnode clears VMOUNT,
+ * so cleanup is different after that point
+ */
+ if (placed) {
+ /* Rele the vp, clear VMOUNT and v_mountedhere */
+ undo_place_on_covered_vp(mp, vp);
+ } else {
+ vnode_lock_spin(vp);
+ CLR(vp->v_flag, VMOUNT);
+ vnode_unlock(vp);
+ }
+out1:
+ mount_end_update(mp);
+
+out0:
+ vnode_put(rvp);
+ FREE(old_mntonname, M_TEMP);
+ return error;
+}
+
+#endif /* CONFIG_IMGSRC_ACCESS */
+
+/*
+ * Best-effort quota enabling for a freshly mounted HFS volume.
+ * For each quota type, the presence of the quota "ops" trigger file on
+ * the mount decides whether Q_QUOTAON is issued. All errors are
+ * deliberately ignored so quota trouble never fails the mount itself.
+ */
+void
+enablequotas(struct mount *mp, vfs_context_t ctx)
+{
+ struct nameidata qnd;
+ int type;
+ char qfpath[MAXPATHLEN];
+ const char *qfname = QUOTAFILENAME;
+ const char *qfopsname = QUOTAOPSNAME;
+ const char *qfextension[] = INITQFNAMES;
+
+ /* XXX Should be an MNTK_ flag, instead of strncmp()'s */
+ if (strncmp(mp->mnt_vfsstat.f_fstypename, "hfs", sizeof("hfs")) != 0 ) {
+ return;
+ }
+ /*
+ * Enable filesystem disk quotas if necessary.
+ * We ignore errors as this should not interfere with final mount
+ */
+ for (type=0; type < MAXQUOTAS; type++) {
+ snprintf(qfpath, sizeof(qfpath), "%s/%s.%s", mp->mnt_vfsstat.f_mntonname, qfopsname, qfextension[type]);
+ NDINIT(&qnd, LOOKUP, OP_MOUNT, FOLLOW, UIO_SYSSPACE,
+ CAST_USER_ADDR_T(qfpath), ctx);
+ if (namei(&qnd) != 0)
+ continue; /* option file to trigger quotas is not present */
+ /* Only existence matters; drop the iocount namei() took. */
+ vnode_put(qnd.ni_vp);
+ nameidone(&qnd);
+ snprintf(qfpath, sizeof(qfpath), "%s/%s.%s", mp->mnt_vfsstat.f_mntonname, qfname, qfextension[type]);
+
+ (void) VFS_QUOTACTL(mp, QCMD(Q_QUOTAON, type), 0, qfpath, ctx);
+ }
+ return;
+}
+
+
+/*
+ * proc_iterate() callback for checkdirs(): if this process's current
+ * or root directory is the just-covered vnode (olddp), repoint it at
+ * the root of the new mount (newdp), moving the usecount accordingly.
+ * Always returns PROC_RETURNED so iteration continues.
+ */
+static int
+checkdirs_callback(proc_t p, void * arg)
+{
+ struct cdirargs * cdrp = (struct cdirargs * )arg;
+ vnode_t olddp = cdrp->olddp;
+ vnode_t newdp = cdrp->newdp;
+ struct filedesc *fdp;
+ vnode_t tvp;
+ vnode_t fdp_cvp;
+ vnode_t fdp_rvp;
+ int cdir_changed = 0;
+ int rdir_changed = 0;
+
+ /*
+ * XXX Also needs to iterate each thread in the process to see if it
+ * XXX is using a per-thread current working directory, and, if so,
+ * XXX update that as well.
+ */
+
+ proc_fdlock(p);
+ fdp = p->p_fd;
+ if (fdp == (struct filedesc *)0) {
+ proc_fdunlock(p);
+ return(PROC_RETURNED);
+ }
+ /* Snapshot cwd/root pointers under the fd lock. */
+ fdp_cvp = fdp->fd_cdir;
+ fdp_rvp = fdp->fd_rdir;
+ proc_fdunlock(p);
+
+ if (fdp_cvp == olddp) {
+ /* Take a usecount on newdp before releasing the one on olddp. */
+ vnode_ref(newdp);
+ /* NOTE(review): fd_cdir re-read here without the fd lock -- confirm intended */
+ tvp = fdp->fd_cdir;
+ fdp_cvp = newdp;
+ cdir_changed = 1;
+ vnode_rele(tvp);
+ }
+ if (fdp_rvp == olddp) {
+ vnode_ref(newdp);
+ /* NOTE(review): fd_rdir re-read here without the fd lock -- confirm intended */
+ tvp = fdp->fd_rdir;
+ fdp_rvp = newdp;
+ rdir_changed = 1;
+ vnode_rele(tvp);
+ }
+ if (cdir_changed || rdir_changed) {
+ /* Publish the updated pointers back under the fd lock. */
+ proc_fdlock(p);
+ fdp->fd_cdir = fdp_cvp;
+ fdp->fd_rdir = fdp_rvp;
+ proc_fdunlock(p);
+ }
+ return(PROC_RETURNED);
+}
+
+
+
+/*
+ * Scan all active processes to see if any of them have a current
+ * or root directory onto which the new filesystem has just been
+ * mounted. If so, replace them with the new mount point.
+ */
+static int
+checkdirs(vnode_t olddp, vfs_context_t ctx)
+{
+ vnode_t newdp;
+ vnode_t tvp;
+ int err;
+ struct cdirargs cdr;
+
+ /* Only our caller's usecount exists: nobody has it as cwd/root. */
+ if (olddp->v_usecount == 1)
+ return(0);
+ /* Resolve the root of the filesystem just mounted over olddp. */
+ err = VFS_ROOT(olddp->v_mountedhere, &newdp, ctx);
+
+ if (err != 0) {
+#if DIAGNOSTIC
+ panic("mount: lost mount: error %d", err);
+#endif
+ return(err);
+ }
+
+ cdr.olddp = olddp;
+ cdr.newdp = newdp;
+ /* do not block for exec/fork trans as the vp in cwd & rootdir are not changing */
+ proc_iterate(PROC_ALLPROCLIST | PROC_NOWAITTRANS, checkdirs_callback, (void *)&cdr, NULL, NULL);
+
+ /* The system root vnode moves too if it was the covered vnode. */
+ if (rootvnode == olddp) {
+ vnode_ref(newdp);
+ tvp = rootvnode;
+ rootvnode = newdp;
+ vnode_rele(tvp);
+ }
+
+ vnode_put(newdp);
+ return(0);
+}
+
+/*
+ * Unmount a file system.
+ *
+ * Note: unmount takes a path to the vnode mounted on as argument,
+ * not special file (as before).
+ */
+/* ARGSUSED */
+int
+unmount(__unused proc_t p, struct unmount_args *uap, __unused int32_t *retval)
+{
+ vnode_t vp;
+ struct mount *mp;
+ int error;
+ struct nameidata nd;
+ vfs_context_t ctx = vfs_context_current();
+
+ NDINIT(&nd, LOOKUP, OP_UNMOUNT, FOLLOW | AUDITVNPATH1,
+ UIO_USERSPACE, uap->path, ctx);
+ error = namei(&nd);
+ if (error)
+ return (error);
+ vp = nd.ni_vp;
+ mp = vp->v_mount;
+ nameidone(&nd);
+
+#if CONFIG_MACF
+ error = mac_mount_check_umount(ctx, mp);
+ if (error != 0) {
+ vnode_put(vp);
+ return (error);
+ }
+#endif
+ /*
+ * Must be the root of the filesystem
+ */
+ if ((vp->v_flag & VROOT) == 0) {
+ vnode_put(vp);
+ return (EINVAL);
+ }
+ /* Hold the mount across dropping our iocount on its root vnode. */
+ mount_ref(mp, 0);
+ vnode_put(vp);
+ /* safedounmount consumes the mount ref */
+ return (safedounmount(mp, uap->flags, ctx));
+}
+
+/*
+ * Unmount the filesystem identified by fsid.
+ * Returns ENOENT when no such mount exists; otherwise forwards the
+ * result of safedounmount().
+ */
+int
+vfs_unmountbyfsid(fsid_t * fsid, int flags, vfs_context_t ctx)
+{
+ mount_t found_mp;
+
+ /* Lookup returns the mount with an iteration reference held. */
+ found_mp = mount_list_lookupby_fsid(fsid, 0, 1);
+ if (found_mp == (mount_t)0) {
+ return(ENOENT);
+ }
+ /* Trade the iteration reference for a real mount ref. */
+ mount_ref(found_mp, 0);
+ mount_iterdrop(found_mp);
+ /* safedounmount consumes the mount ref */
+ return(safedounmount(found_mp, flags, ctx));
+}
+
+
+/*
+ * The mount struct comes with a mount ref which will be consumed.
+ * Do the actual file system unmount, prevent some common foot shooting.
+ *
+ * On every path the incoming mount ref is consumed: dropped here on
+ * the error paths, or handed to dounmount() on success.
+ */
+int
+safedounmount(struct mount *mp, int flags, vfs_context_t ctx)
+{
+ int error;
+ proc_t p = vfs_context_proc(ctx);
+
+ /*
+ * If the file system is not responding and MNT_NOBLOCK
+ * is set and not a forced unmount then return EBUSY.
+ */
+ if ((mp->mnt_kern_flag & MNT_LNOTRESP) &&
+ (flags & MNT_NOBLOCK) && ((flags & MNT_FORCE) == 0)) {
+ error = EBUSY;
+ goto out;
+ }
+
+ /*
+ * Skip authorization if the mount is tagged as permissive and
+ * this is not a forced-unmount attempt.
+ */
+ if (!(((mp->mnt_kern_flag & MNTK_PERMIT_UNMOUNT) != 0) && ((flags & MNT_FORCE) == 0))) {
+ /*
+ * Only root, or the user that did the original mount is
+ * permitted to unmount this filesystem.
+ */
+ if ((mp->mnt_vfsstat.f_owner != kauth_cred_getuid(kauth_cred_get())) &&
+ (error = suser(kauth_cred_get(), &p->p_acflag)))
+ goto out;
+ }
+ /*
+ * Don't allow unmounting the root file system.
+ */
+ if (mp->mnt_flag & MNT_ROOTFS) {
+ error = EBUSY; /* the root is always busy */
+ goto out;
+ }
+
+#ifdef CONFIG_IMGSRC_ACCESS
+ /* The mount backing the root image may never be unmounted. */
+ if (mp->mnt_kern_flag & MNTK_BACKS_ROOT) {
+ error = EBUSY;
+ goto out;
+ }
+#endif /* CONFIG_IMGSRC_ACCESS */
+
+ return (dounmount(mp, flags, 1, ctx));
+
+out:
+ mount_drop(mp, 0);
+ return(error);
+}
+
+/*
+ * Do the actual file system unmount.
+ *
+ * flags carries MNT_FORCE and friends; withref != 0 means the caller
+ * passed in a mount ref, which is dropped here. On failure the
+ * unmount-in-progress state flags are rolled back so the mount
+ * remains usable; on success the mount is marked dead and torn down.
+ */
+int
+dounmount(struct mount *mp, int flags, int withref, vfs_context_t ctx)
+{
+ vnode_t coveredvp = (vnode_t)0;
+ int error;
+ int needwakeup = 0;
+ int forcedunmount = 0;
+ int lflags = 0;
+ struct vnode *devvp = NULLVP;
+#if CONFIG_TRIGGERS
+ proc_t p = vfs_context_proc(ctx);
+ int did_vflush = 0;
+ int pflags_save = 0;
+#endif /* CONFIG_TRIGGERS */
+
+ mount_lock(mp);
+
+ /*
+ * If already an unmount in progress just return EBUSY.
+ * Even a forced unmount cannot override.
+ */
+ if (mp->mnt_lflag & MNT_LUNMOUNT) {
+ if (withref != 0)
+ mount_drop(mp, 1);
+ mount_unlock(mp);
+ return (EBUSY);
+ }
+
+ if (flags & MNT_FORCE) {
+ forcedunmount = 1;
+ mp->mnt_lflag |= MNT_LFORCE;
+ }
+
+#if CONFIG_TRIGGERS
+ /* Keep a hung remote filesystem from wedging this process. */
+ if (flags & MNT_NOBLOCK && p != kernproc)
+ pflags_save = OSBitOrAtomic(P_NOREMOTEHANG, &p->p_flag);
+#endif
+
+ mp->mnt_kern_flag |= MNTK_UNMOUNT;
+ mp->mnt_lflag |= MNT_LUNMOUNT;
+ mp->mnt_flag &=~ MNT_ASYNC;
+ /*
+ * anyone currently in the fast path that
+ * trips over the cached rootvp will be
+ * dumped out and forced into the slow path
+ * to regenerate a new cached value
+ */
+ mp->mnt_realrootvp = NULLVP;
+ mount_unlock(mp);
+
+ if (forcedunmount && (flags & MNT_LNOSUB) == 0) {
+ /*
+ * Force unmount any mounts in this filesystem.
+ * If any unmounts fail - just leave them dangling.
+ * Avoids recursion.
+ */
+ (void) dounmount_submounts(mp, flags | MNT_LNOSUB, ctx);
+ }
+
+ /*
+ * taking the name_cache_lock exclusively will
+ * insure that everyone is out of the fast path who
+ * might be trying to use a now stale copy of
+ * vp->v_mountedhere->mnt_realrootvp
+ * bumping mount_generation causes the cached values
+ * to be invalidated
+ */
+ name_cache_lock();
+ mount_generation++;
+ name_cache_unlock();
+
+
+ lck_rw_lock_exclusive(&mp->mnt_rwlock);
+ if (withref != 0)
+ mount_drop(mp, 0);
+#if CONFIG_FSE
+ fsevent_unmount(mp); /* has to come first! */
+#endif
+ error = 0;
+ if (forcedunmount == 0) {
+ ubc_umount(mp); /* release cached vnodes */
+ if ((mp->mnt_flag & MNT_RDONLY) == 0) {
+ error = VFS_SYNC(mp, MNT_WAIT, ctx);
+ if (error) {
+ /* Sync failed: roll back the unmount-in-progress state. */
+ mount_lock(mp);
+ mp->mnt_kern_flag &= ~MNTK_UNMOUNT;
+ mp->mnt_lflag &= ~MNT_LUNMOUNT;
+ mp->mnt_lflag &= ~MNT_LFORCE;
+ goto out;
+ }
+ }
+ }
+
+ IOBSDMountChange(mp, kIOMountChangeUnmount);
+
+#if CONFIG_TRIGGERS
+ vfs_nested_trigger_unmounts(mp, flags, ctx);
+ did_vflush = 1;
+#endif
+ if (forcedunmount)
+ lflags |= FORCECLOSE;
+ error = vflush(mp, NULLVP, SKIPSWAP | SKIPSYSTEM | SKIPROOT | lflags);
+ if ((forcedunmount == 0) && error) {
+ /* Busy vnodes remain and this is not forced: undo and bail. */
+ mount_lock(mp);
+ mp->mnt_kern_flag &= ~MNTK_UNMOUNT;
+ mp->mnt_lflag &= ~MNT_LUNMOUNT;
+ mp->mnt_lflag &= ~MNT_LFORCE;
+ goto out;
+ }
+
+ /* make sure there are no one in the mount iterations or lookup */
+ mount_iterdrain(mp);
+
+ error = VFS_UNMOUNT(mp, flags, ctx);
+ if (error) {
+ /* Filesystem refused the unmount: reopen iteration and roll back. */
+ mount_iterreset(mp);
+ mount_lock(mp);
+ mp->mnt_kern_flag &= ~MNTK_UNMOUNT;
+ mp->mnt_lflag &= ~MNT_LUNMOUNT;
+ mp->mnt_lflag &= ~MNT_LFORCE;
+ goto out;
+ }
+
+ /* increment the operations count */
+ if (!error)
+ OSAddAtomic(1, &vfs_nummntops);
+
+ if ( mp->mnt_devvp && mp->mnt_vtable->vfc_vfsflags & VFC_VFSLOCALARGS) {
+ /* hold an io reference and drop the usecount before close */
+ devvp = mp->mnt_devvp;
+ vnode_getalways(devvp);
+ vnode_rele(devvp);
+ VNOP_CLOSE(devvp, mp->mnt_flag & MNT_RDONLY ? FREAD : FREAD|FWRITE,
+ ctx);
+ vnode_clearmountedon(devvp);
+ vnode_put(devvp);
+ }
+ lck_rw_done(&mp->mnt_rwlock);
+ mount_list_remove(mp);
+ lck_rw_lock_exclusive(&mp->mnt_rwlock);
+
+ /* mark the mount point hook in the vp but not drop the ref yet */
+ if ((coveredvp = mp->mnt_vnodecovered) != NULLVP) {
+ /*
+ * The covered vnode needs special handling. Trying to get an
+ * iocount must not block here as this may lead to deadlocks
+ * if the Filesystem to which the covered vnode belongs is
+ * undergoing forced unmounts. Since we hold a usecount, the
+ * vnode cannot be reused (it can, however, still be terminated)
+ */
+ vnode_getalways(coveredvp);
+ vnode_lock_spin(coveredvp);
+
+ mp->mnt_crossref++;
+ coveredvp->v_mountedhere = (struct mount *)0;
+ CLR(coveredvp->v_flag, VMOUNT);
+
+ vnode_unlock(coveredvp);
+ vnode_put(coveredvp);
+ }
+
+ mount_list_lock();
+ mp->mnt_vtable->vfc_refcount--;
+ mount_list_unlock();
+
+ cache_purgevfs(mp); /* remove cache entries for this file sys */
+ vfs_event_signal(NULL, VQ_UNMOUNT, (intptr_t)NULL);
+ mount_lock(mp);
+ mp->mnt_lflag |= MNT_LDEAD;
+
+ if (mp->mnt_lflag & MNT_LWAIT) {
+ /*
+ * do the wakeup here
+ * in case we block in mount_refdrain
+ * which will drop the mount lock
+ * and allow anyone blocked in vfs_busy
+ * to wakeup and see the LDEAD state
+ */
+ mp->mnt_lflag &= ~MNT_LWAIT;
+ wakeup((caddr_t)mp);
+ }
+ mount_refdrain(mp);
+out:
+ /* NB: every path reaching here holds the mount lock. */
+ if (mp->mnt_lflag & MNT_LWAIT) {
+ mp->mnt_lflag &= ~MNT_LWAIT;
+ needwakeup = 1;
+ }
+
+#if CONFIG_TRIGGERS
+ if (flags & MNT_NOBLOCK && p != kernproc) {
+ // Restore P_NOREMOTEHANG bit to its previous value
+ if ((pflags_save & P_NOREMOTEHANG) == 0)
+ OSBitAndAtomic(~((uint32_t) P_NOREMOTEHANG), &p->p_flag);
+ }
+
+ /*
+ * Callback and context are set together under the mount lock, and
+ * never cleared, so we're safe to examine them here, drop the lock,
+ * and call out.
+ */
+ if (mp->mnt_triggercallback != NULL) {
+ mount_unlock(mp);
+ if (error == 0) {
+ mp->mnt_triggercallback(mp, VTC_RELEASE, mp->mnt_triggerdata, ctx);
+ } else if (did_vflush) {
+ mp->mnt_triggercallback(mp, VTC_REPLACE, mp->mnt_triggerdata, ctx);
+ }
+ } else {
+ mount_unlock(mp);
+ }
+#else
+ mount_unlock(mp);
+#endif /* CONFIG_TRIGGERS */
+
+ lck_rw_done(&mp->mnt_rwlock);
+
+ if (needwakeup)
+ wakeup((caddr_t)mp);
+
+ if (!error) {
+ if ((coveredvp != NULLVP)) {
+ vnode_t pvp = NULLVP;
+
+ /*
+ * The covered vnode needs special handling. Trying to
+ * get an iocount must not block here as this may lead
+ * to deadlocks if the Filesystem to which the covered
+ * vnode belongs is undergoing forced unmounts. Since we
+ * hold a usecount, the vnode cannot be reused
+ * (it can, however, still be terminated).
+ */
+ vnode_getalways(coveredvp);
+
+ mount_dropcrossref(mp, coveredvp, 0);
+ /*
+ * We'll _try_ to detect if this really needs to be
+ * done. The coveredvp can only be in termination (or
+ * terminated) if the coveredvp's mount point is in a
+ * forced unmount (or has been) since we still hold the
+ * ref.
+ */
+ if (!vnode_isrecycled(coveredvp)) {
+ pvp = vnode_getparent(coveredvp);
+#if CONFIG_TRIGGERS
+ if (coveredvp->v_resolve) {
+ vnode_trigger_rearm(coveredvp, ctx);
+ }
+#endif
+ }
+
+ vnode_rele(coveredvp);
+ vnode_put(coveredvp);
+ coveredvp = NULLVP;
+
+ if (pvp) {
+ /* Notify watchers of the parent dir that an entry changed. */
+ lock_vnode_and_post(pvp, NOTE_WRITE);
+ vnode_put(pvp);
+ }
+ } else if (mp->mnt_flag & MNT_ROOTFS) {
+ /* The root mount has no covered vnode; destroy it directly. */
+ mount_lock_destroy(mp);
+#if CONFIG_MACF
+ mac_mount_label_destroy(mp);
+#endif
+ FREE_ZONE((caddr_t)mp, sizeof (struct mount), M_MOUNT);
+ } else
+ panic("dounmount: no coveredvp");
+ }
+ return (error);
+}
+
+/*
+ * Unmount any mounts in this filesystem.
+ *
+ * Collects the fsids of mp's submounts (relying on the mount list
+ * being in mount order), then unmounts them deepest-first. Unmount
+ * errors are ignored; failures are left dangling.
+ */
+void
+dounmount_submounts(struct mount *mp, int flags, vfs_context_t ctx)
+{
+ mount_t smp;
+ fsid_t *fsids, fsid;
+ int fsids_sz;
+ int count = 0, i, m = 0;
+ vnode_t vp;
+
+ mount_list_lock();
+
+ // Get an array to hold the submounts fsids.
+ TAILQ_FOREACH(smp, &mountlist, mnt_list)
+ count++;
+ fsids_sz = count * sizeof(fsid_t);
+ /* M_NOWAIT: cannot block for memory while holding mount_list_lock. */
+ MALLOC(fsids, fsid_t *, fsids_sz, M_TEMP, M_NOWAIT);
+ if (fsids == NULL) {
+ mount_list_unlock();
+ goto out;
+ }
+ fsids[0] = mp->mnt_vfsstat.f_fsid; // Prime the pump
+
+ /*
+ * Fill the array with submount fsids.
+ * Since mounts are always added to the tail of the mount list, the
+ * list is always in mount order.
+ * For each mount check if the mounted-on vnode belongs to a
+ * mount that's already added to our array of mounts to be unmounted.
+ */
+ for (smp = TAILQ_NEXT(mp, mnt_list); smp; smp = TAILQ_NEXT(smp, mnt_list)) {
+ vp = smp->mnt_vnodecovered;
+ if (vp == NULL)
+ continue;
+ fsid = vnode_mount(vp)->mnt_vfsstat.f_fsid; // Underlying fsid
+ for (i = 0; i <= m; i++) {
+ if (fsids[i].val[0] == fsid.val[0] &&
+ fsids[i].val[1] == fsid.val[1]) {
+ fsids[++m] = smp->mnt_vfsstat.f_fsid;
+ break;
+ }
+ }
+ }
+ mount_list_unlock();
+
+ // Unmount the submounts in reverse order. Ignore errors.
+ for (i = m; i > 0; i--) {
+ smp = mount_list_lookupby_fsid(&fsids[i], 0, 1);
+ if (smp) {
+ mount_ref(smp, 0);
+ mount_iterdrop(smp);
+ (void) dounmount(smp, flags, 1, ctx);
+ }
+ }
+out:
+ if (fsids)
+ FREE(fsids, M_TEMP);
+}
+
+/*
+ * Drop one crossref on mp taken while dp was its covered vnode.
+ * When the count reaches zero and dp no longer points at mp, the
+ * mount structure itself is destroyed and freed.
+ * need_put != 0 releases the caller's iocount on dp here as well.
+ */
+void
+mount_dropcrossref(mount_t mp, vnode_t dp, int need_put)
+{
+ vnode_lock(dp);
+ mp->mnt_crossref--;
+
+ if (mp->mnt_crossref < 0)
+ panic("mount cross refs -ve");
+
+ if ((mp != dp->v_mountedhere) && (mp->mnt_crossref == 0)) {
+
+ if (need_put)
+ vnode_put_locked(dp);
+ vnode_unlock(dp);
+
+ /* Last crossref and no longer mounted here: free the mount. */
+ mount_lock_destroy(mp);
+#if CONFIG_MACF
+ mac_mount_label_destroy(mp);
+#endif
+ FREE_ZONE((caddr_t)mp, sizeof (struct mount), M_MOUNT);
+ return;
+ }
+ if (need_put)
+ vnode_put_locked(dp);
+ vnode_unlock(dp);
+}
+
+
+/*
+ * Sync each mounted filesystem.
+ */
+#if DIAGNOSTIC
+int syncprt = 0; /* when set, vfs_bufstats() is dumped after each sync */
+#endif
+
+int print_vmpage_stat=0; /* when set, dirty vm-page counts are reported after each sync */
+int sync_timeout = 60; // Sync time limit (sec)
+
+/*
+ * vfs_iterate() callback: push dirty data for one mount.
+ * A non-NULL arg requests a synchronous (MNT_WAIT) flush.
+ * Read-only mounts are skipped; MNT_ASYNC is suppressed for the
+ * duration of the VFS_SYNC and then restored.
+ */
+static int
+sync_callback(mount_t mp, __unused void *arg)
+{
+ int saved_async;
+
+ if (mp->mnt_flag & MNT_RDONLY)
+ return (VFS_RETURNED);
+
+ saved_async = mp->mnt_flag & MNT_ASYNC;
+ mp->mnt_flag &= ~MNT_ASYNC;
+ VFS_SYNC(mp, arg ? MNT_WAIT : MNT_NOWAIT, vfs_context_kernel());
+ if (saved_async)
+ mp->mnt_flag |= MNT_ASYNC;
+
+ return (VFS_RETURNED);
+}
+
+/*
+ * sync() system call: schedule a non-blocking flush of every
+ * mounted filesystem. Always succeeds.
+ */
+/* ARGSUSED */
+int
+sync(__unused proc_t p, __unused struct sync_args *uap, __unused int32_t *retval)
+{
+ vfs_iterate(LK_NOWAIT, sync_callback, NULL);
+
+ if (print_vmpage_stat) {
+ vm_countdirtypages();
+ }
+
+#if DIAGNOSTIC
+ if (syncprt) {
+ vfs_bufstats();
+ }
+#endif /* DIAGNOSTIC */
+ return (0);
+}
+
+/*
+ * Worker thread body for sync_async(): flush all filesystems, then
+ * wake the waiter sleeping on the timeout address.
+ *
+ * NOTE(review): arg points at sync_async()'s on-stack variable; if the
+ * waiter's msleep() times out and returns first, the wakeup() below
+ * targets an address nobody is sleeping on -- confirm this best-effort
+ * behavior is intended.
+ */
+static void
+sync_thread(void *arg, __unused wait_result_t wr)
+{
+ int *timeout = (int *) arg;
+
+ vfs_iterate(LK_NOWAIT, sync_callback, NULL);
+
+ if (timeout)
+ wakeup((caddr_t) timeout);
+ if (print_vmpage_stat) {
+ vm_countdirtypages();
+ }
+
+#if DIAGNOSTIC
+ if (syncprt)
+ vfs_bufstats();
+#endif /* DIAGNOSTIC */
+}
+
+/*
+ * Sync in a separate thread so we can time out if it blocks.
+ * Always returns 0; a timeout is only logged.
+ */
+static int
+sync_async(int timeout)
+{
+ thread_t thd;
+ int error;
+ struct timespec ts = {timeout, 0};
+
+ lck_mtx_lock(sync_mtx_lck);
+ if (kernel_thread_start(sync_thread, &timeout, &thd) != KERN_SUCCESS) {
+ printf("sync_thread failed\n");
+ lck_mtx_unlock(sync_mtx_lck);
+ return (0);
+ }
+
+ /* Sleep until sync_thread wakes &timeout or ts seconds pass; PDROP releases the mutex. */
+ error = msleep((caddr_t) &timeout, sync_mtx_lck, (PVFS | PDROP | PCATCH), "sync_thread", &ts);
+ if (error) {
+ printf("sync timed out: %d sec\n", timeout);
+ }
+ /* Drop the ref from kernel_thread_start(); the thread may still be running. */
+ thread_deallocate(thd);
+
+ return (0);
+}
+
+/*
+ * In-kernel sync entry point for power management.
+ * Delegates to sync_async() so a hung filesystem cannot block the
+ * caller for more than sync_timeout seconds.
+ */
+__private_extern__ int
+sync_internal(void)
+{
+ (void) sync_async(sync_timeout);
+ return (0);
+}
+
+/*
+ * Change filesystem quotas.
+ *
+ * uap->cmd encodes the quota sub-command (high bits, see SUBCMDSHIFT);
+ * the meaning of uap->arg depends on that sub-command. User data is
+ * staged into kernel memory before VFS_QUOTACTL() and copied back out
+ * afterwards where required.
+ */
+#if QUOTA
+int
+quotactl(proc_t p, struct quotactl_args *uap, __unused int32_t *retval)
+{
+ struct mount *mp;
+ int error, quota_cmd, quota_status;
+ caddr_t datap;
+ size_t fnamelen;
+ struct nameidata nd;
+ vfs_context_t ctx = vfs_context_current();
+ struct dqblk my_dqblk;
+
+ AUDIT_ARG(uid, uap->uid);
+ AUDIT_ARG(cmd, uap->cmd);
+ NDINIT(&nd, LOOKUP, OP_LOOKUP, FOLLOW | AUDITVNPATH1, UIO_USERSPACE,
+ uap->path, ctx);
+ error = namei(&nd);
+ if (error)
+ return (error);
+ /* Only the mount is needed; drop the vnode right away. */
+ mp = nd.ni_vp->v_mount;
+ vnode_put(nd.ni_vp);
+ nameidone(&nd);
+
+ /* copyin any data we will need for downstream code */
+ quota_cmd = uap->cmd >> SUBCMDSHIFT;
+
+ switch (quota_cmd) {
+ case Q_QUOTAON:
+ /* uap->arg specifies a file from which to take the quotas */
+ fnamelen = MAXPATHLEN;
+ datap = kalloc(MAXPATHLEN);
+ /* NOTE(review): kalloc() result is not NULL-checked before copyinstr -- confirm it cannot fail here. */
+ error = copyinstr(uap->arg, datap, MAXPATHLEN, &fnamelen);
+ break;
+ case Q_GETQUOTA:
+ /* uap->arg is a pointer to a dqblk structure. */
+ datap = (caddr_t) &my_dqblk;
+ break;
+ case Q_SETQUOTA:
+ case Q_SETUSE:
+ /* uap->arg is a pointer to a dqblk structure. */
+ datap = (caddr_t) &my_dqblk;
+ if (proc_is64bit(p)) {
+ struct user_dqblk my_dqblk64;
+ error = copyin(uap->arg, (caddr_t)&my_dqblk64, sizeof (my_dqblk64));
+ if (error == 0) {
+ munge_dqblk(&my_dqblk, &my_dqblk64, FALSE);
+ }
+ }
+ else {
+ error = copyin(uap->arg, (caddr_t)&my_dqblk, sizeof (my_dqblk));
+ }
+ break;
+ case Q_QUOTASTAT:
+ /* uap->arg is a pointer to an integer */
+ datap = (caddr_t) &quota_status;
+ break;
+ default:
+ datap = NULL;
+ break;
+ } /* switch */
+
+ if (error == 0) {
+ error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, datap, ctx);
+ }
+
+ switch (quota_cmd) {
+ case Q_QUOTAON:
+ if (datap != NULL)
+ kfree(datap, MAXPATHLEN);
+ break;
+ case Q_GETQUOTA:
+ /* uap->arg is a pointer to a dqblk structure we need to copy out to */
+ if (error == 0) {
+ if (proc_is64bit(p)) {
+ struct user_dqblk my_dqblk64 = {.dqb_bhardlimit = 0};
+ munge_dqblk(&my_dqblk, &my_dqblk64, TRUE);
+ error = copyout((caddr_t)&my_dqblk64, uap->arg, sizeof (my_dqblk64));
+ }
+ else {
+ error = copyout(datap, uap->arg, sizeof (struct dqblk));
+ }
+ }
+ break;
+ case Q_QUOTASTAT:
+ /* uap->arg is a pointer to an integer */
+ if (error == 0) {
+ error = copyout(datap, uap->arg, sizeof(quota_status));
+ }
+ break;
+ default:
+ break;
+ } /* switch */
+
+ return (error);
+}
+#else
+int
+quotactl(__unused proc_t p, __unused struct quotactl_args *uap, __unused int32_t *retval)
+{
+ return (EOPNOTSUPP);
+}
+#endif /* QUOTA */
+
+/*
+ * Get filesystem statistics.
+ *
+ * Returns: 0 Success
+ * namei:???
+ * vfs_update_vfsstat:???
+ * munge_statfs:EFAULT
+ */
+/* ARGSUSED */
+int
+statfs(__unused proc_t p, struct statfs_args *uap, __unused int32_t *retval)
+{
+ struct mount *mp;
+ struct vfsstatfs *sp;
+ int error;
+ struct nameidata nd;
+ vfs_context_t ctx = vfs_context_current();
+ vnode_t vp;
+
+ NDINIT(&nd, LOOKUP, OP_STATFS, FOLLOW | AUDITVNPATH1,
+ UIO_USERSPACE, uap->path, ctx);
+ error = namei(&nd);
+ if (error)
+ return (error);
+ vp = nd.ni_vp;
+ mp = vp->v_mount;
+ sp = &mp->mnt_vfsstat;
+ nameidone(&nd);
+
+ /* Refresh the cached statistics before copying them out. */
+ error = vfs_update_vfsstat(mp, ctx, VFS_USER_EVENT);
+ if (error != 0) {
+ vnode_put(vp);
+ return (error);
+ }
+
+ /* munge_statfs() handles the 32/64-bit user structure layouts. */
+ error = munge_statfs(mp, sp, uap->buf, NULL, IS_64BIT_PROCESS(p), TRUE);
+ vnode_put(vp);
+ return (error);
+}
+
+/*
+ * Get filesystem statistics for the filesystem containing fd's vnode.
+ */
+/* ARGSUSED */
+int
+fstatfs(__unused proc_t p, struct fstatfs_args *uap, __unused int32_t *retval)
+{
+ vnode_t vp;
+ struct mount *mp;
+ struct vfsstatfs *sp;
+ int error;
+
+ AUDIT_ARG(fd, uap->fd);
+
+ if ( (error = file_vnode(uap->fd, &vp)) )
+ return (error);
+
+ error = vnode_getwithref(vp);
+ if (error) {
+ file_drop(uap->fd);
+ return (error);
+ }
+
+ AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
+
+ /* v_mount can be NULL, e.g. if the vnode is mid-reclaim. */
+ mp = vp->v_mount;
+ if (!mp) {
+ error = EBADF;
+ goto out;
+ }
+ sp = &mp->mnt_vfsstat;
+ if ((error = vfs_update_vfsstat(mp,vfs_context_current(),VFS_USER_EVENT)) != 0) {
+ goto out;
+ }
+
+ error = munge_statfs(mp, sp, uap->buf, NULL, IS_64BIT_PROCESS(p), TRUE);
+
+out:
+ file_drop(uap->fd);
+ vnode_put(vp);
+
+ return (error);
+}
+
+/*
+ * Common routine to handle copying of statfs64 data to user space.
+ * Populates a kernel-side struct statfs64 from the cached vfsstatfs
+ * and the mount itself, then copies it out to bufp.
+ */
+static int
+statfs64_common(struct mount *mp, struct vfsstatfs *sfsp, user_addr_t bufp)
+{
+ struct statfs64 out;
+ const char *fstype;
+
+ /* Zero everything (including padding) so no kernel bytes leak out. */
+ bzero(&out, sizeof(out));
+
+ out.f_bsize = sfsp->f_bsize;
+ out.f_iosize = (int32_t)sfsp->f_iosize;
+ out.f_blocks = sfsp->f_blocks;
+ out.f_bfree = sfsp->f_bfree;
+ out.f_bavail = sfsp->f_bavail;
+ out.f_files = sfsp->f_files;
+ out.f_ffree = sfsp->f_ffree;
+ out.f_fsid = sfsp->f_fsid;
+ out.f_owner = sfsp->f_owner;
+ out.f_type = mp->mnt_vtable->vfc_typenum;
+ out.f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
+ out.f_fssubtype = sfsp->f_fssubtype;
+
+ /* Honor a per-mount filesystem-type-name override when present. */
+ if (mp->mnt_kern_flag & MNTK_TYPENAME_OVERRIDE)
+ fstype = &mp->fstypename_override[0];
+ else
+ fstype = &sfsp->f_fstypename[0];
+ strlcpy(&out.f_fstypename[0], fstype, MFSTYPENAMELEN);
+ strlcpy(&out.f_mntonname[0], &sfsp->f_mntonname[0], MAXPATHLEN);
+ strlcpy(&out.f_mntfromname[0], &sfsp->f_mntfromname[0], MAXPATHLEN);
+
+ return(copyout((caddr_t)&out, bufp, sizeof(out)));
+}
+
+/*
+ * Get file system statistics in 64-bit mode
+ */
+int
+statfs64(__unused struct proc *p, struct statfs64_args *uap, __unused int32_t *retval)
+{
+ struct mount *mp;
+ struct vfsstatfs *sp;
+ int error;
+ struct nameidata nd;
+ vfs_context_t ctxp = vfs_context_current();
+ vnode_t vp;
+
+ NDINIT(&nd, LOOKUP, OP_STATFS, FOLLOW | AUDITVNPATH1,
+ UIO_USERSPACE, uap->path, ctxp);
+ error = namei(&nd);
+ if (error)
+ return (error);
+ vp = nd.ni_vp;
+ mp = vp->v_mount;
+ sp = &mp->mnt_vfsstat;
+ nameidone(&nd);
+
+ /* Refresh the cached statistics before copying them out. */
+ error = vfs_update_vfsstat(mp, ctxp, VFS_USER_EVENT);
+ if (error != 0) {
+ vnode_put(vp);
+ return (error);
+ }
+
+ error = statfs64_common(mp, sp, uap->buf);
+ vnode_put(vp);
+
+ return (error);
+}
+
+/*
+ * Get file system statistics in 64-bit mode
+ */
+int
+fstatfs64(__unused struct proc *p, struct fstatfs64_args *uap, __unused int32_t *retval)
+{
+ struct vnode *vp;
+ struct mount *mp;
+ struct vfsstatfs *sp;
+ int error;
+
+ AUDIT_ARG(fd, uap->fd);
+
+ if ( (error = file_vnode(uap->fd, &vp)) )
+ return (error);
+
+ error = vnode_getwithref(vp);
+ if (error) {
+ file_drop(uap->fd);
+ return (error);
+ }
+
+ AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
+
+ /* v_mount can be NULL, e.g. if the vnode is mid-reclaim. */
+ mp = vp->v_mount;
+ if (!mp) {
+ error = EBADF;
+ goto out;
+ }
+ sp = &mp->mnt_vfsstat;
+ if ((error = vfs_update_vfsstat(mp, vfs_context_current(), VFS_USER_EVENT)) != 0) {
+ goto out;
+ }
+
+ error = statfs64_common(mp, sp, uap->buf);
+
+out:
+ file_drop(uap->fd);
+ vnode_put(vp);
+
+ return (error);
+}
+
+/* State shared with the getfsstat/getfsstat64 vfs_iterate() callbacks. */
+struct getfsstat_struct {
+ user_addr_t sfsp; /* user buffer cursor, advanced per entry copied */
+ user_addr_t *mp; /* per-mount MAC label buffers, or NULL */
+ int count; /* mounts seen (may exceed maxcount) */
+ int maxcount; /* capacity of the user buffer, in entries */
+ int flags; /* caller's MNT_WAIT/MNT_NOWAIT/MNT_DWAIT */
+ int error; /* first error encountered, 0 if none */
+};
+
+
+/*
+ * vfs_iterate() callback for getfsstat(): copy one mount's statistics
+ * into the user buffer if there is room, plus its MAC label when
+ * requested. fstp->count is advanced unconditionally so the caller
+ * can report the total mount count even when the buffer is full.
+ */
+static int
+getfsstat_callback(mount_t mp, void * arg)
+{
+
+ struct getfsstat_struct *fstp = (struct getfsstat_struct *)arg;
+ struct vfsstatfs *sp;
+ int error, my_size;
+ vfs_context_t ctx = vfs_context_current();
+
+ if (fstp->sfsp && fstp->count < fstp->maxcount) {
+ sp = &mp->mnt_vfsstat;
+ /*
+ * If MNT_NOWAIT is specified, do not refresh the
+ * fsstat cache. MNT_WAIT/MNT_DWAIT overrides MNT_NOWAIT.
+ */
+ if (((fstp->flags & MNT_NOWAIT) == 0 || (fstp->flags & (MNT_WAIT | MNT_DWAIT))) &&
+ (error = vfs_update_vfsstat(mp, ctx,
+ VFS_USER_EVENT))) {
+ KAUTH_DEBUG("vfs_update_vfsstat returned %d", error);
+ return(VFS_RETURNED);
+ }
+
+ /*
+ * Need to handle LP64 version of struct statfs
+ */
+ error = munge_statfs(mp, sp, fstp->sfsp, &my_size, IS_64BIT_PROCESS(vfs_context_proc(ctx)), FALSE);
+ if (error) {
+ fstp->error = error;
+ return(VFS_RETURNED_DONE);
+ }
+ /* Advance the cursor by the size munge_statfs() actually wrote. */
+ fstp->sfsp += my_size;
+
+ if (fstp->mp) {
+#if CONFIG_MACF
+ error = mac_mount_label_get(mp, *fstp->mp);
+ if (error) {
+ fstp->error = error;
+ return(VFS_RETURNED_DONE);
+ }
+#endif
+ fstp->mp++;
+ }
+ }
+ fstp->count++;
+ return(VFS_RETURNED);
+}
+
+/*
+ * Get statistics on all filesystems.
+ *
+ * Thin wrapper: forwards to __mac_getfsstat() with an empty MAC
+ * label request.
+ */
+int
+getfsstat(__unused proc_t p, struct getfsstat_args *uap, int *retval)
+{
+ struct __mac_getfsstat_args muap = {
+ .buf = uap->buf,
+ .bufsize = uap->bufsize,
+ .mac = USER_ADDR_NULL,
+ .macsize = 0,
+ .flags = uap->flags,
+ };
+
+ return (__mac_getfsstat(p, &muap, retval));
+}
+
+/*
+ * __mac_getfsstat: Get MAC-related file system statistics
+ *
+ * Parameters: p (ignored)
+ * uap User argument descriptor (see below)
+ * retval Count of file system statistics (N stats)
+ *
+ * Indirect: uap->bufsize Buffer size
+ * uap->macsize MAC info size
+ * uap->buf Buffer where information will be returned
+ * uap->mac MAC info
+ * uap->flags File system flags
+ *
+ *
+ * Returns: 0 Success
+ * !0 Not success
+ *
+ */
+int
+__mac_getfsstat(__unused proc_t p, struct __mac_getfsstat_args *uap, int *retval)
+{
+ user_addr_t sfsp;
+ user_addr_t *mp;
+ size_t count, maxcount, bufsize, macsize;
+ struct getfsstat_struct fst;
+
+ bufsize = (size_t) uap->bufsize;
+ macsize = (size_t) uap->macsize;
+
+ /* Capacity of the user buffer, in per-ABI statfs entries. */
+ if (IS_64BIT_PROCESS(p)) {
+ maxcount = bufsize / sizeof(struct user64_statfs);
+ }
+ else {
+ maxcount = bufsize / sizeof(struct user32_statfs);
+ }
+ sfsp = uap->buf;
+ count = 0;
+
+ mp = NULL;
+
+#if CONFIG_MACF
+ if (uap->mac != USER_ADDR_NULL) {
+ u_int32_t *mp0;
+ int error;
+ unsigned int i;
+
+ /* The MAC label array must have exactly one entry per statfs slot. */
+ count = (macsize / (IS_64BIT_PROCESS(p) ? 8 : 4));
+ if (count != maxcount)
+ return (EINVAL);
+
+ /* Copy in the array */
+ MALLOC(mp0, u_int32_t *, macsize, M_MACTEMP, M_WAITOK);
+ if (mp0 == NULL) {
+ return (ENOMEM);
+ }
+
+ error = copyin(uap->mac, mp0, macsize);
+ if (error) {
+ FREE(mp0, M_MACTEMP);
+ return (error);
+ }
+
+ /* Normalize to an array of user_addr_t */
+ MALLOC(mp, user_addr_t *, count * sizeof(user_addr_t), M_MACTEMP, M_WAITOK);
+ if (mp == NULL) {
+ FREE(mp0, M_MACTEMP);
+ return (ENOMEM);
+ }
+
+ for (i = 0; i < count; i++) {
+ if (IS_64BIT_PROCESS(p))
+ mp[i] = ((user_addr_t *)mp0)[i];
+ else
+ mp[i] = (user_addr_t)mp0[i];
+ }
+ FREE(mp0, M_MACTEMP);
+ }
+#endif
+
+
+ fst.sfsp = sfsp;
+ fst.mp = mp;
+ fst.flags = uap->flags;
+ fst.count = 0;
+ fst.error = 0;
+ fst.maxcount = maxcount;
+
+
+ vfs_iterate(0, getfsstat_callback, &fst);
+
+ if (mp)
+ FREE(mp, M_MACTEMP);
+
+ if (fst.error ) {
+ KAUTH_DEBUG("ERROR - %s gets %d", p->p_comm, fst.error);
+ return(fst.error);
+ }
+
+ /* If the buffer could not hold every mount, report what fit. */
+ if (fst.sfsp && fst.count > fst.maxcount)
+ *retval = fst.maxcount;
+ else
+ *retval = fst.count;
+ return (0);
+}
+
+/*
+ * vfs_iterate() callback for getfsstat64(): copy one mount's
+ * statistics into the user buffer (as struct statfs64) when there is
+ * room. fstp->count is advanced unconditionally.
+ */
+static int
+getfsstat64_callback(mount_t mp, void * arg)
+{
+ struct getfsstat_struct *fstp = (struct getfsstat_struct *)arg;
+ struct vfsstatfs *sp;
+ int error;
+
+ if (fstp->sfsp && fstp->count < fstp->maxcount) {
+ sp = &mp->mnt_vfsstat;
+ /*
+ * If MNT_NOWAIT is specified, do not refresh the fsstat
+ * cache. MNT_WAIT overrides MNT_NOWAIT.
+ *
+ * We treat MNT_DWAIT as MNT_WAIT for all instances of
+ * getfsstat, since the constants are out of the same
+ * namespace.
+ */
+ if (((fstp->flags & MNT_NOWAIT) == 0 ||
+ (fstp->flags & (MNT_WAIT | MNT_DWAIT))) &&
+ (error = vfs_update_vfsstat(mp, vfs_context_current(), VFS_USER_EVENT))) {
+ KAUTH_DEBUG("vfs_update_vfsstat returned %d", error);
+ return(VFS_RETURNED);
+ }
+
+ error = statfs64_common(mp, sp, fstp->sfsp);
+ if (error) {
+ fstp->error = error;
+ return(VFS_RETURNED_DONE);
+ }
+ /* Fixed-size entries: advance the cursor by one statfs64. */
+ fstp->sfsp += sizeof(struct statfs64);
+ }
+ fstp->count++;
+ return(VFS_RETURNED);
+}
+
+/*
+ * Get statistics on all file systems in 64 bit mode.
+ *
+ * *retval receives the number of entries written, capped at the user
+ * buffer's capacity; with a NULL buffer it receives the total mount
+ * count.
+ */
+int
+getfsstat64(__unused proc_t p, struct getfsstat64_args *uap, int *retval)
+{
+ user_addr_t sfsp;
+ int maxcount;
+ struct getfsstat_struct fst;
+
+ /* Capacity of the user buffer, in struct statfs64 entries. */
+ maxcount = uap->bufsize / sizeof(struct statfs64);
+
+ sfsp = uap->buf;
+
+ /* fst.mp is unused by getfsstat64_callback (no MAC labels here). */
+ fst.sfsp = sfsp;
+ fst.flags = uap->flags;
+ fst.count = 0;
+ fst.error = 0;
+ fst.maxcount = maxcount;
+
+ vfs_iterate(0, getfsstat64_callback, &fst);
+
+ if (fst.error ) {
+ KAUTH_DEBUG("ERROR - %s gets %d", p->p_comm, fst.error);
+ return(fst.error);
+ }
+
+ /* If the buffer could not hold every mount, report what fit. */
+ if (fst.sfsp && fst.count > fst.maxcount)
+ *retval = fst.maxcount;
+ else
+ *retval = fst.count;
+
+ return (0);
+}
+
+/*
+ * Resolve the vnode backing file descriptor `fd`.
+ *
+ * ctx - vfs context of caller
+ * fd - file descriptor for which vnode is required
+ * vpp - out: the vnode, or NULLVP on failure
+ *
+ * On success *vpp carries an iocount which the caller must release
+ * with vnode_put().
+ */
+static int
+vnode_getfromfd(vfs_context_t ctx, int fd, vnode_t *vpp)
+{
+ int err;
+ vnode_t fvp;
+ struct fileproc *fp;
+ proc_t p = vfs_context_proc(ctx);
+
+ *vpp = NULLVP;
+
+ err = fp_getfvp(p, fd, &fp, &fvp);
+ if (err)
+ return (err);
+
+ /* Trade the fileproc hold for an iocount on the vnode. */
+ err = vnode_getwithref(fvp);
+ (void)fp_drop(p, fd, fp, 0);
+ if (err == 0)
+ *vpp = fvp;
+ return (err);
+}
+
+/*
+ * Wrapper function around namei to start lookup from a directory
+ * specified by a file descriptor ni_dirfd.
+ *
+ * In addition to all the errors returned by namei, this call can
+ * return ENOTDIR if the file descriptor does not refer to a directory.
+ * and EBADF if the file descriptor is not valid.
+ */
+int
+nameiat(struct nameidata *ndp, int dirfd)
+{
+ /* Only consult dirfd for fresh lookups of relative paths. */
+ if ((dirfd != AT_FDCWD) &&
+ !(ndp->ni_flag & NAMEI_CONTLOOKUP) &&
+ !(ndp->ni_cnd.cn_flags & USEDVP)) {
+ int error = 0;
+ char c;
+
+ /* Peek at the path's first byte to detect absolute paths. */
+ if (UIO_SEG_IS_USER_SPACE(ndp->ni_segflg)) {
+ error = copyin(ndp->ni_dirp, &c, sizeof(char));
+ if (error)
+ return (error);
+ } else {
+ c = *((char *)(ndp->ni_dirp));
+ }
+
+ if (c != '/') {
+ vnode_t dvp_at;
+
+ error = vnode_getfromfd(ndp->ni_cnd.cn_context, dirfd,
+ &dvp_at);
+ if (error)
+ return (error);
+
+ if (vnode_vtype(dvp_at) != VDIR) {
+ vnode_put(dvp_at);
+ return (ENOTDIR);
+ }
+
+ /* Use dvp_at as the starting directory for this lookup only. */
+ ndp->ni_dvp = dvp_at;
+ ndp->ni_cnd.cn_flags |= USEDVP;
+ error = namei(ndp);
+ ndp->ni_cnd.cn_flags &= ~USEDVP;
+ vnode_put(dvp_at);
+ return (error);
+ }
+ }
+
+ return (namei(ndp));
+}
+
+/*
+ * Change current working directory to a given file descriptor.
+ */
+/* ARGSUSED */
+static int
+common_fchdir(proc_t p, struct fchdir_args *uap, int per_thread)
+{
+ struct filedesc *fdp = p->p_fd;
+ vnode_t vp;
+ vnode_t tdp;
+ vnode_t tvp;
+ struct mount *mp;
+ int error;
+ vfs_context_t ctx = vfs_context_current();
+
+ AUDIT_ARG(fd, uap->fd);
+ if (per_thread && uap->fd == -1) {
+ /*
+ * Switching back from per-thread to per process CWD; verify we
+ * in fact have one before proceeding. The only success case
+ * for this code path is to return 0 preemptively after zapping
+ * the thread structure contents.
+ */
+ thread_t th = vfs_context_thread(ctx);
+ if (th) {
+ uthread_t uth = get_bsdthread_info(th);
+ tvp = uth->uu_cdir;
+ uth->uu_cdir = NULLVP;
+ if (tvp != NULLVP) {
+ vnode_rele(tvp);
+ return (0);
+ }
+ }
+ return (EBADF);
+ }
+
+ if ( (error = file_vnode(uap->fd, &vp)) )
+ return(error);
+ if ( (error = vnode_getwithref(vp)) ) {
+ file_drop(uap->fd);
+ return(error);
+ }
+
+ AUDIT_ARG(vnpath, vp, ARG_VNODE1);
+
+ if (vp->v_type != VDIR) {
+ error = ENOTDIR;
+ goto out;
+ }
+
+#if CONFIG_MACF
+ error = mac_vnode_check_chdir(ctx, vp);
+ if (error)
+ goto out;
+#endif
+ error = vnode_authorize(vp, NULL, KAUTH_VNODE_SEARCH, ctx);
+ if (error)
+ goto out;
+
+ while (!error && (mp = vp->v_mountedhere) != NULL) {
+ if (vfs_busy(mp, LK_NOWAIT)) {
+ error = EACCES;
+ goto out;
+ }
+ error = VFS_ROOT(mp, &tdp, ctx);
+ vfs_unbusy(mp);
+ if (error)
+ break;
+ vnode_put(vp);
+ vp = tdp;
+ }
+ if (error)
+ goto out;
+ if ( (error = vnode_ref(vp)) )
+ goto out;
+ vnode_put(vp);
+
+ if (per_thread) {
+ thread_t th = vfs_context_thread(ctx);
+ if (th) {
+ uthread_t uth = get_bsdthread_info(th);
+ tvp = uth->uu_cdir;
+ uth->uu_cdir = vp;
+ OSBitOrAtomic(P_THCWD, &p->p_flag);
+ } else {
+ vnode_rele(vp);
+ return (ENOENT);
+ }
+ } else {
+ proc_fdlock(p);
+ tvp = fdp->fd_cdir;
+ fdp->fd_cdir = vp;
+ proc_fdunlock(p);
+ }
+
+ if (tvp)
+ vnode_rele(tvp);
+ file_drop(uap->fd);
+
+ return (0);
+out:
+ vnode_put(vp);
+ file_drop(uap->fd);
+
+ return(error);
+}
+
/* fchdir: change the process-wide current working directory to fd's directory. */
int
fchdir(proc_t p, struct fchdir_args *uap, __unused int32_t *retval)
{
	return common_fchdir(p, uap, 0);
}
+
/*
 * __pthread_fchdir: change only the calling thread's working directory
 * to fd's directory; fd == -1 reverts the thread to the process cwd.
 */
int
__pthread_fchdir(proc_t p, struct __pthread_fchdir_args *uap, __unused int32_t *retval)
{
	return common_fchdir(p, (void *)uap, 1);
}
+
+/*
+ * Change current working directory (".").
+ *
+ * Returns: 0 Success
+ * change_dir:ENOTDIR
+ * change_dir:???
+ * vnode_ref:ENOENT No such file or directory
+ */
+/* ARGSUSED */
+static int
+common_chdir(proc_t p, struct chdir_args *uap, int per_thread)
+{
+ struct filedesc *fdp = p->p_fd;
+ int error;
+ struct nameidata nd;
+ vnode_t tvp;
+ vfs_context_t ctx = vfs_context_current();
+
+ NDINIT(&nd, LOOKUP, OP_CHDIR, FOLLOW | AUDITVNPATH1,
+ UIO_USERSPACE, uap->path, ctx);
+ error = change_dir(&nd, ctx);
+ if (error)
+ return (error);
+ if ( (error = vnode_ref(nd.ni_vp)) ) {
+ vnode_put(nd.ni_vp);
+ return (error);
+ }
+ /*
+ * drop the iocount we picked up in change_dir
+ */
+ vnode_put(nd.ni_vp);
+
+ if (per_thread) {
+ thread_t th = vfs_context_thread(ctx);
+ if (th) {
+ uthread_t uth = get_bsdthread_info(th);
+ tvp = uth->uu_cdir;
+ uth->uu_cdir = nd.ni_vp;
+ OSBitOrAtomic(P_THCWD, &p->p_flag);
+ } else {
+ vnode_rele(nd.ni_vp);
+ return (ENOENT);
+ }
+ } else {
+ proc_fdlock(p);
+ tvp = fdp->fd_cdir;
+ fdp->fd_cdir = nd.ni_vp;
+ proc_fdunlock(p);
+ }
+
+ if (tvp)
+ vnode_rele(tvp);
+
+ return (0);
+}
+
+
+/*
+ * chdir
+ *
+ * Change current working directory (".") for the entire process
+ *
+ * Parameters: p Process requesting the call
+ * uap User argument descriptor (see below)
+ * retval (ignored)
+ *
+ * Indirect parameters: uap->path Directory path
+ *
+ * Returns: 0 Success
+ * common_chdir: ENOTDIR
+ * common_chdir: ENOENT No such file or directory
+ * common_chdir: ???
+ *
+ */
+int
+chdir(proc_t p, struct chdir_args *uap, __unused int32_t *retval)
+{
+ return common_chdir(p, (void *)uap, 0);
+}
+
+/*
+ * __pthread_chdir
+ *
+ * Change current working directory (".") for a single thread
+ *
+ * Parameters: p Process requesting the call
+ * uap User argument descriptor (see below)
+ * retval (ignored)
+ *
+ * Indirect parameters: uap->path Directory path
+ *
+ * Returns: 0 Success
+ * common_chdir: ENOTDIR
+ * common_chdir: ENOENT No such file or directory
+ * common_chdir: ???
+ *
+ */
+int
+__pthread_chdir(proc_t p, struct __pthread_chdir_args *uap, __unused int32_t *retval)
+{
+ return common_chdir(p, (void *)uap, 1);
+}
+
+
+/*
+ * Change notion of root (``/'') directory.
+ */
+/* ARGSUSED */
+int
+chroot(proc_t p, struct chroot_args *uap, __unused int32_t *retval)
+{
+ struct filedesc *fdp = p->p_fd;
+ int error;
+ struct nameidata nd;
+ vnode_t tvp;
+ vfs_context_t ctx = vfs_context_current();
+
+ if ((error = suser(kauth_cred_get(), &p->p_acflag)))
+ return (error);
+
+ NDINIT(&nd, LOOKUP, OP_CHROOT, FOLLOW | AUDITVNPATH1,
+ UIO_USERSPACE, uap->path, ctx);
+ error = change_dir(&nd, ctx);
+ if (error)
+ return (error);
+
+#if CONFIG_MACF
+ error = mac_vnode_check_chroot(ctx, nd.ni_vp,
+ &nd.ni_cnd);
+ if (error) {
+ vnode_put(nd.ni_vp);
+ return (error);
+ }
+#endif
+
+ if ( (error = vnode_ref(nd.ni_vp)) ) {
+ vnode_put(nd.ni_vp);
+ return (error);
+ }
+ vnode_put(nd.ni_vp);
+
+ proc_fdlock(p);
+ tvp = fdp->fd_rdir;
+ fdp->fd_rdir = nd.ni_vp;
+ fdp->fd_flags |= FD_CHROOT;
+ proc_fdunlock(p);
+
+ if (tvp != NULL)
+ vnode_rele(tvp);
+
+ return (0);
+}
+
+/*
+ * Common routine for chroot and chdir.
+ *
+ * Returns: 0 Success
+ * ENOTDIR Not a directory
+ * namei:??? [anything namei can return]
+ * vnode_authorize:??? [anything vnode_authorize can return]
+ */
+static int
+change_dir(struct nameidata *ndp, vfs_context_t ctx)
+{
+ vnode_t vp;
+ int error;
+
+ if ((error = namei(ndp)))
+ return (error);
+ nameidone(ndp);
+ vp = ndp->ni_vp;
+
+ if (vp->v_type != VDIR) {
+ vnode_put(vp);
+ return (ENOTDIR);
+ }
+
+#if CONFIG_MACF
+ error = mac_vnode_check_chdir(ctx, vp);
+ if (error) {
+ vnode_put(vp);
+ return (error);
+ }
+#endif
+
+ error = vnode_authorize(vp, NULL, KAUTH_VNODE_SEARCH, ctx);
+ if (error) {
+ vnode_put(vp);
+ return (error);
+ }
+
+ return (error);
+}
+
+/*
+ * Free the vnode data (for directories) associated with the file glob.
+ */
+struct fd_vn_data *
+fg_vn_data_alloc(void)
+{
+ struct fd_vn_data *fvdata;
+
+ /* Allocate per fd vnode data */
+ MALLOC(fvdata, struct fd_vn_data *, (sizeof(struct fd_vn_data)),
+ M_FD_VN_DATA, M_WAITOK | M_ZERO);
+ lck_mtx_init(&fvdata->fv_lock, fd_vn_lck_grp, fd_vn_lck_attr);
+ return fvdata;
+}
+
+/*
+ * Free the vnode data (for directories) associated with the file glob.
+ */
+void
+fg_vn_data_free(void *fgvndata)
+{
+ struct fd_vn_data *fvdata = (struct fd_vn_data *)fgvndata;
+
+ if (fvdata->fv_buf)
+ FREE(fvdata->fv_buf, M_FD_DIRBUF);
+ lck_mtx_destroy(&fvdata->fv_lock, fd_vn_lck_grp);
+ FREE(fvdata, M_FD_VN_DATA);
+}
+
+/*
+ * Check permissions, allocate an open file structure,
+ * and call the device open routine if any.
+ *
+ * Returns: 0 Success
+ * EINVAL
+ * EINTR
+ * falloc:ENFILE
+ * falloc:EMFILE
+ * falloc:ENOMEM
+ * vn_open_auth:???
+ * dupfdopen:???
+ * VNOP_ADVLOCK:???
+ * vnode_setsize:???
+ *
+ * XXX Need to implement uid, gid
+ */
+int
+open1(vfs_context_t ctx, struct nameidata *ndp, int uflags,
+ struct vnode_attr *vap, fp_allocfn_t fp_zalloc, void *cra,
+ int32_t *retval)
+{
+ proc_t p = vfs_context_proc(ctx);
+ uthread_t uu = get_bsdthread_info(vfs_context_thread(ctx));
+ struct fileproc *fp;
+ vnode_t vp;
+ int flags, oflags;
+ int type, indx, error;
+ struct flock lf;
+ struct vfs_context context;
+
+ oflags = uflags;
+
+ if ((oflags & O_ACCMODE) == O_ACCMODE)
+ return(EINVAL);
+
+ flags = FFLAGS(uflags);
+ CLR(flags, FENCRYPTED);
+ CLR(flags, FUNENCRYPTED);
+
+ AUDIT_ARG(fflags, oflags);
+ AUDIT_ARG(mode, vap->va_mode);
+
+ if ((error = falloc_withalloc(p,
+ &fp, &indx, ctx, fp_zalloc, cra)) != 0) {
+ return (error);
+ }
+ uu->uu_dupfd = -indx - 1;
+
+ if ((error = vn_open_auth(ndp, &flags, vap))) {
+ if ((error == ENODEV || error == ENXIO) && (uu->uu_dupfd >= 0)){ /* XXX from fdopen */
+ if ((error = dupfdopen(p->p_fd, indx, uu->uu_dupfd, flags, error)) == 0) {
+ fp_drop(p, indx, NULL, 0);
+ *retval = indx;
+ return (0);
+ }
+ }
+ if (error == ERESTART)
+ error = EINTR;
+ fp_free(p, indx, fp);
+ return (error);
+ }
+ uu->uu_dupfd = 0;
+ vp = ndp->ni_vp;
+
+ fp->f_fglob->fg_flag = flags & (FMASK | O_EVTONLY | FENCRYPTED | FUNENCRYPTED);
+ fp->f_fglob->fg_ops = &vnops;
+ fp->f_fglob->fg_data = (caddr_t)vp;
+
+ if (flags & (O_EXLOCK | O_SHLOCK)) {
+ lf.l_whence = SEEK_SET;
+ lf.l_start = 0;
+ lf.l_len = 0;
+ if (flags & O_EXLOCK)
+ lf.l_type = F_WRLCK;
+ else
+ lf.l_type = F_RDLCK;
+ type = F_FLOCK;
+ if ((flags & FNONBLOCK) == 0)
+ type |= F_WAIT;
+#if CONFIG_MACF
+ error = mac_file_check_lock(vfs_context_ucred(ctx), fp->f_fglob,
+ F_SETLK, &lf);
+ if (error)
+ goto bad;
+#endif
+ if ((error = VNOP_ADVLOCK(vp, (caddr_t)fp->f_fglob, F_SETLK, &lf, type, ctx, NULL)))
+ goto bad;
+ fp->f_fglob->fg_flag |= FHASLOCK;
+ }
+
+ /* try to truncate by setting the size attribute */
+ if ((flags & O_TRUNC) && ((error = vnode_setsize(vp, (off_t)0, 0, ctx)) != 0))
+ goto bad;
+
+ /*
+ * For directories we hold some additional information in the fd.
+ */
+ if (vnode_vtype(vp) == VDIR) {
+ fp->f_fglob->fg_vn_data = fg_vn_data_alloc();
+ } else {
+ fp->f_fglob->fg_vn_data = NULL;
+ }
+
+ vnode_put(vp);
+
+ /*
+ * The first terminal open (without a O_NOCTTY) by a session leader
+ * results in it being set as the controlling terminal.
+ */
+ if (vnode_istty(vp) && !(p->p_flag & P_CONTROLT) &&
+ !(flags & O_NOCTTY)) {
+ int tmp = 0;
+
+ (void)(*fp->f_fglob->fg_ops->fo_ioctl)(fp, (int)TIOCSCTTY,
+ (caddr_t)&tmp, ctx);
+ }
+
+ proc_fdlock(p);
+ if (flags & O_CLOEXEC)
+ *fdflags(p, indx) |= UF_EXCLOSE;
+ if (flags & O_CLOFORK)
+ *fdflags(p, indx) |= UF_FORKCLOSE;
+ procfdtbl_releasefd(p, indx, NULL);
+ fp_drop(p, indx, fp, 1);
+ proc_fdunlock(p);
+
+ *retval = indx;
+
+ return (0);
+bad:
+ context = *vfs_context_current();
+ context.vc_ucred = fp->f_fglob->fg_cred;
+
+ if ((fp->f_fglob->fg_flag & FHASLOCK) &&
+ (FILEGLOB_DTYPE(fp->f_fglob) == DTYPE_VNODE)) {
+ lf.l_whence = SEEK_SET;
+ lf.l_start = 0;
+ lf.l_len = 0;
+ lf.l_type = F_UNLCK;
+
+ (void)VNOP_ADVLOCK(
+ vp, (caddr_t)fp->f_fglob, F_UNLCK, &lf, F_FLOCK, ctx, NULL);
+ }
+
+ vn_close(vp, fp->f_fglob->fg_flag, &context);
+ vnode_put(vp);
+ fp_free(p, indx, fp);
+
+ return (error);
+}
+
+/*
+ * While most of the *at syscall handlers can call nameiat() which
+ * is a wrapper around namei, the use of namei and initialisation
+ * of nameidata are far removed and in different functions - namei
+ * gets called in vn_open_auth for open1. So we'll just do here what
+ * nameiat() does.
+ */
+static int
+open1at(vfs_context_t ctx, struct nameidata *ndp, int uflags,
+ struct vnode_attr *vap, fp_allocfn_t fp_zalloc, void *cra, int32_t *retval,
+ int dirfd)
+{
+ if ((dirfd != AT_FDCWD) && !(ndp->ni_cnd.cn_flags & USEDVP)) {
+ int error;
+ char c;
+
+ if (UIO_SEG_IS_USER_SPACE(ndp->ni_segflg)) {
+ error = copyin(ndp->ni_dirp, &c, sizeof(char));
+ if (error)
+ return (error);
+ } else {
+ c = *((char *)(ndp->ni_dirp));
+ }
+
+ if (c != '/') {
+ vnode_t dvp_at;
+
+ error = vnode_getfromfd(ndp->ni_cnd.cn_context, dirfd,
+ &dvp_at);
+ if (error)
+ return (error);
+
+ if (vnode_vtype(dvp_at) != VDIR) {
+ vnode_put(dvp_at);
+ return (ENOTDIR);
+ }
+
+ ndp->ni_dvp = dvp_at;
+ ndp->ni_cnd.cn_flags |= USEDVP;
+ error = open1(ctx, ndp, uflags, vap, fp_zalloc, cra,
+ retval);
+ vnode_put(dvp_at);
+ return (error);
+ }
+ }
+
+ return (open1(ctx, ndp, uflags, vap, fp_zalloc, cra, retval));
+}
+
+/*
+ * open_extended: open a file given a path name; with extended argument list (including extended security (ACL)).
+ *
+ * Parameters: p Process requesting the open
+ * uap User argument descriptor (see below)
+ * retval Pointer to an area to receive the
+ * return calue from the system call
+ *
+ * Indirect: uap->path Path to open (same as 'open')
+ * uap->flags Flags to open (same as 'open'
+ * uap->uid UID to set, if creating
+ * uap->gid GID to set, if creating
+ * uap->mode File mode, if creating (same as 'open')
+ * uap->xsecurity ACL to set, if creating
+ *
+ * Returns: 0 Success
+ * !0 errno value
+ *
+ * Notes: The kauth_filesec_t in 'va', if any, is in host byte order.
+ *
+ * XXX: We should enummerate the possible errno values here, and where
+ * in the code they originated.
+ */
+int
+open_extended(proc_t p, struct open_extended_args *uap, int32_t *retval)
+{
+ struct filedesc *fdp = p->p_fd;
+ int ciferror;
+ kauth_filesec_t xsecdst;
+ struct vnode_attr va;
+ struct nameidata nd;
+ int cmode;
+
+ AUDIT_ARG(owner, uap->uid, uap->gid);
+
+ xsecdst = NULL;
+ if ((uap->xsecurity != USER_ADDR_NULL) &&
+ ((ciferror = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0))
+ return ciferror;
+
+ VATTR_INIT(&va);
+ cmode = ((uap->mode &~ fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT;
+ VATTR_SET(&va, va_mode, cmode);
+ if (uap->uid != KAUTH_UID_NONE)
+ VATTR_SET(&va, va_uid, uap->uid);
+ if (uap->gid != KAUTH_GID_NONE)
+ VATTR_SET(&va, va_gid, uap->gid);
+ if (xsecdst != NULL)
+ VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
+
+ NDINIT(&nd, LOOKUP, OP_OPEN, FOLLOW | AUDITVNPATH1, UIO_USERSPACE,
+ uap->path, vfs_context_current());
+
+ ciferror = open1(vfs_context_current(), &nd, uap->flags, &va,
+ fileproc_alloc_init, NULL, retval);
+ if (xsecdst != NULL)
+ kauth_filesec_free(xsecdst);
+
+ return ciferror;
+}
+
+/*
+ * Go through the data-protected atomically controlled open (2)
+ *
+ * int open_dprotected_np(user_addr_t path, int flags, int class, int dpflags, int mode)
+ */
+int open_dprotected_np (__unused proc_t p, struct open_dprotected_np_args *uap, int32_t *retval) {
+ int flags = uap->flags;
+ int class = uap->class;
+ int dpflags = uap->dpflags;
+
+ /*
+ * Follow the same path as normal open(2)
+ * Look up the item if it exists, and acquire the vnode.
+ */
+ struct filedesc *fdp = p->p_fd;
+ struct vnode_attr va;
+ struct nameidata nd;
+ int cmode;
+ int error;
+
+ VATTR_INIT(&va);
+ /* Mask off all but regular access permissions */
+ cmode = ((uap->mode &~ fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT;
+ VATTR_SET(&va, va_mode, cmode & ACCESSPERMS);
+
+ NDINIT(&nd, LOOKUP, OP_OPEN, FOLLOW | AUDITVNPATH1, UIO_USERSPACE,
+ uap->path, vfs_context_current());
+
+ /*
+ * Initialize the extra fields in vnode_attr to pass down our
+ * extra fields.
+ * 1. target cprotect class.
+ * 2. set a flag to mark it as requiring open-raw-encrypted semantics.
+ */
+ if (flags & O_CREAT) {
+ /* lower level kernel code validates that the class is valid before applying it. */
+ if (class != PROTECTION_CLASS_DEFAULT) {
+ /*
+ * PROTECTION_CLASS_DEFAULT implies that we make the class for this
+ * file behave the same as open (2)
+ */
+ VATTR_SET(&va, va_dataprotect_class, class);
+ }
+ }
+
+ if (dpflags & (O_DP_GETRAWENCRYPTED|O_DP_GETRAWUNENCRYPTED)) {
+ if ( flags & (O_RDWR | O_WRONLY)) {
+ /* Not allowed to write raw encrypted bytes */
+ return EINVAL;
+ }
+ if (uap->dpflags & O_DP_GETRAWENCRYPTED) {
+ VATTR_SET(&va, va_dataprotect_flags, VA_DP_RAWENCRYPTED);
+ }
+ if (uap->dpflags & O_DP_GETRAWUNENCRYPTED) {
+ VATTR_SET(&va, va_dataprotect_flags, VA_DP_RAWUNENCRYPTED);
+ }
+ }
+
+ error = open1(vfs_context_current(), &nd, uap->flags, &va,
+ fileproc_alloc_init, NULL, retval);
+
+ return error;
+}
+
/*
 * Common open path for open(), openat() and openbyid_np(): builds the
 * vnode_attr (creation mode with umask applied) and nameidata, then
 * dispatches to open1at() with the given starting directory fd.
 */
static int
openat_internal(vfs_context_t ctx, user_addr_t path, int flags, int mode,
    int fd, enum uio_seg segflg, int *retval)
{
	struct filedesc *fdp = (vfs_context_proc(ctx))->p_fd;
	struct vnode_attr va;
	struct nameidata nd;
	int cmode;

	VATTR_INIT(&va);
	/* Mask off all but regular access permissions */
	cmode = ((mode &~ fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT;
	VATTR_SET(&va, va_mode, cmode & ACCESSPERMS);

	NDINIT(&nd, LOOKUP, OP_OPEN, FOLLOW | AUDITVNPATH1,
	    segflg, path, ctx);

	return (open1at(ctx, &nd, flags, &va, fileproc_alloc_init, NULL,
	    retval, fd));
}
+
/* open: cancellation point wrapper around open_nocancel(). */
int
open(proc_t p, struct open_args *uap, int32_t *retval)
{
	__pthread_testcancel(1);
	return(open_nocancel(p, (struct open_nocancel_args *)uap, retval));
}
+
/* open_nocancel: open relative to the cwd (AT_FDCWD), no cancellation check. */
int
open_nocancel(__unused proc_t p, struct open_nocancel_args *uap,
    int32_t *retval)
{
	return (openat_internal(vfs_context_current(), uap->path, uap->flags,
	    uap->mode, AT_FDCWD, UIO_USERSPACE, retval));
}
+
/* openat_nocancel: open relative to uap->fd, no cancellation check. */
int
openat_nocancel(__unused proc_t p, struct openat_nocancel_args *uap,
    int32_t *retval)
{
	return (openat_internal(vfs_context_current(), uap->path, uap->flags,
	    uap->mode, uap->fd, UIO_USERSPACE, retval));
}
+
/* openat: cancellation point wrapper around openat_nocancel(). */
int
openat(proc_t p, struct openat_args *uap, int32_t *retval)
{
	__pthread_testcancel(1);
	return(openat_nocancel(p, (struct openat_nocancel_args *)uap, retval));
}
+
+/*
+ * openbyid_np: open a file given a file system id and a file system object id
+ * the hfs file system object id is an fsobj_id_t {uint32, uint32}
+ * file systems that don't support object ids it is a node id (uint64_t).
+ *
+ * Parameters: p Process requesting the open
+ * uap User argument descriptor (see below)
+ * retval Pointer to an area to receive the
+ * return calue from the system call
+ *
+ * Indirect: uap->path Path to open (same as 'open')
+ *
+ * uap->fsid id of target file system
+ * uap->objid id of target file system object
+ * uap->flags Flags to open (same as 'open')
+ *
+ * Returns: 0 Success
+ * !0 errno value
+ *
+ *
+ * XXX: We should enummerate the possible errno values here, and where
+ * in the code they originated.
+ */
+int
+openbyid_np(__unused proc_t p, struct openbyid_np_args *uap, int *retval)
+{
+ fsid_t fsid;
+ uint64_t objid;
+ int error;
+ char *buf = NULL;
+ int buflen = MAXPATHLEN;
+ int pathlen = 0;
+ vfs_context_t ctx = vfs_context_current();
+
+ if ((error = priv_check_cred(vfs_context_ucred(ctx), PRIV_VFS_OPEN_BY_ID, 0))) {
+ return (error);
+ }
+
+ if ((error = copyin(uap->fsid, (caddr_t)&fsid, sizeof(fsid)))) {
+ return (error);
+ }
+
+ /*uap->obj is an fsobj_id_t defined as struct {uint32_t, uint32_t} */
+ if ((error = copyin(uap->objid, (caddr_t)&objid, sizeof(uint64_t)))) {
+ return (error);
+ }
+
+ AUDIT_ARG(value32, fsid.val[0]);
+ AUDIT_ARG(value64, objid);
+
+ /*resolve path from fsis, objid*/
+ do {
+ MALLOC(buf, char *, buflen + 1, M_TEMP, M_WAITOK);
+ if (buf == NULL) {
+ return (ENOMEM);
+ }
+
+ error = fsgetpath_internal(
+ ctx, fsid.val[0], objid,
+ buflen, buf, &pathlen);
+
+ if (error) {
+ FREE(buf, M_TEMP);
+ buf = NULL;
+ }
+ } while (error == ENOSPC && (buflen += MAXPATHLEN));
+
+ if (error) {
+ return error;
+ }
+
+ buf[pathlen] = 0;
+
+ error = openat_internal(
+ ctx, (user_addr_t)buf, uap->oflags, 0, AT_FDCWD, UIO_SYSSPACE, retval);
+
+ FREE(buf, M_TEMP);
+
+ return error;
+}
+
+
+/*
+ * Create a special file.
+ */
+static int mkfifo1(vfs_context_t ctx, user_addr_t upath, struct vnode_attr *vap);
+
/*
 * mknod: create a special file (character or block device node).
 * FIFOs are redirected to mkfifo1(); anything else requires superuser.
 */
int
mknod(proc_t p, struct mknod_args *uap, __unused int32_t *retval)
{
	struct vnode_attr va;
	vfs_context_t ctx = vfs_context_current();
	int error;
	struct nameidata nd;
	vnode_t vp, dvp;

	VATTR_INIT(&va);
	/* Apply the process umask to the requested mode. */
	VATTR_SET(&va, va_mode, (uap->mode & ALLPERMS) & ~p->p_fd->fd_cmask);
	VATTR_SET(&va, va_rdev, uap->dev);

	/* If it's a mknod() of a FIFO, call mkfifo1() instead */
	if ((uap->mode & S_IFMT) == S_IFIFO)
		return(mkfifo1(ctx, uap->path, &va));

	AUDIT_ARG(mode, uap->mode);
	AUDIT_ARG(value32, uap->dev);

	/* Creating device nodes is restricted to the superuser. */
	if ((error = suser(vfs_context_ucred(ctx), &p->p_acflag)))
		return (error);
	NDINIT(&nd, CREATE, OP_MKNOD, LOCKPARENT | AUDITVNPATH1,
	    UIO_USERSPACE, uap->path, ctx);
	error = namei(&nd);
	if (error)
		return (error);
	dvp = nd.ni_dvp;
	vp = nd.ni_vp;

	/* The target must not already exist. */
	if (vp != NULL) {
		error = EEXIST;
		goto out;
	}

	switch (uap->mode & S_IFMT) {
	case S_IFCHR:
		VATTR_SET(&va, va_type, VCHR);
		break;
	case S_IFBLK:
		VATTR_SET(&va, va_type, VBLK);
		break;
	default:
		error = EINVAL;
		goto out;
	}

#if CONFIG_MACF
	error = mac_vnode_check_create(ctx,
	    nd.ni_dvp, &nd.ni_cnd, &va);
	if (error)
		goto out;
#endif

	if ((error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, ctx)) != 0)
		goto out;

	if ((error = vn_create(dvp, &vp, &nd, &va, 0, 0, NULL, ctx)) != 0)
		goto out;

	if (vp) {
		int	update_flags = 0;

		// Make sure the name & parent pointers are hooked up
		if (vp->v_name == NULL)
			update_flags |= VNODE_UPDATE_NAME;
		if (vp->v_parent == NULLVP)
			update_flags |= VNODE_UPDATE_PARENT;

		if (update_flags)
			vnode_update_identity(vp, dvp, nd.ni_cnd.cn_nameptr, nd.ni_cnd.cn_namelen, nd.ni_cnd.cn_hash, update_flags);

#if CONFIG_FSE
		add_fsevent(FSE_CREATE_FILE, ctx,
		    FSE_ARG_VNODE, vp,
		    FSE_ARG_DONE);
#endif
	}

out:
	/*
	 * nameidone has to happen before we vnode_put(dvp)
	 * since it may need to release the fs_nodelock on the dvp
	 */
	nameidone(&nd);

	if (vp)
		vnode_put(vp);
	vnode_put(dvp);

	return (error);
}
+
+/*
+ * Create a named pipe.
+ *
+ * Returns: 0 Success
+ * EEXIST
+ * namei:???
+ * vnode_authorize:???
+ * vn_create:???
+ */
+static int
+mkfifo1(vfs_context_t ctx, user_addr_t upath, struct vnode_attr *vap)
+{
+ vnode_t vp, dvp;
+ int error;
+ struct nameidata nd;
+
+ NDINIT(&nd, CREATE, OP_MKFIFO, LOCKPARENT | AUDITVNPATH1,
+ UIO_USERSPACE, upath, ctx);
+ error = namei(&nd);
+ if (error)
+ return (error);
+ dvp = nd.ni_dvp;
+ vp = nd.ni_vp;
+
+ /* check that this is a new file and authorize addition */
+ if (vp != NULL) {
+ error = EEXIST;
+ goto out;
+ }
+ VATTR_SET(vap, va_type, VFIFO);
+
+ if ((error = vn_authorize_create(dvp, &nd.ni_cnd, vap, ctx, NULL)) != 0)
+ goto out;
+
+ error = vn_create(dvp, &vp, &nd, vap, 0, 0, NULL, ctx);
+out:
+ /*
+ * nameidone has to happen before we vnode_put(dvp)
+ * since it may need to release the fs_nodelock on the dvp
+ */
+ nameidone(&nd);
+
+ if (vp)
+ vnode_put(vp);
+ vnode_put(dvp);
+
+ return error;
+}
+
+
+/*
+ * mkfifo_extended: Create a named pipe; with extended argument list (including extended security (ACL)).
+ *
+ * Parameters: p Process requesting the open
+ * uap User argument descriptor (see below)
+ * retval (Ignored)
+ *
+ * Indirect: uap->path Path to fifo (same as 'mkfifo')
+ * uap->uid UID to set
+ * uap->gid GID to set
+ * uap->mode File mode to set (same as 'mkfifo')
+ * uap->xsecurity ACL to set, if creating
+ *
+ * Returns: 0 Success
+ * !0 errno value
+ *
+ * Notes: The kauth_filesec_t in 'va', if any, is in host byte order.
+ *
+ * XXX: We should enummerate the possible errno values here, and where
+ * in the code they originated.
+ */
+int
+mkfifo_extended(proc_t p, struct mkfifo_extended_args *uap, __unused int32_t *retval)
+{
+ int ciferror;
+ kauth_filesec_t xsecdst;
+ struct vnode_attr va;
+
+ AUDIT_ARG(owner, uap->uid, uap->gid);
+
+ xsecdst = KAUTH_FILESEC_NONE;
+ if (uap->xsecurity != USER_ADDR_NULL) {
+ if ((ciferror = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0)
+ return ciferror;
+ }
+
+ VATTR_INIT(&va);
+ VATTR_SET(&va, va_mode, (uap->mode & ALLPERMS) & ~p->p_fd->fd_cmask);
+ if (uap->uid != KAUTH_UID_NONE)
+ VATTR_SET(&va, va_uid, uap->uid);
+ if (uap->gid != KAUTH_GID_NONE)
+ VATTR_SET(&va, va_gid, uap->gid);
+ if (xsecdst != KAUTH_FILESEC_NONE)
+ VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
+
+ ciferror = mkfifo1(vfs_context_current(), uap->path, &va);
+
+ if (xsecdst != KAUTH_FILESEC_NONE)
+ kauth_filesec_free(xsecdst);
+ return ciferror;
+}
+
+/* ARGSUSED */
+int
+mkfifo(proc_t p, struct mkfifo_args *uap, __unused int32_t *retval)
+{
+ struct vnode_attr va;
+
+ VATTR_INIT(&va);
+ VATTR_SET(&va, va_mode, (uap->mode & ALLPERMS) & ~p->p_fd->fd_cmask);
+
+ return(mkfifo1(vfs_context_current(), uap->path, &va));
+}
+
+
/*
 * Minimal strrchr(): return a pointer to the last occurrence of 'ch'
 * in the NUL-terminated string 'p', or NULL if it never appears.
 * Searching for '\0' yields a pointer to the terminator, matching the
 * C library contract.
 */
static char *
my_strrchr(char *p, int ch)
{
	char *last = NULL;

	do {
		if (*p == ch)
			last = p;
	} while (*p++ != '\0');

	return (last);
}
+
+extern int safe_getpath(struct vnode *dvp, char *leafname, char *path, int _len, int *truncated_path);
+
/*
 * Best-effort path construction for fsevents/kauth notifications.
 *
 * Builds "dvp's path" + "/" + leafname into 'path' (capacity _len).
 * Unlike vn_getpath() this never fails outright: if the full path
 * cannot be produced it walks up v_parent (or falls back to the mount
 * point, or "/") and sets *truncated_path.  Returns the length of the
 * string placed in 'path' including the NUL terminator.
 */
int
safe_getpath(struct vnode *dvp, char *leafname, char *path, int _len, int *truncated_path)
{
	int ret, len = _len;

	*truncated_path = 0;
	/* vn_getpath returns len including the NUL terminator. */
	ret = vn_getpath(dvp, path, &len);
	if (ret == 0 && len < (MAXPATHLEN - 1)) {
		if (leafname) {
			/* Overwrite the NUL with '/' and append the leaf name. */
			path[len-1] = '/';
			len += strlcpy(&path[len], leafname, MAXPATHLEN-len) + 1;
			if (len > MAXPATHLEN) {
				char *ptr;

				// the string got truncated!
				*truncated_path = 1;
				ptr = my_strrchr(path, '/');
				if (ptr) {
					*ptr = '\0';   // chop off the string at the last directory component
				}
				len = strlen(path) + 1;
			}
		}
	} else if (ret == 0) {
		/* Path fit but there is no room to append a leaf name. */
		*truncated_path = 1;
	} else if (ret != 0) {
		struct vnode *mydvp=dvp;

		if (ret != ENOSPC) {
			printf("safe_getpath: failed to get the path for vp %p (%s) : err %d\n",
			    dvp, dvp->v_name ? dvp->v_name : "no-name", ret);
		}
		*truncated_path = 1;

		/* Climb toward the root until some ancestor's path fits. */
		do {
			if (mydvp->v_parent != NULL) {
				mydvp = mydvp->v_parent;
			} else if (mydvp->v_mount) {
				strlcpy(path, mydvp->v_mount->mnt_vfsstat.f_mntonname, _len);
				break;
			} else {
				// no parent and no mount point?  only thing is to punt and say "/" changed
				strlcpy(path, "/", _len);
				len = 2;
				mydvp = NULL;
			}

			if (mydvp == NULL) {
				break;
			}

			len = _len;
			ret = vn_getpath(mydvp, path, &len);
		} while (ret == ENOSPC);
	}

	return len;
}
+
+
+/*
+ * Make a hard file link.
+ *
+ * Returns: 0 Success
+ * EPERM
+ * EEXIST
+ * EXDEV
+ * namei:???
+ * vnode_authorize:???
+ * VNOP_LINK:???
+ */
+/* ARGSUSED */
+static int
+linkat_internal(vfs_context_t ctx, int fd1, user_addr_t path, int fd2,
+ user_addr_t link, int flag, enum uio_seg segflg)
+{
+ vnode_t vp, dvp, lvp;
+ struct nameidata nd;
+ int follow;
+ int error;
+#if CONFIG_FSE
+ fse_info finfo;
+#endif
+ int need_event, has_listeners;
+ char *target_path = NULL;
+ int truncated=0;
+
+ vp = dvp = lvp = NULLVP;
+
+ /* look up the object we are linking to */
+ follow = (flag & AT_SYMLINK_FOLLOW) ? FOLLOW : NOFOLLOW;
+ NDINIT(&nd, LOOKUP, OP_LOOKUP, AUDITVNPATH1 | follow,
+ segflg, path, ctx);
+
+ error = nameiat(&nd, fd1);
+ if (error)
+ return (error);
+ vp = nd.ni_vp;
+
+ nameidone(&nd);
+
+ /*
+ * Normally, linking to directories is not supported.
+ * However, some file systems may have limited support.
+ */
+ if (vp->v_type == VDIR) {
+ if (!(vp->v_mount->mnt_vtable->vfc_vfsflags & VFC_VFSDIRLINKS)) {
+ error = EPERM; /* POSIX */
+ goto out;
+ }
+ /* Linking to a directory requires ownership. */
+ if (!kauth_cred_issuser(vfs_context_ucred(ctx))) {
+ struct vnode_attr dva;
+
+ VATTR_INIT(&dva);
+ VATTR_WANTED(&dva, va_uid);
+ if (vnode_getattr(vp, &dva, ctx) != 0 ||
+ !VATTR_IS_SUPPORTED(&dva, va_uid) ||
+ (dva.va_uid != kauth_cred_getuid(vfs_context_ucred(ctx)))) {
+ error = EACCES;
+ goto out;
+ }
+ }
+ }
+
+ /* lookup the target node */
+#if CONFIG_TRIGGERS
+ nd.ni_op = OP_LINK;
+#endif
+ nd.ni_cnd.cn_nameiop = CREATE;
+ nd.ni_cnd.cn_flags = LOCKPARENT | AUDITVNPATH2 | CN_NBMOUNTLOOK;
+ nd.ni_dirp = link;
+ error = nameiat(&nd, fd2);
+ if (error != 0)
+ goto out;
+ dvp = nd.ni_dvp;
+ lvp = nd.ni_vp;
+
+#if CONFIG_MACF
+ if ((error = mac_vnode_check_link(ctx, dvp, vp, &nd.ni_cnd)) != 0)
+ goto out2;
+#endif
+
+ /* or to anything that kauth doesn't want us to (eg. immutable items) */
+ if ((error = vnode_authorize(vp, NULL, KAUTH_VNODE_LINKTARGET, ctx)) != 0)
+ goto out2;
+
+ /* target node must not exist */
+ if (lvp != NULLVP) {
+ error = EEXIST;
+ goto out2;
+ }
+ /* cannot link across mountpoints */
+ if (vnode_mount(vp) != vnode_mount(dvp)) {
+ error = EXDEV;
+ goto out2;
+ }
+
+ /* authorize creation of the target note */
+ if ((error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, ctx)) != 0)
+ goto out2;
+
+ /* and finally make the link */
+ error = VNOP_LINK(vp, dvp, &nd.ni_cnd, ctx);
+ if (error)
+ goto out2;
+
+#if CONFIG_MACF
+ (void)mac_vnode_notify_link(ctx, vp, dvp, &nd.ni_cnd);
+#endif
+
+#if CONFIG_FSE
+ need_event = need_fsevent(FSE_CREATE_FILE, dvp);
+#else
+ need_event = 0;
+#endif
+ has_listeners = kauth_authorize_fileop_has_listeners();
+
+ if (need_event || has_listeners) {
+ char *link_to_path = NULL;
+ int len, link_name_len;
+
+ /* build the path to the new link file */
+ GET_PATH(target_path);
+ if (target_path == NULL) {
+ error = ENOMEM;
+ goto out2;
+ }
+
+ len = safe_getpath(dvp, nd.ni_cnd.cn_nameptr, target_path, MAXPATHLEN, &truncated);
+
+ if (has_listeners) {
+ /* build the path to file we are linking to */
+ GET_PATH(link_to_path);
+ if (link_to_path == NULL) {
+ error = ENOMEM;
+ goto out2;
+ }
+
+ link_name_len = MAXPATHLEN;
+ if (vn_getpath(vp, link_to_path, &link_name_len) == 0) {
+ /*
+ * Call out to allow 3rd party notification of rename.
+ * Ignore result of kauth_authorize_fileop call.
+ */
+ kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_LINK,
+ (uintptr_t)link_to_path,
+ (uintptr_t)target_path);
+ }
+ if (link_to_path != NULL) {
+ RELEASE_PATH(link_to_path);
+ }
+ }
+#if CONFIG_FSE
+ if (need_event) {
+ /* construct fsevent */
+ if (get_fse_info(vp, &finfo, ctx) == 0) {
+ if (truncated) {
+ finfo.mode |= FSE_TRUNCATED_PATH;
+ }
+
+ // build the path to the destination of the link
+ add_fsevent(FSE_CREATE_FILE, ctx,
+ FSE_ARG_STRING, len, target_path,
+ FSE_ARG_FINFO, &finfo,
+ FSE_ARG_DONE);
+ }
+ if (vp->v_parent) {
+ add_fsevent(FSE_STAT_CHANGED, ctx,
+ FSE_ARG_VNODE, vp->v_parent,
+ FSE_ARG_DONE);
+ }
+ }
+#endif
+ }
+out2:
+ /*
+ * nameidone has to happen before we vnode_put(dvp)
+ * since it may need to release the fs_nodelock on the dvp
+ */
+ nameidone(&nd);
+ if (target_path != NULL) {
+ RELEASE_PATH(target_path);
+ }
+out:
+ if (lvp)
+ vnode_put(lvp);
+ if (dvp)
+ vnode_put(dvp);
+ vnode_put(vp);
+ return (error);
+}
+
/* link: classic link(2), both paths relative to the cwd, follows symlinks. */
int
link(__unused proc_t p, struct link_args *uap, __unused int32_t *retval)
{
	return (linkat_internal(vfs_context_current(), AT_FDCWD, uap->path,
	    AT_FDCWD, uap->link, AT_SYMLINK_FOLLOW, UIO_USERSPACE));
}
+
/* linkat: POSIX linkat(2); only AT_SYMLINK_FOLLOW is a valid flag. */
int
linkat(__unused proc_t p, struct linkat_args *uap, __unused int32_t *retval)
{
	/* Reject any flag bits other than AT_SYMLINK_FOLLOW. */
	if (uap->flag & ~AT_SYMLINK_FOLLOW)
		return (EINVAL);

	return (linkat_internal(vfs_context_current(), uap->fd1, uap->path,
	    uap->fd2, uap->link, uap->flag, UIO_USERSPACE));
}
+
+/*
+ * Make a symbolic link.
+ *
+ * Common implementation for symlink(2)/symlinkat(2): creates a symlink
+ * named 'link' (resolved relative to 'fd') whose contents are the string
+ * at 'path_data'.  'segflg' identifies the address space holding both
+ * strings.
+ *
+ * We could add support for ACLs here too...
+ */
+/* ARGSUSED */
+static int
+symlinkat_internal(vfs_context_t ctx, user_addr_t path_data, int fd,
+    user_addr_t link, enum uio_seg segflg)
+{
+	struct vnode_attr va;
+	char *path;
+	int error;
+	struct nameidata nd;
+	vnode_t vp, dvp;
+	uint32_t dfflags;	// Directory file flags
+	size_t dummy=0;
+	proc_t p;
+
+	error = 0;
+	/* Copy the symlink target string into a kernel buffer if needed. */
+	if (UIO_SEG_IS_USER_SPACE(segflg)) {
+		MALLOC_ZONE(path, char *, MAXPATHLEN, M_NAMEI, M_WAITOK);
+		error = copyinstr(path_data, path, MAXPATHLEN, &dummy);
+	} else {
+		path = (char *)path_data;
+	}
+	if (error)
+		goto out;
+	AUDIT_ARG(text, path);	/* This is the link string */
+
+	/*
+	 * Look up the name being created; on success dvp (and vp, if the
+	 * name already exists) carry iocounts released at skipit.
+	 */
+	NDINIT(&nd, CREATE, OP_SYMLINK, LOCKPARENT | AUDITVNPATH1,
+	    segflg, link, ctx);
+
+	error = nameiat(&nd, fd);
+	if (error)
+		goto out;
+	dvp = nd.ni_dvp;
+	vp = nd.ni_vp;
+
+	/* Default attributes: type VLNK, mode filtered by the umask. */
+	p = vfs_context_proc(ctx);
+	VATTR_INIT(&va);
+	VATTR_SET(&va, va_type, VLNK);
+	VATTR_SET(&va, va_mode, ACCESSPERMS & ~p->p_fd->fd_cmask);
+
+	/*
+	 * Handle inheritance of restricted flag
+	 */
+	error = vnode_flags(dvp, &dfflags, ctx);
+	if (error)
+		goto skipit;
+	if (dfflags & SF_RESTRICTED)
+		VATTR_SET(&va, va_flags, SF_RESTRICTED);
+
+#if CONFIG_MACF
+	error = mac_vnode_check_create(ctx,
+	    dvp, &nd.ni_cnd, &va);
+#endif
+	if (error != 0) {
+		goto skipit;
+	}
+
+	/* The name must not already exist. */
+	if (vp != NULL) {
+		error = EEXIST;
+		goto skipit;
+	}
+
+	/* authorize */
+	if (error == 0)
+		error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, ctx);
+	/* get default ownership, etc. */
+	if (error == 0)
+		error = vnode_authattr_new(dvp, &va, 0, ctx);
+	if (error == 0)
+		error = VNOP_SYMLINK(dvp, &vp, &nd.ni_cnd, &va, path, ctx);
+
+#if CONFIG_MACF
+	if (error == 0 && vp)
+		error = vnode_label(vnode_mount(vp), dvp, vp, &nd.ni_cnd, VNODE_LABEL_CREATE, ctx);
+#endif
+
+	/* do fallback attribute handling */
+	if (error == 0 && vp)
+		error = vnode_setattr_fallback(vp, &va, ctx);
+
+	if (error == 0) {
+		int update_flags = 0;
+
+		/*
+		 * Check if a new vnode was created; if the filesystem did not
+		 * hand one back from VNOP_SYMLINK, look it up explicitly.
+		 */
+		if (vp == NULL) {
+			nd.ni_cnd.cn_nameiop = LOOKUP;
+#if CONFIG_TRIGGERS
+			nd.ni_op = OP_LOOKUP;
+#endif
+			nd.ni_cnd.cn_flags = 0;
+			error = nameiat(&nd, fd);
+			vp = nd.ni_vp;
+
+			if (vp == NULL)
+				goto skipit;
+		}
+
+#if 0 /* XXX - kauth_todo - is KAUTH_FILEOP_SYMLINK needed? */
+		/* call out to allow 3rd party notification of rename.
+		 * Ignore result of kauth_authorize_fileop call.
+		 */
+		if (kauth_authorize_fileop_has_listeners() &&
+		    namei(&nd) == 0) {
+			char *new_link_path = NULL;
+			int len;
+
+			/* build the path to the new link file */
+			new_link_path = get_pathbuff();
+			len = MAXPATHLEN;
+			vn_getpath(dvp, new_link_path, &len);
+			if ((len + 1 + nd.ni_cnd.cn_namelen + 1) < MAXPATHLEN) {
+				new_link_path[len - 1] = '/';
+				strlcpy(&new_link_path[len], nd.ni_cnd.cn_nameptr, MAXPATHLEN-len);
+			}
+
+			kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_SYMLINK,
+			    (uintptr_t)path, (uintptr_t)new_link_path);
+			if (new_link_path != NULL)
+				release_pathbuff(new_link_path);
+		}
+#endif
+		// Make sure the name & parent pointers are hooked up
+		if (vp->v_name == NULL)
+			update_flags |= VNODE_UPDATE_NAME;
+		if (vp->v_parent == NULLVP)
+			update_flags |= VNODE_UPDATE_PARENT;
+
+		if (update_flags)
+			vnode_update_identity(vp, dvp, nd.ni_cnd.cn_nameptr, nd.ni_cnd.cn_namelen, nd.ni_cnd.cn_hash, update_flags);
+
+#if CONFIG_FSE
+		add_fsevent(FSE_CREATE_FILE, ctx,
+		    FSE_ARG_VNODE, vp,
+		    FSE_ARG_DONE);
+#endif
+	}
+
+skipit:
+	/*
+	 * nameidone has to happen before we vnode_put(dvp)
+	 * since it may need to release the fs_nodelock on the dvp
+	 */
+	nameidone(&nd);
+
+	if (vp)
+		vnode_put(vp);
+	vnode_put(dvp);
+out:
+	/* Free the kernel copy of the target string, if we made one. */
+	if (path && (path != (char *)path_data))
+		FREE_ZONE(path, MAXPATHLEN, M_NAMEI);
+
+	return (error);
+}
+
+/*
+ * symlink(2): create a symbolic link relative to the current working
+ * directory.
+ */
+int
+symlink(__unused proc_t p, struct symlink_args *uap, __unused int32_t *retval)
+{
+	vfs_context_t ctx = vfs_context_current();
+
+	return (symlinkat_internal(ctx, uap->path, AT_FDCWD, uap->link,
+	    UIO_USERSPACE));
+}
+
+/*
+ * symlinkat(2): create a symbolic link relative to the directory fd.
+ */
+int
+symlinkat(__unused proc_t p, struct symlinkat_args *uap,
+    __unused int32_t *retval)
+{
+	vfs_context_t ctx = vfs_context_current();
+
+	return (symlinkat_internal(ctx, uap->path1, uap->fd, uap->path2,
+	    UIO_USERSPACE));
+}
+
+/*
+ * undelete(2): formerly removed a whiteout from the filesystem.  The
+ * facility has been retired, so the syscall now fails unconditionally.
+ */
+int
+undelete(__unused proc_t p, __unused struct undelete_args *uap,
+    __unused int32_t *retval)
+{
+	return (ENOTSUP);
+}
+
+/*
+ * Delete a name from the filesystem.
+ *
+ * Common implementation for unlink(2)/unlinkat(2)/delete(2): removes the
+ * name at 'path_arg' (in the 'segflg' address space), resolved relative
+ * to 'start_dvp' when supplied, otherwise relative to 'fd'.
+ * 'unlink_flags' carries VNODE_REMOVE_* behavior modifiers.
+ *
+ * Returns:	0		Success
+ *		EBUSY		Target is the root of a mounted filesystem
+ *		nameiat:???
+ *		vn_remove:???
+ */
+/* ARGSUSED */
+static int
+unlinkat_internal(vfs_context_t ctx, int fd, vnode_t start_dvp,
+    user_addr_t path_arg, enum uio_seg segflg, int unlink_flags)
+{
+	struct nameidata nd;
+	vnode_t vp, dvp;
+	int error;
+	struct componentname *cnp;
+	char *path = NULL;
+	int len=0;
+#if CONFIG_FSE
+	fse_info finfo;
+	struct vnode_attr va;
+#endif
+	int flags;
+	int need_event;
+	int has_listeners;
+	int truncated_path;
+	int batched;
+	struct vnode_attr *vap;
+	int do_retry;
+	int retry_count = 0;
+	int cn_flags;
+
+	cn_flags = LOCKPARENT;
+	if (!(unlink_flags & VNODE_REMOVE_NO_AUDIT_PATH))
+		cn_flags |= AUDITVNPATH1;
+	/* If a starting dvp is passed, it trumps any fd passed. */
+	if (start_dvp)
+		cn_flags |= USEDVP;
+
+#if NAMEDRSRCFORK
+	/* unlink or delete is allowed on rsrc forks and named streams */
+	cn_flags |= CN_ALLOWRSRCFORK;
+#endif
+
+retry:
+	/* Per-attempt state; reset on every redrive of the lookup. */
+	do_retry = 0;
+	flags = 0;
+	need_event = 0;
+	has_listeners = 0;
+	truncated_path = 0;
+	vap = NULL;
+
+	NDINIT(&nd, DELETE, OP_UNLINK, cn_flags, segflg, path_arg, ctx);
+
+	nd.ni_dvp = start_dvp;
+	nd.ni_flag |= NAMEI_COMPOUNDREMOVE;
+	cnp = &nd.ni_cnd;
+
+lookup_continue:
+	error = nameiat(&nd, fd);
+	if (error)
+		return (error);
+
+	dvp = nd.ni_dvp;
+	vp = nd.ni_vp;
+
+
+	/* With Carbon delete semantics, busy files cannot be deleted */
+	if (unlink_flags & VNODE_REMOVE_NODELETEBUSY) {
+		flags |= VNODE_REMOVE_NODELETEBUSY;
+	}
+
+	/* Skip any potential upcalls if told to. */
+	if (unlink_flags & VNODE_REMOVE_SKIP_NAMESPACE_EVENT) {
+		flags |= VNODE_REMOVE_SKIP_NAMESPACE_EVENT;
+	}
+
+	if (vp) {
+		batched = vnode_compound_remove_available(vp);
+		/*
+		 * The root of a mounted filesystem cannot be deleted.
+		 *
+		 * Bail out immediately: without the goto, the EBUSY could be
+		 * overwritten by the authorization call below (or ignored
+		 * entirely in the batched case) and the remove would still
+		 * be attempted.
+		 */
+		if (vp->v_flag & VROOT) {
+			error = EBUSY;
+			goto out;
+		}
+
+		if (!batched) {
+			error = vn_authorize_unlink(dvp, vp, cnp, ctx, NULL);
+			if (error) {
+				if (error == ENOENT) {
+					/*
+					 * Racing hardlink lookups may leave a
+					 * stale name-cache entry; redrive.
+					 */
+					assert(retry_count < MAX_AUTHORIZE_ENOENT_RETRIES);
+					if (retry_count < MAX_AUTHORIZE_ENOENT_RETRIES) {
+						do_retry = 1;
+						retry_count++;
+					}
+				}
+				goto out;
+			}
+		}
+	} else {
+		batched = 1;
+
+		if (!vnode_compound_remove_available(dvp)) {
+			panic("No vp, but no compound remove?");
+		}
+	}
+
+#if CONFIG_FSE
+	need_event = need_fsevent(FSE_DELETE, dvp);
+	if (need_event) {
+		if (!batched) {
+			if ((vp->v_flag & VISHARDLINK) == 0) {
+				/* XXX need to get these data in batched VNOP */
+				get_fse_info(vp, &finfo, ctx);
+			}
+		} else {
+			error = vfs_get_notify_attributes(&va);
+			if (error) {
+				goto out;
+			}
+
+			vap = &va;
+		}
+	}
+#endif
+	has_listeners = kauth_authorize_fileop_has_listeners();
+	if (need_event || has_listeners) {
+		/* Build the full path once, for fsevents and/or listeners. */
+		if (path == NULL) {
+			GET_PATH(path);
+			if (path == NULL) {
+				error = ENOMEM;
+				goto out;
+			}
+		}
+		len = safe_getpath(dvp, nd.ni_cnd.cn_nameptr, path, MAXPATHLEN, &truncated_path);
+	}
+
+#if NAMEDRSRCFORK
+	if (nd.ni_cnd.cn_flags & CN_WANTSRSRCFORK)
+		error = vnode_removenamedstream(dvp, vp, XATTR_RESOURCEFORK_NAME, 0, ctx);
+	else
+#endif
+	{
+		error = vn_remove(dvp, &nd.ni_vp, &nd, flags, vap, ctx);
+		vp = nd.ni_vp;
+		if (error == EKEEPLOOKING) {
+			/* Compound remove needs another lookup pass. */
+			if (!batched) {
+				panic("EKEEPLOOKING, but not a filesystem that supports compound VNOPs?");
+			}
+
+			if ((nd.ni_flag & NAMEI_CONTLOOKUP) == 0) {
+				panic("EKEEPLOOKING, but continue flag not set?");
+			}
+
+			if (vnode_isdir(vp)) {
+				error = EISDIR;
+				goto out;
+			}
+			goto lookup_continue;
+		} else if (error == ENOENT && batched) {
+			assert(retry_count < MAX_AUTHORIZE_ENOENT_RETRIES);
+			if (retry_count < MAX_AUTHORIZE_ENOENT_RETRIES) {
+				/*
+				 * For compound VNOPs, the authorization callback may
+				 * return ENOENT in case of racing hardlink lookups
+				 * hitting the name cache, redrive the lookup.
+				 */
+				do_retry = 1;
+				retry_count += 1;
+				goto out;
+			}
+		}
+	}
+
+	/*
+	 * Call out to allow 3rd party notification of delete.
+	 * Ignore result of kauth_authorize_fileop call.
+	 */
+	if (!error) {
+		if (has_listeners) {
+			kauth_authorize_fileop(vfs_context_ucred(ctx),
+			    KAUTH_FILEOP_DELETE,
+			    (uintptr_t)vp,
+			    (uintptr_t)path);
+		}
+
+		if (vp->v_flag & VISHARDLINK) {
+			/*
+			 * If a hardlink gets deleted, blow away the v_parent
+			 * link: the path that got us to this instance of the
+			 * link is no longer valid, which forces the next
+			 * get-path call to ask the file system instead of
+			 * just following the stale v_parent pointer.
+			 */
+			vnode_update_identity(vp, NULL, NULL, 0, 0, VNODE_UPDATE_PARENT);
+		}
+
+#if CONFIG_FSE
+		if (need_event) {
+			if (vp->v_flag & VISHARDLINK) {
+				get_fse_info(vp, &finfo, ctx);
+			} else if (vap) {
+				vnode_get_fse_info_from_vap(vp, &finfo, vap);
+			}
+			if (truncated_path) {
+				finfo.mode |= FSE_TRUNCATED_PATH;
+			}
+			add_fsevent(FSE_DELETE, ctx,
+			    FSE_ARG_STRING, len, path,
+			    FSE_ARG_FINFO, &finfo,
+			    FSE_ARG_DONE);
+		}
+#endif
+	}
+
+out:
+	if (path != NULL) {
+		RELEASE_PATH(path);
+		/*
+		 * Reset so a redrive via 'retry' allocates a fresh buffer
+		 * instead of reusing (and later double-releasing) this one.
+		 */
+		path = NULL;
+	}
+
+#if NAMEDRSRCFORK
+	/* recycle the deleted rsrc fork vnode to force a reclaim, which
+	 * will cause its shadow file to go away if necessary.
+	 */
+	if (vp && (vnode_isnamedstream(vp)) &&
+	    (vp->v_parent != NULLVP) &&
+	    vnode_isshadow(vp)) {
+		vnode_recycle(vp);
+	}
+#endif
+	/*
+	 * nameidone has to happen before we vnode_put(dvp)
+	 * since it may need to release the fs_nodelock on the dvp
+	 */
+	nameidone(&nd);
+	vnode_put(dvp);
+	if (vp) {
+		vnode_put(vp);
+	}
+
+	if (do_retry) {
+		goto retry;
+	}
+
+	return (error);
+}
+
+/*
+ * In-kernel unlink entry point: remove 'path_arg', optionally resolved
+ * against 'start_dvp', honoring the VNODE_REMOVE_* flags.
+ */
+int
+unlink1(vfs_context_t ctx, vnode_t start_dvp, user_addr_t path_arg,
+    enum uio_seg segflg, int unlink_flags)
+{
+	return (unlinkat_internal(ctx, AT_FDCWD, start_dvp, path_arg,
+	    segflg, unlink_flags));
+}
+
+/*
+ * delete(2): remove a name using Carbon semantics — a busy file fails
+ * with EBUSY instead of being unlinked.
+ */
+int
+delete(__unused proc_t p, struct delete_args *uap, __unused int32_t *retval)
+{
+	vfs_context_t ctx = vfs_context_current();
+
+	return (unlinkat_internal(ctx, AT_FDCWD, NULLVP, uap->path,
+	    UIO_USERSPACE, VNODE_REMOVE_NODELETEBUSY));
+}
+
+/*
+ * unlink(2): remove a name using POSIX semantics.
+ */
+int
+unlink(__unused proc_t p, struct unlink_args *uap, __unused int32_t *retval)
+{
+	vfs_context_t ctx = vfs_context_current();
+
+	return (unlinkat_internal(ctx, AT_FDCWD, NULLVP, uap->path,
+	    UIO_USERSPACE, 0));
+}
+
+/*
+ * unlinkat(2): remove a file, or a directory when AT_REMOVEDIR is set,
+ * relative to the directory identified by uap->fd.
+ */
+int
+unlinkat(__unused proc_t p, struct unlinkat_args *uap, __unused int32_t *retval)
+{
+	vfs_context_t ctx = vfs_context_current();
+
+	if (uap->flag & ~AT_REMOVEDIR) {
+		return (EINVAL);
+	}
+
+	if (uap->flag & AT_REMOVEDIR) {
+		return (rmdirat_internal(ctx, uap->fd, uap->path,
+		    UIO_USERSPACE));
+	}
+	return (unlinkat_internal(ctx, uap->fd, NULLVP, uap->path,
+	    UIO_USERSPACE, 0));
+}
+
+/*
+ * Reposition read/write file offset.
+ *
+ * lseek(2): recomputes the shared fileglob offset for 'uap->fd'
+ * according to 'uap->whence' (L_SET/L_INCR/L_XTND) and returns the new
+ * offset through *retval.
+ */
+int
+lseek(proc_t p, struct lseek_args *uap, off_t *retval)
+{
+	struct fileproc *fp;
+	vnode_t vp;
+	struct vfs_context *ctx;
+	off_t offset = uap->offset, file_size;
+	int error;
+
+	/* Only vnode-backed fds are seekable; others map to ESPIPE. */
+	if ( (error = fp_getfvp(p,uap->fd, &fp, &vp)) ) {
+		if (error == ENOTSUP)
+			return (ESPIPE);
+		return (error);
+	}
+	if (vnode_isfifo(vp)) {
+		file_drop(uap->fd);
+		return(ESPIPE);
+	}
+
+
+	ctx = vfs_context_current();
+#if CONFIG_MACF
+	/*
+	 * lseek(fd, 0, SEEK_CUR) merely reads the current offset; any
+	 * other combination is treated as a change of offset.
+	 */
+	if (uap->whence == L_INCR && uap->offset == 0)
+		error = mac_file_check_get_offset(vfs_context_ucred(ctx),
+		    fp->f_fglob);
+	else
+		error = mac_file_check_change_offset(vfs_context_ucred(ctx),
+		    fp->f_fglob);
+	if (error) {
+		file_drop(uap->fd);
+		return (error);
+	}
+#endif
+	if ( (error = vnode_getwithref(vp)) ) {
+		file_drop(uap->fd);
+		return(error);
+	}
+
+	/* Compute the tentative new offset. */
+	switch (uap->whence) {
+	case L_INCR:
+		offset += fp->f_fglob->fg_offset;
+		break;
+	case L_XTND:
+		if ((error = vnode_size(vp, &file_size, ctx)) != 0)
+			break;
+		offset += file_size;
+		break;
+	case L_SET:
+		break;
+	default:
+		error = EINVAL;
+	}
+	if (error == 0) {
+		/*
+		 * NOTE(review): the additions above can overflow off_t
+		 * (signed); the sign-flip test below only catches the
+		 * positive-delta case — confirm this matches intent.
+		 */
+		if (uap->offset > 0 && offset < 0) {
+			/* Incremented/relative move past max size */
+			error = EOVERFLOW;
+		} else {
+			/*
+			 * Allow negative offsets on character devices, per
+			 * POSIX 1003.1-2001. Most likely for writing disk
+			 * labels.
+			 */
+			if (offset < 0 && vp->v_type != VCHR) {
+				/* Decremented/relative move before start */
+				error = EINVAL;
+			} else {
+				/* Success */
+				fp->f_fglob->fg_offset = offset;
+				*retval = fp->f_fglob->fg_offset;
+			}
+		}
+	}
+
+	/*
+	 * An lseek can affect whether data is "available to read." Use
+	 * hint of NOTE_NONE so no EVFILT_VNODE events fire
+	 */
+	post_event_if_success(vp, error, NOTE_NONE);
+	(void)vnode_put(vp);
+	file_drop(uap->fd);
+	return (error);
+}
+
+
+/*
+ * Check access permissions on a vnode.
+ *
+ * Translates access(2)-style bits in 'uflags' into a kauth action mask
+ * and asks the authorization layer whether the identity in 'ctx' may
+ * perform it.  'dvp' is the parent directory, used only when delete
+ * rights are being probed.
+ *
+ * Returns:	0			Success
+ *		vnode_authorize:???
+ */
+static int
+access1(vnode_t vp, vnode_t dvp, int uflags, vfs_context_t ctx)
+{
+	kauth_action_t action = 0;
+	int error;
+
+	if (uflags & _ACCESS_EXTENDED_MASK) {
+		/* extended bits are pre-shifted kauth actions */
+		action = uflags >> 8;
+	} else {
+		/* classic R_OK/W_OK/X_OK bits */
+		if (uflags & R_OK) {
+			action |= KAUTH_VNODE_READ_DATA;	/* aka KAUTH_VNODE_LIST_DIRECTORY */
+		}
+		if (uflags & W_OK) {
+			/* might want delete rights here too */
+			action |= vnode_isdir(vp) ?
+			    (KAUTH_VNODE_ADD_FILE | KAUTH_VNODE_ADD_SUBDIRECTORY) :
+			    KAUTH_VNODE_WRITE_DATA;
+		}
+		if (uflags & X_OK) {
+			action |= vnode_isdir(vp) ?
+			    KAUTH_VNODE_SEARCH : KAUTH_VNODE_EXECUTE;
+		}
+	}
+
+#if CONFIG_MACF
+	error = mac_vnode_check_access(ctx, vp, uflags);
+	if (error)
+		return (error);
+#endif /* MAC */
+
+	/* action == 0 means only check for existence */
+	if (action == 0)
+		return (0);
+
+	return (vnode_authorize(vp, dvp, action | KAUTH_VNODE_ACCESS, ctx));
+}
+
+
+
+/*
+ * access_extended: Check access permissions in bulk.
+ *
+ * Description: uap->entries Pointer to an array of accessx
+ * descriptor structs, plus one or
+ * more NULL terminated strings (see
+ * "Notes" section below).
+ * uap->size Size of the area pointed to by
+ * uap->entries.
+ * uap->results Pointer to the results array.
+ *
+ * Returns: 0 Success
+ * ENOMEM Insufficient memory
+ * EINVAL Invalid arguments
+ * namei:EFAULT Bad address
+ * namei:ENAMETOOLONG Filename too long
+ * namei:ENOENT No such file or directory
+ * namei:ELOOP Too many levels of symbolic links
+ * namei:EBADF Bad file descriptor
+ * namei:ENOTDIR Not a directory
+ * namei:???
+ * access1:
+ *
+ * Implicit returns:
+ * uap->results Array contents modified
+ *
+ * Notes: The uap->entries are structured as an arbitrary length array
+ * of accessx descriptors, followed by one or more NULL terminated
+ * strings
+ *
+ * struct accessx_descriptor[0]
+ * ...
+ * struct accessx_descriptor[n]
+ * char name_data[0];
+ *
+ * We determine the entry count by walking the buffer containing
+ * the uap->entries argument descriptor. For each descriptor we
+ * see, the valid values for the offset ad_name_offset will be
+ * in the byte range:
+ *
+ * [ uap->entries + sizeof(struct accessx_descriptor) ]
+ * to
+ * [ uap->entries + uap->size - 2 ]
+ *
+ * since we must have at least one string, and the string must
+ * be at least one character plus the NULL terminator in length.
+ *
+ * XXX: Need to support the check-as uid argument
+ */
+int
+access_extended(__unused proc_t p, struct access_extended_args *uap, __unused int32_t *retval)
+{
+	struct accessx_descriptor *input = NULL;
+	errno_t *result = NULL;
+	errno_t error = 0;
+	int wantdelete = 0;
+	unsigned int desc_max, desc_actual, i, j;
+	struct vfs_context context;
+	struct nameidata nd;
+	int niopts;
+	vnode_t vp = NULL;
+	vnode_t dvp = NULL;
+#define ACCESSX_MAX_DESCR_ON_STACK 10
+	struct accessx_descriptor stack_input[ACCESSX_MAX_DESCR_ON_STACK];
+
+	/* NULL marks "no credential taken yet" for the cleanup path below. */
+	context.vc_ucred = NULL;
+
+	/*
+	 * Validate parameters; if valid, copy the descriptor array and string
+	 * arguments into local memory. Before proceeding, the following
+	 * conditions must have been met:
+	 *
+	 * o The total size is not permitted to exceed ACCESSX_MAX_TABLESIZE
+	 * o There must be sufficient room in the request for at least one
+	 *   descriptor and a one byte NUL terminated string.
+	 * o The allocation of local storage must not fail.
+	 */
+	if (uap->size > ACCESSX_MAX_TABLESIZE)
+		return(ENOMEM);
+	if (uap->size < (sizeof(struct accessx_descriptor) + 2))
+		return(EINVAL);
+	/* Small requests use stack storage; larger ones are heap-allocated. */
+	if (uap->size <= sizeof (stack_input)) {
+		input = stack_input;
+	} else {
+		MALLOC(input, struct accessx_descriptor *, uap->size, M_TEMP, M_WAITOK);
+		if (input == NULL) {
+			error = ENOMEM;
+			goto out;
+		}
+	}
+	error = copyin(uap->entries, input, uap->size);
+	if (error)
+		goto out;
+
+	AUDIT_ARG(opaque, input, uap->size);
+
+	/*
+	 * Force NUL termination of the copyin buffer to avoid namei() running
+	 * off the end. If the caller passes us bogus data, they may get a
+	 * bogus result.
+	 */
+	((char *)input)[uap->size - 1] = 0;
+
+	/*
+	 * Access is defined as checking against the process' real identity,
+	 * even if operations are checking the effective identity. This
+	 * requires that we use a local vfs context.
+	 */
+	context.vc_ucred = kauth_cred_copy_real(kauth_cred_get());
+	context.vc_thread = current_thread();
+
+	/*
+	 * Find out how many entries we have, so we can allocate the result
+	 * array by walking the list and adjusting the count downward by the
+	 * earliest string offset we see.
+	 */
+	desc_max = (uap->size - 2) / sizeof(struct accessx_descriptor);
+	desc_actual = desc_max;
+	for (i = 0; i < desc_actual; i++) {
+		/*
+		 * Take the offset to the name string for this entry and
+		 * convert to an input array index, which would be one off
+		 * the end of the array if this entry was the lowest-addressed
+		 * name string.
+		 */
+		j = input[i].ad_name_offset / sizeof(struct accessx_descriptor);
+
+		/*
+		 * An offset greater than the max allowable offset is an error.
+		 * It is also an error for any valid entry to point
+		 * to a location prior to the end of the current entry, if
+		 * it's not a reference to the string of the previous entry.
+		 */
+		if (j > desc_max || (j != 0 && j <= i)) {
+			error = EINVAL;
+			goto out;
+		}
+
+		/*
+		 * An offset of 0 means use the previous descriptor's offset;
+		 * this is used to chain multiple requests for the same file
+		 * to avoid multiple lookups.
+		 */
+		if (j == 0) {
+			/* This is not valid for the first entry */
+			if (i == 0) {
+				error = EINVAL;
+				goto out;
+			}
+			continue;
+		}
+
+		/*
+		 * If the offset of the string for this descriptor is before
+		 * what we believe is the current actual last descriptor,
+		 * then we need to adjust our estimate downward; this permits
+		 * the string table following the last descriptor to be out
+		 * of order relative to the descriptor list.
+		 */
+		if (j < desc_actual)
+			desc_actual = j;
+	}
+
+	/*
+	 * We limit the actual number of descriptors we are willing to process
+	 * to a hard maximum of ACCESSX_MAX_DESCRIPTORS. If the number being
+	 * requested does not exceed this limit,
+	 */
+	if (desc_actual > ACCESSX_MAX_DESCRIPTORS) {
+		error = ENOMEM;
+		goto out;
+	}
+	MALLOC(result, errno_t *, desc_actual * sizeof(errno_t), M_TEMP, M_WAITOK);
+	if (result == NULL) {
+		error = ENOMEM;
+		goto out;
+	}
+
+	/*
+	 * Do the work by iterating over the descriptor entries we know to
+	 * at least appear to contain valid data.
+	 */
+	error = 0;
+	for (i = 0; i < desc_actual; i++) {
+		/*
+		 * If the ad_name_offset is 0, then we use the previous
+		 * results to make the check; otherwise, we are looking up
+		 * a new file name.
+		 */
+		if (input[i].ad_name_offset != 0) {
+			/* discard old vnodes */
+			if (vp) {
+				vnode_put(vp);
+				vp = NULL;
+			}
+			if (dvp) {
+				vnode_put(dvp);
+				dvp = NULL;
+			}
+
+			/*
+			 * Scan forward in the descriptor list to see if we
+			 * need the parent vnode. We will need it if we are
+			 * deleting, since we must have rights to remove
+			 * entries in the parent directory, as well as the
+			 * rights to delete the object itself.
+			 */
+			wantdelete = input[i].ad_flags & _DELETE_OK;
+			for (j = i + 1; (j < desc_actual) && (input[j].ad_name_offset == 0); j++)
+				if (input[j].ad_flags & _DELETE_OK)
+					wantdelete = 1;
+
+			niopts = FOLLOW | AUDITVNPATH1;
+
+			/* need parent for vnode_authorize for deletion test */
+			if (wantdelete)
+				niopts |= WANTPARENT;
+
+			/* do the lookup */
+			NDINIT(&nd, LOOKUP, OP_ACCESS, niopts, UIO_SYSSPACE,
+			    CAST_USER_ADDR_T(((const char *)input) + input[i].ad_name_offset),
+			    &context);
+			error = namei(&nd);
+			if (!error) {
+				vp = nd.ni_vp;
+				if (wantdelete)
+					dvp = nd.ni_dvp;
+			}
+			nameidone(&nd);
+		}
+
+		/*
+		 * Handle lookup errors.  "Soft" failures are recorded as
+		 * that entry's result; anything else aborts the whole call.
+		 */
+		switch(error) {
+		case ENOENT:
+		case EACCES:
+		case EPERM:
+		case ENOTDIR:
+			result[i] = error;
+			break;
+		case 0:
+			/* run this access check */
+			result[i] = access1(vp, dvp, input[i].ad_flags, &context);
+			break;
+		default:
+			/* fatal lookup error */
+
+			goto out;
+		}
+	}
+
+	AUDIT_ARG(data, result, sizeof(errno_t), desc_actual);
+
+	/* copy out results */
+	error = copyout(result, uap->results, desc_actual * sizeof(errno_t));
+
+out:
+	if (input && input != stack_input)
+		FREE(input, M_TEMP);
+	if (result)
+		FREE(result, M_TEMP);
+	if (vp)
+		vnode_put(vp);
+	if (dvp)
+		vnode_put(dvp);
+	if (IS_VALID_CRED(context.vc_ucred))
+		kauth_cred_unref(&context.vc_ucred);
+	return(error);
+}
+
+
+/*
+ * Common implementation of access(2)/faccessat(2).
+ *
+ * Returns:	0			Success
+ *	namei:EFAULT		Bad address
+ *	namei:ENAMETOOLONG	Filename too long
+ *	namei:ENOENT		No such file or directory
+ *	namei:ELOOP		Too many levels of symbolic links
+ *	namei:EBADF		Bad file descriptor
+ *	namei:ENOTDIR		Not a directory
+ *	namei:???
+ *	access1:
+ */
+static int
+faccessat_internal(vfs_context_t ctx, int fd, user_addr_t path, int amode,
+    int flag, enum uio_seg segflg)
+{
+	int error;
+	struct nameidata nd;
+	int niopts;
+	struct vfs_context context;
+#if NAMEDRSRCFORK
+	int is_namedstream = 0;
+#endif
+
+	/*
+	 * Unless the AT_EACCESS option is used, Access is defined as checking
+	 * against the process' real identity, even if operations are checking
+	 * the effective identity. So we need to tweak the credential
+	 * in the context for that case.
+	 */
+	if (!(flag & AT_EACCESS))
+		context.vc_ucred = kauth_cred_copy_real(kauth_cred_get());
+	else
+		context.vc_ucred = ctx->vc_ucred;
+	context.vc_thread = ctx->vc_thread;
+
+
+	niopts = FOLLOW | AUDITVNPATH1;
+	/* need parent for vnode_authorize for deletion test */
+	if (amode & _DELETE_OK)
+		niopts |= WANTPARENT;
+	NDINIT(&nd, LOOKUP, OP_ACCESS, niopts, segflg,
+	    path, &context);
+
+#if NAMEDRSRCFORK
+	/* access(F_OK) calls are allowed for resource forks. */
+	if (amode == F_OK)
+		nd.ni_cnd.cn_flags |= CN_ALLOWRSRCFORK;
+#endif
+	error = nameiat(&nd, fd);
+	if (error)
+		goto out;
+
+#if NAMEDRSRCFORK
+	/* Grab reference on the shadow stream file vnode to
+	 * force an inactive on release which will mark it
+	 * for recycle.
+	 */
+	if (vnode_isnamedstream(nd.ni_vp) &&
+	    (nd.ni_vp->v_parent != NULLVP) &&
+	    vnode_isshadow(nd.ni_vp)) {
+		is_namedstream = 1;
+		vnode_ref(nd.ni_vp);
+	}
+#endif
+
+	error = access1(nd.ni_vp, nd.ni_dvp, amode, &context);
+
+#if NAMEDRSRCFORK
+	if (is_namedstream) {
+		vnode_rele(nd.ni_vp);
+	}
+#endif
+
+	/* The parent iocount was only taken when probing delete rights. */
+	vnode_put(nd.ni_vp);
+	if (amode & _DELETE_OK)
+		vnode_put(nd.ni_dvp);
+	nameidone(&nd);
+
+out:
+	/* Drop the real-identity credential copied above. */
+	if (!(flag & AT_EACCESS))
+		kauth_cred_unref(&context.vc_ucred);
+	return (error);
+}
+
+/*
+ * access(2): check real-identity access permissions for a path relative
+ * to the current working directory.
+ */
+int
+access(__unused proc_t p, struct access_args *uap, __unused int32_t *retval)
+{
+	vfs_context_t ctx = vfs_context_current();
+
+	return (faccessat_internal(ctx, AT_FDCWD, uap->path, uap->flags,
+	    0, UIO_USERSPACE));
+}
+
+/*
+ * faccessat(2): like access(2) but relative to uap->fd; AT_EACCESS
+ * selects checking against the effective rather than real identity.
+ */
+int
+faccessat(__unused proc_t p, struct faccessat_args *uap,
+    __unused int32_t *retval)
+{
+	if (uap->flag & ~AT_EACCESS) {
+		return (EINVAL);
+	}
+
+	return (faccessat_internal(vfs_context_current(), uap->fd,
+	    uap->path, uap->amode, uap->flag, UIO_USERSPACE));
+}
+
+/*
+ * Common implementation of the stat(2) family.
+ *
+ * Looks up 'path' (relative to 'fd', honoring AT_SYMLINK_NOFOLLOW in
+ * 'flag'), fetches the attributes with vn_stat(), munges them into the
+ * caller-appropriate layout (32/64-bit process x stat/stat64 selected by
+ * 'isstat64') and copies the result out to 'ub'.  When 'xsecurity' is
+ * non-NULL the extended security information (ACL) is copied out as well
+ * and its size is stored through 'xsecurity_size'.
+ *
+ * Returns:	0			Success
+ *	EFAULT
+ *	copyout:EFAULT
+ *	namei:???
+ *	vn_stat:???
+ */
+static int
+fstatat_internal(vfs_context_t ctx, user_addr_t path, user_addr_t ub,
+    user_addr_t xsecurity, user_addr_t xsecurity_size, int isstat64,
+    enum uio_seg segflg, int fd, int flag)
+{
+	struct nameidata nd;
+	int follow;
+	union {
+		struct stat sb;
+		struct stat64 sb64;
+	} source;
+	union {
+		struct user64_stat user64_sb;
+		struct user32_stat user32_sb;
+		struct user64_stat64 user64_sb64;
+		struct user32_stat64 user32_sb64;
+	} dest;
+	caddr_t sbp;
+	int error, my_size;
+	kauth_filesec_t fsec;
+	size_t xsecurity_bufsize;
+	void * statptr;
+
+	follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
+	NDINIT(&nd, LOOKUP, OP_GETATTR, follow | AUDITVNPATH1,
+	    segflg, path, ctx);
+
+#if NAMEDRSRCFORK
+	int is_namedstream = 0;
+	/* stat calls are allowed for resource forks. */
+	nd.ni_cnd.cn_flags |= CN_ALLOWRSRCFORK;
+#endif
+	error = nameiat(&nd, fd);
+	if (error)
+		return (error);
+	fsec = KAUTH_FILESEC_NONE;
+
+	statptr = (void *)&source;
+
+#if NAMEDRSRCFORK
+	/* Grab reference on the shadow stream file vnode to
+	 * force an inactive on release which will mark it
+	 * for recycle.
+	 */
+	if (vnode_isnamedstream(nd.ni_vp) &&
+	    (nd.ni_vp->v_parent != NULLVP) &&
+	    vnode_isshadow(nd.ni_vp)) {
+		is_namedstream = 1;
+		vnode_ref(nd.ni_vp);
+	}
+#endif
+
+	error = vn_stat(nd.ni_vp, statptr, (xsecurity != USER_ADDR_NULL ? &fsec : NULL), isstat64, ctx);
+
+#if NAMEDRSRCFORK
+	if (is_namedstream) {
+		vnode_rele(nd.ni_vp);
+	}
+#endif
+	vnode_put(nd.ni_vp);
+	nameidone(&nd);
+
+	if (error)
+		return (error);
+	/* Zap spare fields */
+	if (isstat64 != 0) {
+		source.sb64.st_lspare = 0;
+		source.sb64.st_qspare[0] = 0LL;
+		source.sb64.st_qspare[1] = 0LL;
+
+		/*
+		 * Check if we raced (post lookup) against the last unlink of
+		 * a file.  This must happen BEFORE the stat data is munged
+		 * into the user-format buffer below, otherwise the corrected
+		 * link count never reaches the caller.
+		 */
+		if ((source.sb64.st_nlink == 0) && S_ISREG(source.sb64.st_mode)) {
+			source.sb64.st_nlink = 1;
+		}
+
+		if (IS_64BIT_PROCESS(vfs_context_proc(ctx))) {
+			munge_user64_stat64(&source.sb64, &dest.user64_sb64);
+			my_size = sizeof(dest.user64_sb64);
+			sbp = (caddr_t)&dest.user64_sb64;
+		} else {
+			munge_user32_stat64(&source.sb64, &dest.user32_sb64);
+			my_size = sizeof(dest.user32_sb64);
+			sbp = (caddr_t)&dest.user32_sb64;
+		}
+	} else {
+		source.sb.st_lspare = 0;
+		source.sb.st_qspare[0] = 0LL;
+		source.sb.st_qspare[1] = 0LL;
+
+		/*
+		 * Check if we raced (post lookup) against the last unlink of
+		 * a file (as above: must precede the munge).
+		 */
+		if ((source.sb.st_nlink == 0) && S_ISREG(source.sb.st_mode)) {
+			source.sb.st_nlink = 1;
+		}
+
+		if (IS_64BIT_PROCESS(vfs_context_proc(ctx))) {
+			munge_user64_stat(&source.sb, &dest.user64_sb);
+			my_size = sizeof(dest.user64_sb);
+			sbp = (caddr_t)&dest.user64_sb;
+		} else {
+			munge_user32_stat(&source.sb, &dest.user32_sb);
+			my_size = sizeof(dest.user32_sb);
+			sbp = (caddr_t)&dest.user32_sb;
+		}
+	}
+	if ((error = copyout(sbp, ub, my_size)) != 0)
+		goto out;
+
+	/* caller wants extended security information? */
+	if (xsecurity != USER_ADDR_NULL) {
+
+		/* did we get any? */
+		if (fsec == KAUTH_FILESEC_NONE) {
+			if (susize(xsecurity_size, 0) != 0) {
+				error = EFAULT;
+				goto out;
+			}
+		} else {
+			/* find the user buffer size */
+			xsecurity_bufsize = fusize(xsecurity_size);
+
+			/* copy out the actual data size */
+			if (susize(xsecurity_size, KAUTH_FILESEC_COPYSIZE(fsec)) != 0) {
+				error = EFAULT;
+				goto out;
+			}
+
+			/* if the caller supplied enough room, copy out to it */
+			if (xsecurity_bufsize >= KAUTH_FILESEC_COPYSIZE(fsec))
+				error = copyout(fsec, xsecurity, KAUTH_FILESEC_COPYSIZE(fsec));
+		}
+	}
+out:
+	if (fsec != KAUTH_FILESEC_NONE)
+		kauth_filesec_free(fsec);
+	return (error);
+}
+
+/*
+ * stat_extended: Get file status; with extended security (ACL).
+ *
+ * Parameters: p (ignored)
+ * uap User argument descriptor (see below)
+ * retval (ignored)
+ *
+ * Indirect: uap->path Path of file to get status from
+ * uap->ub User buffer (holds file status info)
+ * uap->xsecurity ACL to get (extended security)
+ * uap->xsecurity_size Size of ACL
+ *
+ * Returns: 0 Success
+ * !0 errno value
+ *
+ */
+int
+stat_extended(__unused proc_t p, struct stat_extended_args *uap,
+    __unused int32_t *retval)
+{
+	vfs_context_t ctx = vfs_context_current();
+
+	/* 32-bit-inode stat, follow links, with ACL copyout. */
+	return (fstatat_internal(ctx, uap->path, uap->ub, uap->xsecurity,
+	    uap->xsecurity_size, 0, UIO_USERSPACE, AT_FDCWD, 0));
+}
+
+/*
+ * stat(2): get file status, following symlinks.
+ *
+ * Returns: 0 Success
+ *	fstatat_internal:??? [see fstatat_internal() in this file]
+ */
+int
+stat(__unused proc_t p, struct stat_args *uap, __unused int32_t *retval)
+{
+	vfs_context_t ctx = vfs_context_current();
+
+	return (fstatat_internal(ctx, uap->path, uap->ub, 0, 0, 0,
+	    UIO_USERSPACE, AT_FDCWD, 0));
+}
+
+/*
+ * stat64(2): get file status with large-inode (stat64) layout,
+ * following symlinks.
+ */
+int
+stat64(__unused proc_t p, struct stat64_args *uap, __unused int32_t *retval)
+{
+	vfs_context_t ctx = vfs_context_current();
+
+	return (fstatat_internal(ctx, uap->path, uap->ub, 0, 0, 1,
+	    UIO_USERSPACE, AT_FDCWD, 0));
+}
+
+/*
+ * stat64_extended: Get file status; can handle large inode numbers; with extended security (ACL).
+ *
+ * Parameters: p (ignored)
+ * uap User argument descriptor (see below)
+ * retval (ignored)
+ *
+ * Indirect: uap->path Path of file to get status from
+ * uap->ub User buffer (holds file status info)
+ * uap->xsecurity ACL to get (extended security)
+ * uap->xsecurity_size Size of ACL
+ *
+ * Returns: 0 Success
+ * !0 errno value
+ *
+ */
+int
+stat64_extended(__unused proc_t p, struct stat64_extended_args *uap, __unused int32_t *retval)
+{
+	vfs_context_t ctx = vfs_context_current();
+
+	/* stat64 layout, follow links, with ACL copyout. */
+	return (fstatat_internal(ctx, uap->path, uap->ub, uap->xsecurity,
+	    uap->xsecurity_size, 1, UIO_USERSPACE, AT_FDCWD, 0));
+}
+
+/*
+ * lstat_extended: Get file status; does not follow links; with extended security (ACL).
+ *
+ * Parameters: p (ignored)
+ * uap User argument descriptor (see below)
+ * retval (ignored)
+ *
+ * Indirect: uap->path Path of file to get status from
+ * uap->ub User buffer (holds file status info)
+ * uap->xsecurity ACL to get (extended security)
+ * uap->xsecurity_size Size of ACL
+ *
+ * Returns: 0 Success
+ * !0 errno value
+ *
+ */
+int
+lstat_extended(__unused proc_t p, struct lstat_extended_args *uap, __unused int32_t *retval)
+{
+	vfs_context_t ctx = vfs_context_current();
+
+	/* No final-symlink follow; ACL copyout requested. */
+	return (fstatat_internal(ctx, uap->path, uap->ub, uap->xsecurity,
+	    uap->xsecurity_size, 0, UIO_USERSPACE, AT_FDCWD,
+	    AT_SYMLINK_NOFOLLOW));
+}
+
+/*
+ * lstat(2): get file status without following a trailing symlink.
+ */
+int
+lstat(__unused proc_t p, struct lstat_args *uap, __unused int32_t *retval)
+{
+	vfs_context_t ctx = vfs_context_current();
+
+	return (fstatat_internal(ctx, uap->path, uap->ub, 0, 0, 0,
+	    UIO_USERSPACE, AT_FDCWD, AT_SYMLINK_NOFOLLOW));
+}
+
+/*
+ * lstat64(2): stat64 layout, no final-symlink follow.
+ */
+int
+lstat64(__unused proc_t p, struct lstat64_args *uap, __unused int32_t *retval)
+{
+	vfs_context_t ctx = vfs_context_current();
+
+	return (fstatat_internal(ctx, uap->path, uap->ub, 0, 0, 1,
+	    UIO_USERSPACE, AT_FDCWD, AT_SYMLINK_NOFOLLOW));
+}
+
+/*
+ * lstat64_extended: Get file status; can handle large inode numbers; does not
+ * follow links; with extended security (ACL).
+ *
+ * Parameters: p (ignored)
+ * uap User argument descriptor (see below)
+ * retval (ignored)
+ *
+ * Indirect: uap->path Path of file to get status from
+ * uap->ub User buffer (holds file status info)
+ * uap->xsecurity ACL to get (extended security)
+ * uap->xsecurity_size Size of ACL
+ *
+ * Returns: 0 Success
+ * !0 errno value
+ *
+ */
+int
+lstat64_extended(__unused proc_t p, struct lstat64_extended_args *uap, __unused int32_t *retval)
+{
+	vfs_context_t ctx = vfs_context_current();
+
+	/* stat64 layout, no final-symlink follow, with ACL copyout. */
+	return (fstatat_internal(ctx, uap->path, uap->ub, uap->xsecurity,
+	    uap->xsecurity_size, 1, UIO_USERSPACE, AT_FDCWD,
+	    AT_SYMLINK_NOFOLLOW));
+}
+
+/*
+ * fstatat(2): stat relative to a directory fd; only AT_SYMLINK_NOFOLLOW
+ * is accepted in uap->flag.
+ */
+int
+fstatat(__unused proc_t p, struct fstatat_args *uap, __unused int32_t *retval)
+{
+	vfs_context_t ctx = vfs_context_current();
+
+	if (uap->flag & ~AT_SYMLINK_NOFOLLOW) {
+		return (EINVAL);
+	}
+
+	return (fstatat_internal(ctx, uap->path, uap->ub, 0, 0, 0,
+	    UIO_USERSPACE, uap->fd, uap->flag));
+}
+
+/*
+ * fstatat64(2): like fstatat(2) but produces the stat64 layout.
+ */
+int
+fstatat64(__unused proc_t p, struct fstatat64_args *uap,
+    __unused int32_t *retval)
+{
+	vfs_context_t ctx = vfs_context_current();
+
+	if (uap->flag & ~AT_SYMLINK_NOFOLLOW) {
+		return (EINVAL);
+	}
+
+	return (fstatat_internal(ctx, uap->path, uap->ub, 0, 0, 1,
+	    UIO_USERSPACE, uap->fd, uap->flag));
+}
+
+/*
+ * Get configurable pathname variables.
+ *
+ * Returns: 0 Success
+ * namei:???
+ * vn_pathconf:???
+ *
+ * Notes: Global implementation constants are intended to be
+ * implemented in this function directly; all other constants
+ * are per-FS implementation, and therefore must be handled in
+ * each respective FS, instead.
+ *
+ * XXX We implement some things globally right now that should actually be
+ * XXX per-FS; we will need to deal with this at some point.
+ */
+/* ARGSUSED */
+int
+pathconf(__unused proc_t p, struct pathconf_args *uap, int32_t *retval)
+{
+	vfs_context_t ctx = vfs_context_current();
+	struct nameidata nd;
+	int error;
+
+	/* Resolve the path, then ask the vnode layer for the variable. */
+	NDINIT(&nd, LOOKUP, OP_PATHCONF, FOLLOW | AUDITVNPATH1,
+	    UIO_USERSPACE, uap->path, ctx);
+	error = namei(&nd);
+	if (error == 0) {
+		error = vn_pathconf(nd.ni_vp, uap->name, retval, ctx);
+		vnode_put(nd.ni_vp);
+		nameidone(&nd);
+	}
+	return (error);
+}
+
+/*
+ * Return target name of a symbolic link.
+ *
+ * Reads the contents of the symlink at 'path' (resolved relative to
+ * 'fd', never following the final component) into the caller's buffer
+ * 'buf'/'bufsize'; the byte count produced is stored through *retval.
+ */
+/* ARGSUSED */
+static int
+readlinkat_internal(vfs_context_t ctx, int fd, user_addr_t path,
+    enum uio_seg seg, user_addr_t buf, size_t bufsize, enum uio_seg bufseg,
+    int *retval)
+{
+	vnode_t vp;
+	uio_t auio;
+	int error;
+	struct nameidata nd;
+	char uio_buf[ UIO_SIZEOF(1) ];
+
+	NDINIT(&nd, LOOKUP, OP_READLINK, NOFOLLOW | AUDITVNPATH1,
+	    seg, path, ctx);
+
+	error = nameiat(&nd, fd);
+	if (error)
+		return (error);
+	vp = nd.ni_vp;
+
+	nameidone(&nd);
+
+	/* Wrap the caller's buffer in a single-iovec uio for VNOP_READLINK. */
+	auio = uio_createwithbuffer(1, 0, bufseg, UIO_READ,
+	    &uio_buf[0], sizeof(uio_buf));
+	uio_addiov(auio, buf, bufsize);
+	if (vp->v_type != VLNK) {
+		error = EINVAL;
+	} else {
+#if CONFIG_MACF
+		error = mac_vnode_check_readlink(ctx, vp);
+#endif
+		if (error == 0)
+			error = vnode_authorize(vp, NULL, KAUTH_VNODE_READ_DATA,
+			    ctx);
+		if (error == 0)
+			error = VNOP_READLINK(vp, auio, ctx);
+	}
+	vnode_put(vp);
+
+	/*
+	 * Bytes transferred; computed even on error (historical behavior —
+	 * callers only consume it on success).
+	 */
+	*retval = bufsize - (int)uio_resid(auio);
+	return (error);
+}
+
+int
+readlink(proc_t p, struct readlink_args *uap, int32_t *retval)
+{
+ enum uio_seg procseg;
+
+ procseg = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
+ return (readlinkat_internal(vfs_context_current(), AT_FDCWD,
+ CAST_USER_ADDR_T(uap->path), procseg, CAST_USER_ADDR_T(uap->buf),
+ uap->count, procseg, retval));
+}
+
+int
+readlinkat(proc_t p, struct readlinkat_args *uap, int32_t *retval)
+{
+ enum uio_seg procseg;
+
+ procseg = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
+ return (readlinkat_internal(vfs_context_current(), uap->fd, uap->path,
+ procseg, uap->buf, uap->bufsize, procseg, retval));
+}
+
/*
 * Change file flags.
 *
 * Common backend for chflags(2)/fchflags(2).  Takes ownership of the
 * caller's iocount on 'vp' and drops it on every exit path via vnode_put();
 * callers must NOT put the vnode themselves.
 */
static int
chflags1(vnode_t vp, int flags, vfs_context_t ctx)
{
	struct vnode_attr va;
	kauth_action_t action;
	int error;

	VATTR_INIT(&va);
	VATTR_SET(&va, va_flags, flags);

#if CONFIG_MACF
	/* MAC policy veto comes before any authorization work. */
	error = mac_vnode_check_setflags(ctx, vp, flags);
	if (error)
		goto out;
#endif

	/* request authorisation, disregard immutability */
	if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0)
		goto out;
	/*
	 * Request that the auth layer disregard those file flags it's allowed to when
	 * authorizing this operation; we need to do this in order to be able to
	 * clear immutable flags.
	 */
	if (action && ((error = vnode_authorize(vp, NULL, action | KAUTH_VNODE_NOIMMUTABLE, ctx)) != 0))
		goto out;
	error = vnode_setattr(vp, &va, ctx);

	/* Report ENOTSUP when the filesystem silently ignored va_flags. */
	if ((error == 0) && !VATTR_IS_SUPPORTED(&va, va_flags)) {
		error = ENOTSUP;
	}
out:
	vnode_put(vp);	/* balance the reference the caller passed in */
	return(error);
}
+
/*
 * Change flags of a file given a path name.
 */
/* ARGSUSED */
int
chflags(__unused proc_t p, struct chflags_args *uap, __unused int32_t *retval)
{
	vnode_t vp;
	vfs_context_t ctx = vfs_context_current();
	int error;
	struct nameidata nd;

	AUDIT_ARG(fflags, uap->flags);
	/* FOLLOW: chflags(2) applies to the target of a symlink. */
	NDINIT(&nd, LOOKUP, OP_SETATTR, FOLLOW | AUDITVNPATH1,
	    UIO_USERSPACE, uap->path, ctx);
	error = namei(&nd);
	if (error)
		return (error);
	vp = nd.ni_vp;
	nameidone(&nd);

	/* chflags1() consumes the iocount on vp — no vnode_put() here. */
	error = chflags1(vp, uap->flags, ctx);

	return(error);
}
+
/*
 * Change flags of a file given a file descriptor.
 */
/* ARGSUSED */
int
fchflags(__unused proc_t p, struct fchflags_args *uap, __unused int32_t *retval)
{
	vnode_t vp;
	int error;

	AUDIT_ARG(fd, uap->fd);
	AUDIT_ARG(fflags, uap->flags);
	/* Map the descriptor to its vnode (holds an fd reference). */
	if ( (error = file_vnode(uap->fd, &vp)) )
		return (error);

	/* Take an iocount so the vnode stays usable across the change. */
	if ((error = vnode_getwithref(vp))) {
		file_drop(uap->fd);
		return(error);
	}

	AUDIT_ARG(vnpath, vp, ARG_VNODE1);

	/* chflags1() consumes the iocount on vp. */
	error = chflags1(vp, uap->flags, vfs_context_current());

	file_drop(uap->fd);
	return (error);
}
+
/*
 * Change security information on a filesystem object.
 *
 * Returns:	0			Success
 *		EPERM			Operation not permitted
 *	vnode_authattr:???		[anything vnode_authattr can return]
 *	vnode_authorize:???		[anything vnode_authorize can return]
 *	vnode_setattr:???		[anything vnode_setattr can return]
 *
 * Notes:	If vnode_authattr or vnode_authorize return EACCES, it will be
 *		translated to EPERM before being returned.
 *
 *		The caller retains its iocount on 'vp'; nothing is released
 *		here.
 */
static int
chmod_vnode(vfs_context_t ctx, vnode_t vp, struct vnode_attr *vap)
{
	kauth_action_t action;
	int error;

	AUDIT_ARG(mode, vap->va_mode);
	/* XXX audit new args */

#if NAMEDSTREAMS
	/* chmod calls are not allowed for resource forks. */
	if (vp->v_flag & VISNAMEDSTREAM) {
		return (EPERM);
	}
#endif

#if CONFIG_MACF
	/* Give MAC policies a chance to veto a mode change. */
	if (VATTR_IS_ACTIVE(vap, va_mode) &&
	    (error = mac_vnode_check_setmode(ctx, vp, (mode_t)vap->va_mode)) != 0)
		return (error);
#endif

	/* make sure that the caller is allowed to set this security information */
	if (((error = vnode_authattr(vp, vap, &action, ctx)) != 0) ||
	    ((error = vnode_authorize(vp, NULL, action, ctx)) != 0)) {
		/* see Notes above: permission failures surface as EPERM */
		if (error == EACCES)
			error = EPERM;
		return(error);
	}

	error = vnode_setattr(vp, vap, ctx);

	return (error);
}
+
+
/*
 * Change mode of a file given a path name.
 *
 * Returns:	0			Success
 *	namei:???			[anything namei can return]
 *	chmod_vnode:???			[anything chmod_vnode can return]
 */
static int
chmodat(vfs_context_t ctx, user_addr_t path, struct vnode_attr *vap,
    int fd, int flag, enum uio_seg segflg)
{
	struct nameidata nd;
	int follow, error;

	/* AT_SYMLINK_NOFOLLOW selects operating on the link itself. */
	follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
	NDINIT(&nd, LOOKUP, OP_SETATTR, follow | AUDITVNPATH1,
	    segflg, path, ctx);
	if ((error = nameiat(&nd, fd)))
		return (error);
	error = chmod_vnode(ctx, nd.ni_vp, vap);
	/* Drop the iocount from nameiat(), then release lookup state. */
	vnode_put(nd.ni_vp);
	nameidone(&nd);
	return(error);
}
+
/*
 * chmod_extended: Change the mode of a file given a path name; with extended
 * argument list (including extended security (ACL)).
 *
 * Parameters:	p			Process requesting the open
 *		uap			User argument descriptor (see below)
 *		retval			(ignored)
 *
 * Indirect:	uap->path		Path to object (same as 'chmod')
 *		uap->uid		UID to set
 *		uap->gid		GID to set
 *		uap->mode		File mode to set (same as 'chmod')
 *		uap->xsecurity		ACL to set (or delete)
 *
 * Returns:	0			Success
 *		!0			errno value
 *
 * Notes:	The kauth_filesec_t in 'va', if any, is in host byte order.
 *
 * XXX:	We should enummerate the possible errno values here, and where
 *	in the code they originated.
 */
int
chmod_extended(__unused proc_t p, struct chmod_extended_args *uap, __unused int32_t *retval)
{
	int error;
	struct vnode_attr va;
	kauth_filesec_t xsecdst;

	AUDIT_ARG(owner, uap->uid, uap->gid);

	/* -1 / KAUTH_*_NONE are "leave unchanged" sentinels. */
	VATTR_INIT(&va);
	if (uap->mode != -1)
		VATTR_SET(&va, va_mode, uap->mode & ALLPERMS);
	if (uap->uid != KAUTH_UID_NONE)
		VATTR_SET(&va, va_uid, uap->uid);
	if (uap->gid != KAUTH_GID_NONE)
		VATTR_SET(&va, va_gid, uap->gid);

	xsecdst = NULL;
	switch(uap->xsecurity) {
	/* explicit remove request */
	case CAST_USER_ADDR_T((void *)1):	/* _FILESEC_REMOVE_ACL */
		VATTR_SET(&va, va_acl, NULL);
		break;
	/* not being set */
	case USER_ADDR_NULL:
		break;
	default:
		/* Copy the caller's filesec in; freed below on all paths. */
		if ((error = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0)
			return(error);
		VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
		KAUTH_DEBUG("CHMOD - setting ACL with %d entries", va.va_acl->acl_entrycount);
	}

	error = chmodat(vfs_context_current(), uap->path, &va, AT_FDCWD, 0,
	    UIO_USERSPACE);

	if (xsecdst != NULL)
		kauth_filesec_free(xsecdst);
	return(error);
}
+
+/*
+ * Returns: 0 Success
+ * chmodat:??? [anything chmodat can return]
+ */
+static int
+fchmodat_internal(vfs_context_t ctx, user_addr_t path, int mode, int fd,
+ int flag, enum uio_seg segflg)
+{
+ struct vnode_attr va;
+
+ VATTR_INIT(&va);
+ VATTR_SET(&va, va_mode, mode & ALLPERMS);
+
+ return (chmodat(ctx, path, &va, fd, flag, segflg));
+}
+
+int
+chmod(__unused proc_t p, struct chmod_args *uap, __unused int32_t *retval)
+{
+ return (fchmodat_internal(vfs_context_current(), uap->path, uap->mode,
+ AT_FDCWD, 0, UIO_USERSPACE));
+}
+
+int
+fchmodat(__unused proc_t p, struct fchmodat_args *uap, __unused int32_t *retval)
+{
+ if (uap->flag & ~AT_SYMLINK_NOFOLLOW)
+ return (EINVAL);
+
+ return (fchmodat_internal(vfs_context_current(), uap->path, uap->mode,
+ uap->fd, uap->flag, UIO_USERSPACE));
+}
+
/*
 * Change mode of a file given a file descriptor.
 *
 * Resolves fd to its vnode, takes an iocount, applies the attribute change
 * via chmod_vnode(), then releases the iocount and the fd reference.
 */
static int
fchmod1(__unused proc_t p, int fd, struct vnode_attr *vap)
{
	vnode_t vp;
	int error;

	AUDIT_ARG(fd, fd);

	if ((error = file_vnode(fd, &vp)) != 0)
		return (error);
	/* Hold an iocount across the attribute change. */
	if ((error = vnode_getwithref(vp)) != 0) {
		file_drop(fd);
		return(error);
	}
	AUDIT_ARG(vnpath, vp, ARG_VNODE1);

	error = chmod_vnode(vfs_context_current(), vp, vap);
	(void)vnode_put(vp);
	file_drop(fd);

	return (error);
}
+
/*
 * fchmod_extended: Change mode of a file given a file descriptor; with
 * extended argument list (including extended security (ACL)).
 *
 * Parameters:	p			Process requesting to change file mode
 *		uap			User argument descriptor (see below)
 *		retval			(ignored)
 *
 * Indirect:	uap->mode		File mode to set (same as 'chmod')
 *		uap->uid		UID to set
 *		uap->gid		GID to set
 *		uap->xsecurity		ACL to set (or delete)
 *		uap->fd			File descriptor of file to change mode
 *
 * Returns:	0			Success
 *		!0			errno value
 *
 */
int
fchmod_extended(proc_t p, struct fchmod_extended_args *uap, __unused int32_t *retval)
{
	int error;
	struct vnode_attr va;
	kauth_filesec_t xsecdst;

	AUDIT_ARG(owner, uap->uid, uap->gid);

	/* -1 / KAUTH_*_NONE mean "leave this attribute unchanged". */
	VATTR_INIT(&va);
	if (uap->mode != -1)
		VATTR_SET(&va, va_mode, uap->mode & ALLPERMS);
	if (uap->uid != KAUTH_UID_NONE)
		VATTR_SET(&va, va_uid, uap->uid);
	if (uap->gid != KAUTH_GID_NONE)
		VATTR_SET(&va, va_gid, uap->gid);

	xsecdst = NULL;
	switch(uap->xsecurity) {
	/*
	 * NOTE(review): both NULL and the _FILESEC_REMOVE_ACL sentinel clear
	 * the ACL here, whereas chmod_extended() treats NULL as "not set" —
	 * confirm this asymmetry is intentional.
	 */
	case USER_ADDR_NULL:
		VATTR_SET(&va, va_acl, NULL);
		break;
	case CAST_USER_ADDR_T((void *)1):	/* _FILESEC_REMOVE_ACL */
		VATTR_SET(&va, va_acl, NULL);
		break;
	/* not being set */
	case CAST_USER_ADDR_T(-1):
		break;
	default:
		/* xsecdst is heap-allocated here and freed after fchmod1(). */
		if ((error = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0)
			return(error);
		VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
	}

	error = fchmod1(p, uap->fd, &va);


	/* Only the default (copied-in) case above allocated a filesec. */
	switch(uap->xsecurity) {
	case USER_ADDR_NULL:
	case CAST_USER_ADDR_T(-1):
		break;
	default:
		if (xsecdst != NULL)
			kauth_filesec_free(xsecdst);
	}
	return(error);
}
+
+int
+fchmod(proc_t p, struct fchmod_args *uap, __unused int32_t *retval)
+{
+ struct vnode_attr va;
+
+ VATTR_INIT(&va);
+ VATTR_SET(&va, va_mode, uap->mode & ALLPERMS);
+
+ return(fchmod1(p, uap->fd, &va));
+}
+
+
/*
 * Set ownership given a path name.
 *
 * Common backend for chown(2), lchown(2) and fchownat(2).  A uid/gid of
 * VNOVAL means "leave that id unchanged"; AT_SYMLINK_NOFOLLOW in 'flag'
 * makes the change apply to a symlink itself rather than its target.
 */
/* ARGSUSED */
static int
fchownat_internal(vfs_context_t ctx, int fd, user_addr_t path, uid_t uid,
    gid_t gid, int flag, enum uio_seg segflg)
{
	vnode_t vp;
	struct vnode_attr va;
	int error;
	struct nameidata nd;
	int follow;
	kauth_action_t action;

	AUDIT_ARG(owner, uid, gid);

	follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
	NDINIT(&nd, LOOKUP, OP_SETATTR, follow | AUDITVNPATH1, segflg,
	    path, ctx);
	error = nameiat(&nd, fd);
	if (error)
		return (error);
	vp = nd.ni_vp;

	nameidone(&nd);

	VATTR_INIT(&va);
	if (uid != (uid_t)VNOVAL)
		VATTR_SET(&va, va_uid, uid);
	if (gid != (gid_t)VNOVAL)
		VATTR_SET(&va, va_gid, gid);

#if CONFIG_MACF
	error = mac_vnode_check_setowner(ctx, vp, uid, gid);
	if (error)
		goto out;
#endif

	/* preflight and authorize attribute changes */
	if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0)
		goto out;
	if (action && ((error = vnode_authorize(vp, NULL, action, ctx)) != 0))
		goto out;
	error = vnode_setattr(vp, &va, ctx);

out:
	/*
	 * EACCES is only allowed from namei(); permissions failure should
	 * return EPERM, so we need to translate the error code.
	 */
	if (error == EACCES)
		error = EPERM;

	/* Drop the iocount taken by nameiat(). */
	vnode_put(vp);
	return (error);
}
+
+int
+chown(__unused proc_t p, struct chown_args *uap, __unused int32_t *retval)
+{
+ return (fchownat_internal(vfs_context_current(), AT_FDCWD, uap->path,
+ uap->uid, uap->gid, 0, UIO_USERSPACE));
+}
+
+int
+lchown(__unused proc_t p, struct lchown_args *uap, __unused int32_t *retval)
+{
+ return (fchownat_internal(vfs_context_current(), AT_FDCWD, uap->path,
+ uap->owner, uap->group, AT_SYMLINK_NOFOLLOW, UIO_USERSPACE));
+}
+
+int
+fchownat(__unused proc_t p, struct fchownat_args *uap, __unused int32_t *retval)
+{
+ if (uap->flag & ~AT_SYMLINK_NOFOLLOW)
+ return (EINVAL);
+
+ return (fchownat_internal(vfs_context_current(), uap->fd, uap->path,
+ uap->uid, uap->gid, uap->flag, UIO_USERSPACE));
+}
+
/*
 * Set ownership given a file descriptor.
 */
/* ARGSUSED */
int
fchown(__unused proc_t p, struct fchown_args *uap, __unused int32_t *retval)
{
	struct vnode_attr va;
	vfs_context_t ctx = vfs_context_current();
	vnode_t vp;
	int error;
	kauth_action_t action;

	AUDIT_ARG(owner, uap->uid, uap->gid);
	AUDIT_ARG(fd, uap->fd);

	if ( (error = file_vnode(uap->fd, &vp)) )
		return (error);

	/* Hold an iocount on the vnode for the duration of the change. */
	if ( (error = vnode_getwithref(vp)) ) {
		file_drop(uap->fd);
		return(error);
	}
	AUDIT_ARG(vnpath, vp, ARG_VNODE1);

	/* VNOVAL means "leave this id unchanged". */
	VATTR_INIT(&va);
	if (uap->uid != VNOVAL)
		VATTR_SET(&va, va_uid, uap->uid);
	if (uap->gid != VNOVAL)
		VATTR_SET(&va, va_gid, uap->gid);

#if NAMEDSTREAMS
	/* chown calls are not allowed for resource forks. */
	if (vp->v_flag & VISNAMEDSTREAM) {
		error = EPERM;
		goto out;
	}
#endif

#if CONFIG_MACF
	error = mac_vnode_check_setowner(ctx, vp, uap->uid, uap->gid);
	if (error)
		goto out;
#endif

	/* preflight and authorize attribute changes */
	if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0)
		goto out;
	if (action && ((error = vnode_authorize(vp, NULL, action, ctx)) != 0)) {
		/* ownership failures are reported as EPERM, not EACCES */
		if (error == EACCES)
			error = EPERM;
		goto out;
	}
	error = vnode_setattr(vp, &va, ctx);

out:
	(void)vnode_put(vp);
	file_drop(uap->fd);
	return (error);
}
+
/*
 * Copy in a utimes-style timeval pair from 'usrtvp' and convert it to the
 * timespec pair tsp[0] (access time) and tsp[1] (modification time).  A
 * NULL user pointer means "use the current time for both".
 *
 * Returns 0 on success or an error from copyin().
 */
static int
getutimes(user_addr_t usrtvp, struct timespec *tsp)
{
	int error;

	if (usrtvp == USER_ADDR_NULL) {
		struct timeval old_tv;
		/* XXX Y2038 bug because of microtime argument */
		microtime(&old_tv);
		TIMEVAL_TO_TIMESPEC(&old_tv, &tsp[0]);
		tsp[1] = tsp[0];
	} else {
		/* The user buffer's layout depends on the process ABI. */
		if (IS_64BIT_PROCESS(current_proc())) {
			struct user64_timeval tv[2];
			error = copyin(usrtvp, (void *)tv, sizeof(tv));
			if (error)
				return (error);
			TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]);
			TIMEVAL_TO_TIMESPEC(&tv[1], &tsp[1]);
		} else {
			struct user32_timeval tv[2];
			error = copyin(usrtvp, (void *)tv, sizeof(tv));
			if (error)
				return (error);
			TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]);
			TIMEVAL_TO_TIMESPEC(&tv[1], &tsp[1]);
		}
	}
	return 0;
}
+
/*
 * Apply the access (ts[0]) and modification (ts[1]) times to vnode 'vp'.
 *
 * 'nullflag' is set when the caller passed a NULL times pointer ("set to
 * now"); it turns on VA_UTIMES_NULL for the filesystem's more permissive
 * utimes(NULL) permission check and suppresses the EACCES->EPERM
 * translation below.
 *
 * The caller retains its iocount on 'vp'; nothing is released here.
 */
static int
setutimes(vfs_context_t ctx, vnode_t vp, const struct timespec *ts,
    int nullflag)
{
	int error;
	struct vnode_attr va;
	kauth_action_t action;

	AUDIT_ARG(vnpath, vp, ARG_VNODE1);

	VATTR_INIT(&va);
	VATTR_SET(&va, va_access_time, ts[0]);
	VATTR_SET(&va, va_modify_time, ts[1]);
	if (nullflag)
		va.va_vaflags |= VA_UTIMES_NULL;

#if NAMEDSTREAMS
	/* utimes calls are not allowed for resource forks. */
	if (vp->v_flag & VISNAMEDSTREAM) {
		error = EPERM;
		goto out;
	}
#endif

#if CONFIG_MACF
	error = mac_vnode_check_setutimes(ctx, vp, ts[0], ts[1]);
	if (error)
		goto out;
#endif
	if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0) {
		/* explicit-times failures surface as EPERM, not EACCES */
		if (!nullflag && error == EACCES)
			error = EPERM;
		goto out;
	}

	/* since we may not need to auth anything, check here */
	if ((action != 0) && ((error = vnode_authorize(vp, NULL, action, ctx)) != 0)) {
		if (!nullflag && error == EACCES)
			error = EPERM;
		goto out;
	}
	error = vnode_setattr(vp, &va, ctx);

out:
	return error;
}
+
/*
 * Set the access and modification times of a file.
 */
/* ARGSUSED */
int
utimes(__unused proc_t p, struct utimes_args *uap, __unused int32_t *retval)
{
	struct timespec ts[2];
	user_addr_t usrtvp;
	int error;
	struct nameidata nd;
	vfs_context_t ctx = vfs_context_current();

	/*
	 * AUDIT: Needed to change the order of operations to do the
	 * name lookup first because auditing wants the path.
	 */
	NDINIT(&nd, LOOKUP, OP_SETATTR, FOLLOW | AUDITVNPATH1,
	    UIO_USERSPACE, uap->path, ctx);
	error = namei(&nd);
	if (error)
		return (error);
	nameidone(&nd);

	/*
	 * Fetch the user-supplied time. If usrtvp is USER_ADDR_NULL, we fetch
	 * the current time instead.
	 */
	usrtvp = uap->tptr;
	if ((error = getutimes(usrtvp, ts)) != 0)
		goto out;

	error = setutimes(ctx, nd.ni_vp, ts, usrtvp == USER_ADDR_NULL);

out:
	/* Drop the iocount held on ni_vp since the successful namei(). */
	vnode_put(nd.ni_vp);
	return (error);
}
+
/*
 * Set the access and modification times of a file.
 */
/* ARGSUSED */
int
futimes(__unused proc_t p, struct futimes_args *uap, __unused int32_t *retval)
{
	struct timespec ts[2];
	vnode_t vp;
	user_addr_t usrtvp;
	int error;

	AUDIT_ARG(fd, uap->fd);
	/* Copy in the times (or "now") before touching the descriptor. */
	usrtvp = uap->tptr;
	if ((error = getutimes(usrtvp, ts)) != 0)
		return (error);
	if ((error = file_vnode(uap->fd, &vp)) != 0)
		return (error);
	/* Hold an iocount on the vnode while setting the times. */
	if((error = vnode_getwithref(vp))) {
		file_drop(uap->fd);
		return(error);
	}

	error = setutimes(vfs_context_current(), vp, ts, usrtvp == 0);
	vnode_put(vp);
	file_drop(uap->fd);
	return(error);
}
+
+/*
+ * Truncate a file given its path name.
+ */
+/* ARGSUSED */
+int
+truncate(__unused proc_t p, struct truncate_args *uap, __unused int32_t *retval)
+{
+ vnode_t vp;
+ struct vnode_attr va;
+ vfs_context_t ctx = vfs_context_current();
+ int error;
+ struct nameidata nd;
+ kauth_action_t action;
+
+ if (uap->length < 0)
+ return(EINVAL);
+ NDINIT(&nd, LOOKUP, OP_TRUNCATE, FOLLOW | AUDITVNPATH1,
+ UIO_USERSPACE, uap->path, ctx);
+ if ((error = namei(&nd)))
+ return (error);
+ vp = nd.ni_vp;
+
+ nameidone(&nd);
+
+ VATTR_INIT(&va);
+ VATTR_SET(&va, va_data_size, uap->length);
+
+#if CONFIG_MACF
+ error = mac_vnode_check_truncate(ctx, NOCRED, vp);
+ if (error)
+ goto out;
+#endif
+
+ if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0)
+ goto out;
+ if ((action != 0) && ((error = vnode_authorize(vp, NULL, action, ctx)) != 0))
+ goto out;
+ error = vnode_setattr(vp, &va, ctx);
+out:
+ vnode_put(vp);
+ return (error);