]> git.saurik.com Git - apple/xnu.git/blame_incremental - bsd/vfs/vfs_syscalls.c
xnu-3248.20.55.tar.gz
[apple/xnu.git] / bsd / vfs / vfs_syscalls.c
... / ...
CommitLineData
1/*
2 * Copyright (c) 1995-2015 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28/*
29 * Copyright (c) 1989, 1993
30 * The Regents of the University of California. All rights reserved.
31 * (c) UNIX System Laboratories, Inc.
32 * All or some portions of this file are derived from material licensed
33 * to the University of California by American Telephone and Telegraph
34 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
35 * the permission of UNIX System Laboratories, Inc.
36 *
37 * Redistribution and use in source and binary forms, with or without
38 * modification, are permitted provided that the following conditions
39 * are met:
40 * 1. Redistributions of source code must retain the above copyright
41 * notice, this list of conditions and the following disclaimer.
42 * 2. Redistributions in binary form must reproduce the above copyright
43 * notice, this list of conditions and the following disclaimer in the
44 * documentation and/or other materials provided with the distribution.
45 * 3. All advertising materials mentioning features or use of this software
46 * must display the following acknowledgement:
47 * This product includes software developed by the University of
48 * California, Berkeley and its contributors.
49 * 4. Neither the name of the University nor the names of its contributors
50 * may be used to endorse or promote products derived from this software
51 * without specific prior written permission.
52 *
53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63 * SUCH DAMAGE.
64 *
65 * @(#)vfs_syscalls.c 8.41 (Berkeley) 6/15/95
66 */
67/*
68 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
69 * support for mandatory and extensible security protections. This notice
70 * is included in support of clause 2.2 (b) of the Apple Public License,
71 * Version 2.0.
72 */
73
74#include <sys/param.h>
75#include <sys/systm.h>
76#include <sys/namei.h>
77#include <sys/filedesc.h>
78#include <sys/kernel.h>
79#include <sys/file_internal.h>
80#include <sys/stat.h>
81#include <sys/vnode_internal.h>
82#include <sys/mount_internal.h>
83#include <sys/proc_internal.h>
84#include <sys/kauth.h>
85#include <sys/uio_internal.h>
86#include <sys/malloc.h>
87#include <sys/mman.h>
88#include <sys/dirent.h>
89#include <sys/attr.h>
90#include <sys/sysctl.h>
91#include <sys/ubc.h>
92#include <sys/quota.h>
93#include <sys/kdebug.h>
94#include <sys/fsevents.h>
95#include <sys/imgsrc.h>
96#include <sys/sysproto.h>
97#include <sys/xattr.h>
98#include <sys/fcntl.h>
99#include <sys/fsctl.h>
100#include <sys/ubc_internal.h>
101#include <sys/disk.h>
102#include <sys/content_protection.h>
103#include <machine/cons.h>
104#include <machine/limits.h>
105#include <miscfs/specfs/specdev.h>
106
107#include <security/audit/audit.h>
108#include <bsm/audit_kevents.h>
109
110#include <mach/mach_types.h>
111#include <kern/kern_types.h>
112#include <kern/kalloc.h>
113#include <kern/task.h>
114
115#include <vm/vm_pageout.h>
116
117#include <libkern/OSAtomic.h>
118#include <pexpert/pexpert.h>
119#include <IOKit/IOBSD.h>
120
121#if CONFIG_MACF
122#include <security/mac.h>
123#include <security/mac_framework.h>
124#endif
125
126#if CONFIG_FSE
127#define GET_PATH(x) \
128 (x) = get_pathbuff();
129#define RELEASE_PATH(x) \
130 release_pathbuff(x);
131#else
132#define GET_PATH(x) \
133 MALLOC_ZONE((x), char *, MAXPATHLEN, M_NAMEI, M_WAITOK);
134#define RELEASE_PATH(x) \
135 FREE_ZONE((x), MAXPATHLEN, M_NAMEI);
136#endif /* CONFIG_FSE */
137
/*
 * Argument bundle for the checkdirs() per-process iteration: processes whose
 * cwd/root currently reference 'olddp' are switched to 'newdp'.
 */
struct cdirargs {
	vnode_t olddp;		/* vnode being covered by a new mount */
	vnode_t newdp;		/* root vnode of the newly mounted filesystem */
};
/* callback for checkdirs iteration (invoked once per process) */
static int checkdirs_callback(proc_t p, void * arg);

static int change_dir(struct nameidata *ndp, vfs_context_t ctx);
static int checkdirs(vnode_t olddp, vfs_context_t ctx);
void enablequotas(struct mount *mp, vfs_context_t ctx);
static int getfsstat_callback(mount_t mp, void * arg);
static int getutimes(user_addr_t usrtvp, struct timespec *tsp);
static int setutimes(vfs_context_t ctx, vnode_t vp, const struct timespec *ts, int nullflag);
static int sync_callback(mount_t, void *);
static void sync_thread(void *, __unused wait_result_t);
static int sync_async(int);
static int munge_statfs(struct mount *mp, struct vfsstatfs *sfsp,
			user_addr_t bufp, int *sizep, boolean_t is_64_bit,
			boolean_t partial_copy);
static int statfs64_common(struct mount *mp, struct vfsstatfs *sfsp,
			user_addr_t bufp);
static int fsync_common(proc_t p, struct fsync_args *uap, int flags);
static int mount_common(char *fstypename, vnode_t pvp, vnode_t vp,
			struct componentname *cnp, user_addr_t fsmountargs,
			int flags, uint32_t internal_flags, char *labelstr, boolean_t kernelmount,
			vfs_context_t ctx);
void vfs_notify_mount(vnode_t pdvp);

int prepare_coveredvp(vnode_t vp, vfs_context_t ctx, struct componentname *cnp, const char *fsname, boolean_t skip_auth);

struct fd_vn_data * fg_vn_data_alloc(void);

/*
 * Max retries for ENOENT returns from vn_authorize_{rmdir, unlink, rename}
 * Concurrent lookups (or lookups by ids) on hard links can cause the
 * vn_getpath (which does not re-enter the filesystem as vn_getpath_fsenter
 * does) to return ENOENT as the path cannot be returned from the name cache
 * alone. We have no option but to retry and hope to get one namei->reverse path
 * generation done without an intervening lookup, lookup by id on the hard link
 * item. This is only an issue for MAC hooks which cannot reenter the filesystem
 * which currently are the MAC hooks for rename, unlink and rmdir.
 */
#define MAX_AUTHORIZE_ENOENT_RETRIES 1024

static int rmdirat_internal(vfs_context_t, int, user_addr_t, enum uio_seg);

static int fsgetpath_internal(vfs_context_t, int, uint64_t, vm_size_t, caddr_t, int *);

#ifdef CONFIG_IMGSRC_ACCESS
/* Helpers used only by relocate_imageboot_source() for image-boot mounts */
static int authorize_devpath_and_update_mntfromname(mount_t mp, user_addr_t devpath, vnode_t *devvpp, vfs_context_t ctx);
static int place_mount_and_checkdirs(mount_t mp, vnode_t vp, vfs_context_t ctx);
static void undo_place_on_covered_vp(mount_t mp, vnode_t vp);
static int mount_begin_update(mount_t mp, vfs_context_t ctx, int flags);
static void mount_end_update(mount_t mp);
static int relocate_imageboot_source(vnode_t pvp, vnode_t vp, struct componentname *cnp, const char *fsname, vfs_context_t ctx, boolean_t is64bit, user_addr_t fsmountargs, boolean_t by_index);
#endif /* CONFIG_IMGSRC_ACCESS */

/* Optional hook installed by the union filesystem for directory reads */
int (*union_dircheckp)(struct vnode **, struct fileproc *, vfs_context_t);

__private_extern__
int sync_internal(void);

__private_extern__
int unlink1(vfs_context_t, vnode_t, user_addr_t, enum uio_seg, int);

/* Lock group/attributes for per-fd vnode data; defined elsewhere */
extern lck_grp_t *fd_vn_lck_grp;
extern lck_grp_attr_t *fd_vn_lck_grp_attr;
extern lck_attr_t *fd_vn_lck_attr;

/*
 * incremented each time a mount or unmount operation occurs
 * used to invalidate the cached value of the rootvp in the
 * mount structure utilized by cache_lookup_path
 */
uint32_t mount_generation = 0;

/* counts number of mount and unmount operations */
unsigned int vfs_nummntops=0;

extern const struct fileops vnops;
#if CONFIG_APPLEDOUBLE
extern errno_t rmdir_remove_orphaned_appleDouble(vnode_t, vfs_context_t, int *);
#endif /* CONFIG_APPLEDOUBLE */

/* Flag bits passed through the rename path; only secluded-rename is defined */
typedef uint32_t vfs_rename_flags_t;
#if CONFIG_SECLUDED_RENAME
enum {
	VFS_SECLUDE_RENAME = 0x00000001
};
#endif
230/*
231 * Virtual File System System Calls
232 */
233
234#if NFSCLIENT || DEVFS
235/*
236 * Private in-kernel mounting spi (NFS only, not exported)
237 */
238 __private_extern__
239boolean_t
240vfs_iskernelmount(mount_t mp)
241{
242 return ((mp->mnt_kern_flag & MNTK_KERNEL_MOUNT) ? TRUE : FALSE);
243}
244
 __private_extern__
int
kernel_mount(char *fstype, vnode_t pvp, vnode_t vp, const char *path,
             void *data, __unused size_t datalen, int syscall_flags, __unused uint32_t kern_flags, vfs_context_t ctx)
{
	struct nameidata nd;
	boolean_t did_namei;	/* TRUE iff we took iocounts via namei() below */
	int error;

	/* 'path' is a kernel-space string; audit it as vnode path 1 */
	NDINIT(&nd, LOOKUP, OP_MOUNT, FOLLOW | AUDITVNPATH1 | WANTPARENT,
		UIO_SYSSPACE, CAST_USER_ADDR_T(path), ctx);

	/*
	 * Get the vnode to be covered if it's not supplied
	 */
	if (vp == NULLVP) {
		error = namei(&nd);
		if (error)
			return (error);
		/* namei() returns both the target and its parent (WANTPARENT) */
		vp = nd.ni_vp;
		pvp = nd.ni_dvp;
		did_namei = TRUE;
	} else {
		/*
		 * Caller supplied the covered vnode; fake up just enough of
		 * the componentname (path buffer and length) for
		 * mount_common() to record f_mntonname.
		 */
		char *pnbuf = CAST_DOWN(char *, path);

		nd.ni_cnd.cn_pnbuf = pnbuf;
		nd.ni_cnd.cn_pnlen = strlen(pnbuf) + 1;
		did_namei = FALSE;
	}

	/* kernelmount == TRUE: skip user credential checks in mount_common() */
	error = mount_common(fstype, pvp, vp, &nd.ni_cnd, CAST_USER_ADDR_T(data),
	    syscall_flags, kern_flags, NULL, TRUE, ctx);

	/*
	 * Drop the iocounts and namei state only if we acquired them above;
	 * order matters: vnode_put() before nameidone().
	 */
	if (did_namei) {
		vnode_put(vp);
		vnode_put(pvp);
		nameidone(&nd);
	}

	return (error);
}
286#endif /* NFSCLIENT || DEVFS */
287
288/*
289 * Mount a file system.
290 */
291/* ARGSUSED */
292int
293mount(proc_t p, struct mount_args *uap, __unused int32_t *retval)
294{
295 struct __mac_mount_args muap;
296
297 muap.type = uap->type;
298 muap.path = uap->path;
299 muap.flags = uap->flags;
300 muap.data = uap->data;
301 muap.mac_p = USER_ADDR_NULL;
302 return (__mac_mount(p, &muap, retval));
303}
304
/*
 * Notify interested parties that a new filesystem has been mounted:
 * broadcast a VQ_MOUNT vfs event, then post a NOTE_WRITE knote on the
 * parent directory of the covered vnode so watchers of that directory
 * see the change.
 */
void
vfs_notify_mount(vnode_t pdvp)
{
	vfs_event_signal(NULL, VQ_MOUNT, (intptr_t)NULL);
	lock_vnode_and_post(pdvp, NOTE_WRITE);
}
311
312/*
313 * __mac_mount:
314 * Mount a file system taking into account MAC label behavior.
315 * See mount(2) man page for more information
316 *
317 * Parameters: p Process requesting the mount
318 * uap User argument descriptor (see below)
319 * retval (ignored)
320 *
321 * Indirect: uap->type Filesystem type
322 * uap->path Path to mount
323 * uap->data Mount arguments
324 * uap->mac_p MAC info
325 * uap->flags Mount flags
326 *
327 *
328 * Returns: 0 Success
329 * !0 Not success
330 */
/*
 * Set when an attempt is made to upgrade the root filesystem to read/write;
 * consulted by the codesign validation-bitmap optimization (see 7392553).
 */
boolean_t root_fs_upgrade_try = FALSE;

int
__mac_mount(struct proc *p, register struct __mac_mount_args *uap, __unused int32_t *retval)
{
	vnode_t pvp = NULL;		/* parent of covered vnode (iocount held) */
	vnode_t vp = NULL;		/* vnode to be covered (iocount held) */
	int need_nameidone = 0;		/* nonzero once namei() state must be torn down */
	vfs_context_t ctx = vfs_context_current();
	char fstypename[MFSNAMELEN];
	struct nameidata nd;
	size_t dummy=0;
	char *labelstr = NULL;		/* MAC label string copied in from user space */
	int flags = uap->flags;
	int error;
#if CONFIG_IMGSRC_ACCESS || CONFIG_MACF
	boolean_t is_64bit = IS_64BIT_PROCESS(p);
#else
#pragma unused(p)
#endif
	/*
	 * Get the fs type name from user space
	 */
	error = copyinstr(uap->type, fstypename, MFSNAMELEN, &dummy);
	if (error)
		return (error);

	/*
	 * Get the vnode to be covered
	 */
	NDINIT(&nd, LOOKUP, OP_MOUNT, FOLLOW | AUDITVNPATH1 | WANTPARENT,
	       UIO_USERSPACE, uap->path, ctx);
	error = namei(&nd);
	if (error) {
		goto out;
	}
	need_nameidone = 1;
	vp = nd.ni_vp;
	pvp = nd.ni_dvp;

#ifdef CONFIG_IMGSRC_ACCESS
	/* Mounting image source cannot be batched with other operations */
	if (flags == MNT_IMGSRC_BY_INDEX) {
		/*
		 * NOTE(review): the final argument is tautologically TRUE here
		 * (the enclosing if already tested it) — presumably a leftover
		 * from when a second, non-by-index flag was also accepted.
		 */
		error = relocate_imageboot_source(pvp, vp, &nd.ni_cnd, fstypename,
		    ctx, is_64bit, uap->data, (flags == MNT_IMGSRC_BY_INDEX));
		goto out;
	}
#endif /* CONFIG_IMGSRC_ACCESS */

#if CONFIG_MACF
	/*
	 * Get the label string (if any) from user space
	 */
	if (uap->mac_p != USER_ADDR_NULL) {
		struct user_mac mac;
		size_t ulen = 0;

		/* The user_mac layout differs between 32- and 64-bit callers */
		if (is_64bit) {
			struct user64_mac mac64;
			error = copyin(uap->mac_p, &mac64, sizeof(mac64));
			mac.m_buflen = mac64.m_buflen;
			mac.m_string = mac64.m_string;
		} else {
			struct user32_mac mac32;
			error = copyin(uap->mac_p, &mac32, sizeof(mac32));
			mac.m_buflen = mac32.m_buflen;
			mac.m_string = mac32.m_string;
		}
		if (error)
			goto out;
		/* Reject absurd label lengths before allocating */
		if ((mac.m_buflen > MAC_MAX_LABEL_BUF_LEN) ||
		    (mac.m_buflen < 2)) {
			error = EINVAL;
			goto out;
		}
		MALLOC(labelstr, char *, mac.m_buflen, M_MACTEMP, M_WAITOK);
		error = copyinstr(mac.m_string, labelstr, mac.m_buflen, &ulen);
		if (error) {
			goto out;
		}
		AUDIT_ARG(mac_string, labelstr);
	}
#endif /* CONFIG_MACF */

	AUDIT_ARG(fflags, flags);

#if SECURE_KERNEL
	if (flags & MNT_UNION) {
		/* No union mounts on release kernels */
		error = EPERM;
		goto out;
	}
#endif

	/* Special-case mounts whose target is the root of the root filesystem */
	if ((vp->v_flag & VROOT) &&
		(vp->v_mount->mnt_flag & MNT_ROOTFS)) {
		if (!(flags & MNT_UNION)) {
			/* Re-mounting '/' is implicitly an update */
			flags |= MNT_UPDATE;
		}
		else {
			/*
			 * For a union mount on '/', treat it as fresh
			 * mount instead of update.
			 * Otherwise, union mouting on '/' used to panic the
			 * system before, since mnt_vnodecovered was found to
			 * be NULL for '/' which is required for unionlookup
			 * after it gets ENOENT on union mount.
			 */
			flags = (flags & ~(MNT_UPDATE));
		}

#if SECURE_KERNEL
		if ((flags & MNT_RDONLY) == 0) {
			/* Release kernels are not allowed to mount "/" as rw */
			error = EPERM;
			goto out;
		}
#endif
		/*
		 * See 7392553 for more details on why this check exists.
		 * Suffice to say: If this check is ON and something tries
		 * to mount the rootFS RW, we'll turn off the codesign
		 * bitmap optimization.
		 */
#if CHECK_CS_VALIDATION_BITMAP
		if ((flags & MNT_RDONLY) == 0 ) {
			root_fs_upgrade_try = TRUE;
		}
#endif
	}

	/* kernelmount == FALSE: full user credential checks apply */
	error = mount_common(fstypename, pvp, vp, &nd.ni_cnd, uap->data, flags, 0,
	                     labelstr, FALSE, ctx);

out:
	/* Single exit point: unwind only what was actually acquired above */

#if CONFIG_MACF
	if (labelstr)
		FREE(labelstr, M_MACTEMP);
#endif /* CONFIG_MACF */

	if (vp) {
		vnode_put(vp);
	}
	if (pvp) {
		vnode_put(pvp);
	}
	if (need_nameidone) {
		nameidone(&nd);
	}

	return (error);
}
484
485/*
486 * common mount implementation (final stage of mounting)
487
488 * Arguments:
489 * fstypename file system type (ie it's vfs name)
490 * pvp parent of covered vnode
491 * vp covered vnode
492 * cnp component name (ie path) of covered vnode
493 * flags generic mount flags
494 * fsmountargs file system specific data
495 * labelstr optional MAC label
496 * kernelmount TRUE for mounts initiated from inside the kernel
497 * ctx caller's context
498 */
499static int
500mount_common(char *fstypename, vnode_t pvp, vnode_t vp,
501 struct componentname *cnp, user_addr_t fsmountargs, int flags, uint32_t internal_flags,
502 char *labelstr, boolean_t kernelmount, vfs_context_t ctx)
503{
504#if !CONFIG_MACF
505#pragma unused(labelstr)
506#endif
507 struct vnode *devvp = NULLVP;
508 struct vnode *device_vnode = NULLVP;
509#if CONFIG_MACF
510 struct vnode *rvp;
511#endif
512 struct mount *mp;
513 struct vfstable *vfsp = (struct vfstable *)0;
514 struct proc *p = vfs_context_proc(ctx);
515 int error, flag = 0;
516 user_addr_t devpath = USER_ADDR_NULL;
517 int ronly = 0;
518 int mntalloc = 0;
519 boolean_t vfsp_ref = FALSE;
520 boolean_t is_rwlock_locked = FALSE;
521 boolean_t did_rele = FALSE;
522 boolean_t have_usecount = FALSE;
523
524 /*
525 * Process an update for an existing mount
526 */
527 if (flags & MNT_UPDATE) {
528 if ((vp->v_flag & VROOT) == 0) {
529 error = EINVAL;
530 goto out1;
531 }
532 mp = vp->v_mount;
533
534 /* unmount in progress return error */
535 mount_lock_spin(mp);
536 if (mp->mnt_lflag & MNT_LUNMOUNT) {
537 mount_unlock(mp);
538 error = EBUSY;
539 goto out1;
540 }
541 mount_unlock(mp);
542 lck_rw_lock_exclusive(&mp->mnt_rwlock);
543 is_rwlock_locked = TRUE;
544 /*
545 * We only allow the filesystem to be reloaded if it
546 * is currently mounted read-only.
547 */
548 if ((flags & MNT_RELOAD) &&
549 ((mp->mnt_flag & MNT_RDONLY) == 0)) {
550 error = ENOTSUP;
551 goto out1;
552 }
553
554 /*
555 * If content protection is enabled, update mounts are not
556 * allowed to turn it off.
557 */
558 if ((mp->mnt_flag & MNT_CPROTECT) &&
559 ((flags & MNT_CPROTECT) == 0)) {
560 error = EINVAL;
561 goto out1;
562 }
563
564#ifdef CONFIG_IMGSRC_ACCESS
565 /* Can't downgrade the backer of the root FS */
566 if ((mp->mnt_kern_flag & MNTK_BACKS_ROOT) &&
567 (!vfs_isrdonly(mp)) && (flags & MNT_RDONLY)) {
568 error = ENOTSUP;
569 goto out1;
570 }
571#endif /* CONFIG_IMGSRC_ACCESS */
572
573 /*
574 * Only root, or the user that did the original mount is
575 * permitted to update it.
576 */
577 if (mp->mnt_vfsstat.f_owner != kauth_cred_getuid(vfs_context_ucred(ctx)) &&
578 (error = suser(vfs_context_ucred(ctx), &p->p_acflag))) {
579 goto out1;
580 }
581#if CONFIG_MACF
582 error = mac_mount_check_remount(ctx, mp);
583 if (error != 0) {
584 goto out1;
585 }
586#endif
587 /*
588 * For non-root users, silently enforce MNT_NOSUID and MNT_NODEV,
589 * and MNT_NOEXEC if mount point is already MNT_NOEXEC.
590 */
591 if ((!kernelmount) && suser(vfs_context_ucred(ctx), NULL)) {
592 flags |= MNT_NOSUID | MNT_NODEV;
593 if (mp->mnt_flag & MNT_NOEXEC)
594 flags |= MNT_NOEXEC;
595 }
596 flag = mp->mnt_flag;
597
598
599
600 mp->mnt_flag |= flags & (MNT_RELOAD | MNT_FORCE | MNT_UPDATE);
601
602 vfsp = mp->mnt_vtable;
603 goto update;
604 }
605 /*
606 * For non-root users, silently enforce MNT_NOSUID and MNT_NODEV, and
607 * MNT_NOEXEC if mount point is already MNT_NOEXEC.
608 */
609 if ((!kernelmount) && suser(vfs_context_ucred(ctx), NULL)) {
610 flags |= MNT_NOSUID | MNT_NODEV;
611 if (vp->v_mount->mnt_flag & MNT_NOEXEC)
612 flags |= MNT_NOEXEC;
613 }
614
615 /* XXXAUDIT: Should we capture the type on the error path as well? */
616 AUDIT_ARG(text, fstypename);
617 mount_list_lock();
618 for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
619 if (!strncmp(vfsp->vfc_name, fstypename, MFSNAMELEN)) {
620 vfsp->vfc_refcount++;
621 vfsp_ref = TRUE;
622 break;
623 }
624 mount_list_unlock();
625 if (vfsp == NULL) {
626 error = ENODEV;
627 goto out1;
628 }
629
630 /*
631 * VFC_VFSLOCALARGS is not currently supported for kernel mounts
632 */
633 if (kernelmount && (vfsp->vfc_vfsflags & VFC_VFSLOCALARGS)) {
634 error = EINVAL; /* unsupported request */
635 goto out1;
636 }
637
638 error = prepare_coveredvp(vp, ctx, cnp, fstypename, ((internal_flags & KERNEL_MOUNT_NOAUTH) != 0));
639 if (error != 0) {
640 goto out1;
641 }
642
643 /*
644 * Allocate and initialize the filesystem (mount_t)
645 */
646 MALLOC_ZONE(mp, struct mount *, (u_int32_t)sizeof(struct mount),
647 M_MOUNT, M_WAITOK);
648 bzero((char *)mp, (u_int32_t)sizeof(struct mount));
649 mntalloc = 1;
650
651 /* Initialize the default IO constraints */
652 mp->mnt_maxreadcnt = mp->mnt_maxwritecnt = MAXPHYS;
653 mp->mnt_segreadcnt = mp->mnt_segwritecnt = 32;
654 mp->mnt_maxsegreadsize = mp->mnt_maxreadcnt;
655 mp->mnt_maxsegwritesize = mp->mnt_maxwritecnt;
656 mp->mnt_devblocksize = DEV_BSIZE;
657 mp->mnt_alignmentmask = PAGE_MASK;
658 mp->mnt_ioqueue_depth = MNT_DEFAULT_IOQUEUE_DEPTH;
659 mp->mnt_ioscale = 1;
660 mp->mnt_ioflags = 0;
661 mp->mnt_realrootvp = NULLVP;
662 mp->mnt_authcache_ttl = CACHED_LOOKUP_RIGHT_TTL;
663
664 TAILQ_INIT(&mp->mnt_vnodelist);
665 TAILQ_INIT(&mp->mnt_workerqueue);
666 TAILQ_INIT(&mp->mnt_newvnodes);
667 mount_lock_init(mp);
668 lck_rw_lock_exclusive(&mp->mnt_rwlock);
669 is_rwlock_locked = TRUE;
670 mp->mnt_op = vfsp->vfc_vfsops;
671 mp->mnt_vtable = vfsp;
672 //mp->mnt_stat.f_type = vfsp->vfc_typenum;
673 mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
674 strlcpy(mp->mnt_vfsstat.f_fstypename, vfsp->vfc_name, MFSTYPENAMELEN);
675 strlcpy(mp->mnt_vfsstat.f_mntonname, cnp->cn_pnbuf, MAXPATHLEN);
676 mp->mnt_vnodecovered = vp;
677 mp->mnt_vfsstat.f_owner = kauth_cred_getuid(vfs_context_ucred(ctx));
678 mp->mnt_throttle_mask = LOWPRI_MAX_NUM_DEV - 1;
679 mp->mnt_devbsdunit = 0;
680
681 /* XXX 3762912 hack to support HFS filesystem 'owner' - filesystem may update later */
682 vfs_setowner(mp, KAUTH_UID_NONE, KAUTH_GID_NONE);
683
684#if NFSCLIENT || DEVFS
685 if (kernelmount)
686 mp->mnt_kern_flag |= MNTK_KERNEL_MOUNT;
687 if ((internal_flags & KERNEL_MOUNT_PERMIT_UNMOUNT) != 0)
688 mp->mnt_kern_flag |= MNTK_PERMIT_UNMOUNT;
689#endif /* NFSCLIENT || DEVFS */
690
691update:
692 /*
693 * Set the mount level flags.
694 */
695 if (flags & MNT_RDONLY)
696 mp->mnt_flag |= MNT_RDONLY;
697 else if (mp->mnt_flag & MNT_RDONLY) {
698 // disallow read/write upgrades of file systems that
699 // had the TYPENAME_OVERRIDE feature set.
700 if (mp->mnt_kern_flag & MNTK_TYPENAME_OVERRIDE) {
701 error = EPERM;
702 goto out1;
703 }
704 mp->mnt_kern_flag |= MNTK_WANTRDWR;
705 }
706 mp->mnt_flag &= ~(MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
707 MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC |
708 MNT_UNKNOWNPERMISSIONS | MNT_DONTBROWSE |
709 MNT_AUTOMOUNTED | MNT_DEFWRITE | MNT_NOATIME |
710 MNT_QUARANTINE | MNT_CPROTECT);
711 mp->mnt_flag |= flags & (MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
712 MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC |
713 MNT_UNKNOWNPERMISSIONS | MNT_DONTBROWSE |
714 MNT_AUTOMOUNTED | MNT_DEFWRITE | MNT_NOATIME |
715 MNT_QUARANTINE | MNT_CPROTECT);
716
717#if CONFIG_MACF
718 if (flags & MNT_MULTILABEL) {
719 if (vfsp->vfc_vfsflags & VFC_VFSNOMACLABEL) {
720 error = EINVAL;
721 goto out1;
722 }
723 mp->mnt_flag |= MNT_MULTILABEL;
724 }
725#endif
726 /*
727 * Process device path for local file systems if requested
728 */
729 if (vfsp->vfc_vfsflags & VFC_VFSLOCALARGS) {
730 if (vfs_context_is64bit(ctx)) {
731 if ( (error = copyin(fsmountargs, (caddr_t)&devpath, sizeof(devpath))) )
732 goto out1;
733 fsmountargs += sizeof(devpath);
734 } else {
735 user32_addr_t tmp;
736 if ( (error = copyin(fsmountargs, (caddr_t)&tmp, sizeof(tmp))) )
737 goto out1;
738 /* munge into LP64 addr */
739 devpath = CAST_USER_ADDR_T(tmp);
740 fsmountargs += sizeof(tmp);
741 }
742
743 /* Lookup device and authorize access to it */
744 if ((devpath)) {
745 struct nameidata nd;
746
747 NDINIT(&nd, LOOKUP, OP_MOUNT, FOLLOW, UIO_USERSPACE, devpath, ctx);
748 if ( (error = namei(&nd)) )
749 goto out1;
750
751 strlcpy(mp->mnt_vfsstat.f_mntfromname, nd.ni_cnd.cn_pnbuf, MAXPATHLEN);
752 devvp = nd.ni_vp;
753
754 nameidone(&nd);
755
756 if (devvp->v_type != VBLK) {
757 error = ENOTBLK;
758 goto out2;
759 }
760 if (major(devvp->v_rdev) >= nblkdev) {
761 error = ENXIO;
762 goto out2;
763 }
764 /*
765 * If mount by non-root, then verify that user has necessary
766 * permissions on the device.
767 */
768 if (suser(vfs_context_ucred(ctx), NULL) != 0) {
769 mode_t accessmode = KAUTH_VNODE_READ_DATA;
770
771 if ((mp->mnt_flag & MNT_RDONLY) == 0)
772 accessmode |= KAUTH_VNODE_WRITE_DATA;
773 if ((error = vnode_authorize(devvp, NULL, accessmode, ctx)) != 0)
774 goto out2;
775 }
776 }
777 /* On first mount, preflight and open device */
778 if (devpath && ((flags & MNT_UPDATE) == 0)) {
779 if ( (error = vnode_ref(devvp)) )
780 goto out2;
781 /*
782 * Disallow multiple mounts of the same device.
783 * Disallow mounting of a device that is currently in use
784 * (except for root, which might share swap device for miniroot).
785 * Flush out any old buffers remaining from a previous use.
786 */
787 if ( (error = vfs_mountedon(devvp)) )
788 goto out3;
789
790 if (vcount(devvp) > 1 && !(vfs_flags(mp) & MNT_ROOTFS)) {
791 error = EBUSY;
792 goto out3;
793 }
794 if ( (error = VNOP_FSYNC(devvp, MNT_WAIT, ctx)) ) {
795 error = ENOTBLK;
796 goto out3;
797 }
798 if ( (error = buf_invalidateblks(devvp, BUF_WRITE_DATA, 0, 0)) )
799 goto out3;
800
801 ronly = (mp->mnt_flag & MNT_RDONLY) != 0;
802#if CONFIG_MACF
803 error = mac_vnode_check_open(ctx,
804 devvp,
805 ronly ? FREAD : FREAD|FWRITE);
806 if (error)
807 goto out3;
808#endif /* MAC */
809 if ( (error = VNOP_OPEN(devvp, ronly ? FREAD : FREAD|FWRITE, ctx)) )
810 goto out3;
811
812 mp->mnt_devvp = devvp;
813 device_vnode = devvp;
814
815 } else if ((mp->mnt_flag & MNT_RDONLY) &&
816 (mp->mnt_kern_flag & MNTK_WANTRDWR) &&
817 (device_vnode = mp->mnt_devvp)) {
818 dev_t dev;
819 int maj;
820 /*
821 * If upgrade to read-write by non-root, then verify
822 * that user has necessary permissions on the device.
823 */
824 vnode_getalways(device_vnode);
825
826 if (suser(vfs_context_ucred(ctx), NULL) &&
827 (error = vnode_authorize(device_vnode, NULL,
828 KAUTH_VNODE_READ_DATA | KAUTH_VNODE_WRITE_DATA,
829 ctx)) != 0) {
830 vnode_put(device_vnode);
831 goto out2;
832 }
833
834 /* Tell the device that we're upgrading */
835 dev = (dev_t)device_vnode->v_rdev;
836 maj = major(dev);
837
838 if ((u_int)maj >= (u_int)nblkdev)
839 panic("Volume mounted on a device with invalid major number.");
840
841 error = bdevsw[maj].d_open(dev, FREAD | FWRITE, S_IFBLK, p);
842 vnode_put(device_vnode);
843 device_vnode = NULLVP;
844 if (error != 0) {
845 goto out2;
846 }
847 }
848 }
849#if CONFIG_MACF
850 if ((flags & MNT_UPDATE) == 0) {
851 mac_mount_label_init(mp);
852 mac_mount_label_associate(ctx, mp);
853 }
854 if (labelstr) {
855 if ((flags & MNT_UPDATE) != 0) {
856 error = mac_mount_check_label_update(ctx, mp);
857 if (error != 0)
858 goto out3;
859 }
860 }
861#endif
862 /*
863 * Mount the filesystem.
864 */
865 error = VFS_MOUNT(mp, device_vnode, fsmountargs, ctx);
866
867 if (flags & MNT_UPDATE) {
868 if (mp->mnt_kern_flag & MNTK_WANTRDWR)
869 mp->mnt_flag &= ~MNT_RDONLY;
870 mp->mnt_flag &=~
871 (MNT_UPDATE | MNT_RELOAD | MNT_FORCE);
872 mp->mnt_kern_flag &=~ MNTK_WANTRDWR;
873 if (error)
874 mp->mnt_flag = flag; /* restore flag value */
875 vfs_event_signal(NULL, VQ_UPDATE, (intptr_t)NULL);
876 lck_rw_done(&mp->mnt_rwlock);
877 is_rwlock_locked = FALSE;
878 if (!error)
879 enablequotas(mp, ctx);
880 goto exit;
881 }
882
883 /*
884 * Put the new filesystem on the mount list after root.
885 */
886 if (error == 0) {
887 struct vfs_attr vfsattr;
888#if CONFIG_MACF
889 if (vfs_flags(mp) & MNT_MULTILABEL) {
890 error = VFS_ROOT(mp, &rvp, ctx);
891 if (error) {
892 printf("%s() VFS_ROOT returned %d\n", __func__, error);
893 goto out3;
894 }
895 error = vnode_label(mp, NULL, rvp, NULL, 0, ctx);
896 /*
897 * drop reference provided by VFS_ROOT
898 */
899 vnode_put(rvp);
900
901 if (error)
902 goto out3;
903 }
904#endif /* MAC */
905
906 vnode_lock_spin(vp);
907 CLR(vp->v_flag, VMOUNT);
908 vp->v_mountedhere = mp;
909 vnode_unlock(vp);
910
911 /*
912 * taking the name_cache_lock exclusively will
913 * insure that everyone is out of the fast path who
914 * might be trying to use a now stale copy of
915 * vp->v_mountedhere->mnt_realrootvp
916 * bumping mount_generation causes the cached values
917 * to be invalidated
918 */
919 name_cache_lock();
920 mount_generation++;
921 name_cache_unlock();
922
923 error = vnode_ref(vp);
924 if (error != 0) {
925 goto out4;
926 }
927
928 have_usecount = TRUE;
929
930 error = checkdirs(vp, ctx);
931 if (error != 0) {
932 /* Unmount the filesystem as cdir/rdirs cannot be updated */
933 goto out4;
934 }
935 /*
936 * there is no cleanup code here so I have made it void
937 * we need to revisit this
938 */
939 (void)VFS_START(mp, 0, ctx);
940
941 if (mount_list_add(mp) != 0) {
942 /*
943 * The system is shutting down trying to umount
944 * everything, so fail with a plausible errno.
945 */
946 error = EBUSY;
947 goto out4;
948 }
949 lck_rw_done(&mp->mnt_rwlock);
950 is_rwlock_locked = FALSE;
951
952 /* Check if this mounted file system supports EAs or named streams. */
953 /* Skip WebDAV file systems for now since they hang in VFS_GETATTR here. */
954 VFSATTR_INIT(&vfsattr);
955 VFSATTR_WANTED(&vfsattr, f_capabilities);
956 if (strncmp(mp->mnt_vfsstat.f_fstypename, "webdav", sizeof("webdav")) != 0 &&
957 vfs_getattr(mp, &vfsattr, ctx) == 0 &&
958 VFSATTR_IS_SUPPORTED(&vfsattr, f_capabilities)) {
959 if ((vfsattr.f_capabilities.capabilities[VOL_CAPABILITIES_INTERFACES] & VOL_CAP_INT_EXTENDED_ATTR) &&
960 (vfsattr.f_capabilities.valid[VOL_CAPABILITIES_INTERFACES] & VOL_CAP_INT_EXTENDED_ATTR)) {
961 mp->mnt_kern_flag |= MNTK_EXTENDED_ATTRS;
962 }
963#if NAMEDSTREAMS
964 if ((vfsattr.f_capabilities.capabilities[VOL_CAPABILITIES_INTERFACES] & VOL_CAP_INT_NAMEDSTREAMS) &&
965 (vfsattr.f_capabilities.valid[VOL_CAPABILITIES_INTERFACES] & VOL_CAP_INT_NAMEDSTREAMS)) {
966 mp->mnt_kern_flag |= MNTK_NAMED_STREAMS;
967 }
968#endif
969 /* Check if this file system supports path from id lookups. */
970 if ((vfsattr.f_capabilities.capabilities[VOL_CAPABILITIES_FORMAT] & VOL_CAP_FMT_PATH_FROM_ID) &&
971 (vfsattr.f_capabilities.valid[VOL_CAPABILITIES_FORMAT] & VOL_CAP_FMT_PATH_FROM_ID)) {
972 mp->mnt_kern_flag |= MNTK_PATH_FROM_ID;
973 } else if (mp->mnt_flag & MNT_DOVOLFS) {
974 /* Legacy MNT_DOVOLFS flag also implies path from id lookups. */
975 mp->mnt_kern_flag |= MNTK_PATH_FROM_ID;
976 }
977 }
978 if (mp->mnt_vtable->vfc_vfsflags & VFC_VFSNATIVEXATTR) {
979 mp->mnt_kern_flag |= MNTK_EXTENDED_ATTRS;
980 }
981 if (mp->mnt_vtable->vfc_vfsflags & VFC_VFSPREFLIGHT) {
982 mp->mnt_kern_flag |= MNTK_UNMOUNT_PREFLIGHT;
983 }
984 /* increment the operations count */
985 OSAddAtomic(1, &vfs_nummntops);
986 enablequotas(mp, ctx);
987
988 if (device_vnode) {
989 device_vnode->v_specflags |= SI_MOUNTEDON;
990
991 /*
992 * cache the IO attributes for the underlying physical media...
993 * an error return indicates the underlying driver doesn't
994 * support all the queries necessary... however, reasonable
995 * defaults will have been set, so no reason to bail or care
996 */
997 vfs_init_io_attributes(device_vnode, mp);
998 }
999
1000 /* Now that mount is setup, notify the listeners */
1001 vfs_notify_mount(pvp);
1002 IOBSDMountChange(mp, kIOMountChangeMount);
1003
1004 } else {
1005 /* If we fail a fresh mount, there should be no vnodes left hooked into the mountpoint. */
1006 if (mp->mnt_vnodelist.tqh_first != NULL) {
1007 panic("mount_common(): mount of %s filesystem failed with %d, but vnode list is not empty.",
1008 mp->mnt_vtable->vfc_name, error);
1009 }
1010
1011 vnode_lock_spin(vp);
1012 CLR(vp->v_flag, VMOUNT);
1013 vnode_unlock(vp);
1014 mount_list_lock();
1015 mp->mnt_vtable->vfc_refcount--;
1016 mount_list_unlock();
1017
1018 if (device_vnode ) {
1019 vnode_rele(device_vnode);
1020 VNOP_CLOSE(device_vnode, ronly ? FREAD : FREAD|FWRITE, ctx);
1021 }
1022 lck_rw_done(&mp->mnt_rwlock);
1023 is_rwlock_locked = FALSE;
1024
1025 /*
1026 * if we get here, we have a mount structure that needs to be freed,
1027 * but since the coveredvp hasn't yet been updated to point at it,
1028 * no need to worry about other threads holding a crossref on this mp
1029 * so it's ok to just free it
1030 */
1031 mount_lock_destroy(mp);
1032#if CONFIG_MACF
1033 mac_mount_label_destroy(mp);
1034#endif
1035 FREE_ZONE((caddr_t)mp, sizeof (struct mount), M_MOUNT);
1036 }
1037exit:
1038 /*
1039 * drop I/O count on the device vp if there was one
1040 */
1041 if (devpath && devvp)
1042 vnode_put(devvp);
1043
1044 return(error);
1045
1046/* Error condition exits */
1047out4:
1048 (void)VFS_UNMOUNT(mp, MNT_FORCE, ctx);
1049
1050 /*
1051 * If the mount has been placed on the covered vp,
1052 * it may have been discovered by now, so we have
1053 * to treat this just like an unmount
1054 */
1055 mount_lock_spin(mp);
1056 mp->mnt_lflag |= MNT_LDEAD;
1057 mount_unlock(mp);
1058
1059 if (device_vnode != NULLVP) {
1060 vnode_rele(device_vnode);
1061 VNOP_CLOSE(device_vnode, mp->mnt_flag & MNT_RDONLY ? FREAD : FREAD|FWRITE,
1062 ctx);
1063 did_rele = TRUE;
1064 }
1065
1066 vnode_lock_spin(vp);
1067
1068 mp->mnt_crossref++;
1069 vp->v_mountedhere = (mount_t) 0;
1070
1071 vnode_unlock(vp);
1072
1073 if (have_usecount) {
1074 vnode_rele(vp);
1075 }
1076out3:
1077 if (devpath && ((flags & MNT_UPDATE) == 0) && (!did_rele))
1078 vnode_rele(devvp);
1079out2:
1080 if (devpath && devvp)
1081 vnode_put(devvp);
1082out1:
1083 /* Release mnt_rwlock only when it was taken */
1084 if (is_rwlock_locked == TRUE) {
1085 lck_rw_done(&mp->mnt_rwlock);
1086 }
1087
1088 if (mntalloc) {
1089 if (mp->mnt_crossref)
1090 mount_dropcrossref(mp, vp, 0);
1091 else {
1092 mount_lock_destroy(mp);
1093#if CONFIG_MACF
1094 mac_mount_label_destroy(mp);
1095#endif
1096 FREE_ZONE((caddr_t)mp, sizeof (struct mount), M_MOUNT);
1097 }
1098 }
1099 if (vfsp_ref) {
1100 mount_list_lock();
1101 vfsp->vfc_refcount--;
1102 mount_list_unlock();
1103 }
1104
1105 return(error);
1106}
1107
1108/*
1109 * Flush in-core data, check for competing mount attempts,
1110 * and set VMOUNT
1111 */
int
prepare_coveredvp(vnode_t vp, vfs_context_t ctx, struct componentname *cnp, const char *fsname, boolean_t skip_auth)
{
#if !CONFIG_MACF
#pragma unused(cnp,fsname)
#endif
	struct vnode_attr va;
	int error;

	if (!skip_auth) {
		/*
		 * If the user is not root, ensure that they own the directory
		 * onto which we are attempting to mount.
		 */
		VATTR_INIT(&va);
		VATTR_WANTED(&va, va_uid);
		if ((error = vnode_getattr(vp, &va, ctx)) ||
		    (va.va_uid != kauth_cred_getuid(vfs_context_ucred(ctx)) &&
		     (!vfs_context_issuser(ctx)))) {
			error = EPERM;
			goto out;
		}
	}

	/* Push dirty data and invalidate cached buffers on the covered vnode */
	if ( (error = VNOP_FSYNC(vp, MNT_WAIT, ctx)) )
		goto out;

	if ( (error = buf_invalidateblks(vp, BUF_WRITE_DATA, 0, 0)) )
		goto out;

	/* Only directories may be covered by a mount */
	if (vp->v_type != VDIR) {
		error = ENOTDIR;
		goto out;
	}

	/* Another mount is in progress on, or already covers, this vnode */
	if (ISSET(vp->v_flag, VMOUNT) && (vp->v_mountedhere != NULL)) {
		error = EBUSY;
		goto out;
	}

#if CONFIG_MACF
	error = mac_mount_check_mount(ctx, vp,
	    cnp, fsname);
	if (error != 0)
		goto out;
#endif

	/* Mark the vnode so competing mount attempts see one in progress */
	vnode_lock_spin(vp);
	SET(vp->v_flag, VMOUNT);
	vnode_unlock(vp);

out:
	return error;
}
1166
1167#if CONFIG_IMGSRC_ACCESS
1168
1169#if DEBUG
1170#define IMGSRC_DEBUG(args...) printf(args)
1171#else
1172#define IMGSRC_DEBUG(args...) do { } while(0)
1173#endif
1174
1175static int
1176authorize_devpath_and_update_mntfromname(mount_t mp, user_addr_t devpath, vnode_t *devvpp, vfs_context_t ctx)
1177{
1178 struct nameidata nd;
1179 vnode_t vp, realdevvp;
1180 mode_t accessmode;
1181 int error;
1182
1183 NDINIT(&nd, LOOKUP, OP_LOOKUP, FOLLOW, UIO_USERSPACE, devpath, ctx);
1184 if ( (error = namei(&nd)) ) {
1185 IMGSRC_DEBUG("namei() failed with %d\n", error);
1186 return error;
1187 }
1188
1189 vp = nd.ni_vp;
1190
1191 if (!vnode_isblk(vp)) {
1192 IMGSRC_DEBUG("Not block device.\n");
1193 error = ENOTBLK;
1194 goto out;
1195 }
1196
1197 realdevvp = mp->mnt_devvp;
1198 if (realdevvp == NULLVP) {
1199 IMGSRC_DEBUG("No device backs the mount.\n");
1200 error = ENXIO;
1201 goto out;
1202 }
1203
1204 error = vnode_getwithref(realdevvp);
1205 if (error != 0) {
1206 IMGSRC_DEBUG("Coudn't get iocount on device.\n");
1207 goto out;
1208 }
1209
1210 if (vnode_specrdev(vp) != vnode_specrdev(realdevvp)) {
1211 IMGSRC_DEBUG("Wrong dev_t.\n");
1212 error = ENXIO;
1213 goto out1;
1214 }
1215
1216 strlcpy(mp->mnt_vfsstat.f_mntfromname, nd.ni_cnd.cn_pnbuf, MAXPATHLEN);
1217
1218 /*
1219 * If mount by non-root, then verify that user has necessary
1220 * permissions on the device.
1221 */
1222 if (!vfs_context_issuser(ctx)) {
1223 accessmode = KAUTH_VNODE_READ_DATA;
1224 if ((mp->mnt_flag & MNT_RDONLY) == 0)
1225 accessmode |= KAUTH_VNODE_WRITE_DATA;
1226 if ((error = vnode_authorize(vp, NULL, accessmode, ctx)) != 0) {
1227 IMGSRC_DEBUG("Access denied.\n");
1228 goto out1;
1229 }
1230 }
1231
1232 *devvpp = vp;
1233
1234out1:
1235 vnode_put(realdevvp);
1236out:
1237 nameidone(&nd);
1238 if (error) {
1239 vnode_put(vp);
1240 }
1241
1242 return error;
1243}
1244
1245/*
1246 * Clear VMOUNT, set v_mountedhere, and mnt_vnodecovered, ref the vnode,
1247 * and call checkdirs()
1248 */
1249static int
1250place_mount_and_checkdirs(mount_t mp, vnode_t vp, vfs_context_t ctx)
1251{
1252 int error;
1253
1254 mp->mnt_vnodecovered = vp; /* XXX This is normally only set at init-time ... */
1255
1256 vnode_lock_spin(vp);
1257 CLR(vp->v_flag, VMOUNT);
1258 vp->v_mountedhere = mp;
1259 vnode_unlock(vp);
1260
1261 /*
1262 * taking the name_cache_lock exclusively will
1263 * insure that everyone is out of the fast path who
1264 * might be trying to use a now stale copy of
1265 * vp->v_mountedhere->mnt_realrootvp
1266 * bumping mount_generation causes the cached values
1267 * to be invalidated
1268 */
1269 name_cache_lock();
1270 mount_generation++;
1271 name_cache_unlock();
1272
1273 error = vnode_ref(vp);
1274 if (error != 0) {
1275 goto out;
1276 }
1277
1278 error = checkdirs(vp, ctx);
1279 if (error != 0) {
1280 /* Unmount the filesystem as cdir/rdirs cannot be updated */
1281 vnode_rele(vp);
1282 goto out;
1283 }
1284
1285out:
1286 if (error != 0) {
1287 mp->mnt_vnodecovered = NULLVP;
1288 }
1289 return error;
1290}
1291
/*
 * Reverse place_mount_and_checkdirs(): drop the usecount taken on the
 * covered vnode, detach the mount from it, and clear mnt_vnodecovered.
 */
static void
undo_place_on_covered_vp(mount_t mp, vnode_t vp)
{
	vnode_rele(vp);
	vnode_lock_spin(vp);
	vp->v_mountedhere = (mount_t)NULL;
	vnode_unlock(vp);

	mp->mnt_vnodecovered = NULLVP;
}
1302
/*
 * Begin a mount-update transaction on mp: reject if an unmount is in
 * progress, take mnt_rwlock exclusively, and authorize the update
 * (reload restrictions, ownership, MACF).
 *
 * On success the caller holds mnt_rwlock and must end the transaction
 * with mount_end_update(); on failure the lock has been released.
 */
static int
mount_begin_update(mount_t mp, vfs_context_t ctx, int flags)
{
	int error;

	/* unmount in progress return error */
	mount_lock_spin(mp);
	if (mp->mnt_lflag & MNT_LUNMOUNT) {
		mount_unlock(mp);
		return EBUSY;
	}
	mount_unlock(mp);
	lck_rw_lock_exclusive(&mp->mnt_rwlock);

	/*
	 * We only allow the filesystem to be reloaded if it
	 * is currently mounted read-only.
	 */
	if ((flags & MNT_RELOAD) &&
	    ((mp->mnt_flag & MNT_RDONLY) == 0)) {
		error = ENOTSUP;
		goto out;
	}

	/*
	 * Only root, or the user that did the original mount is
	 * permitted to update it.
	 */
	if (mp->mnt_vfsstat.f_owner != kauth_cred_getuid(vfs_context_ucred(ctx)) &&
	    (!vfs_context_issuser(ctx))) {
		error = EPERM;
		goto out;
	}
#if CONFIG_MACF
	error = mac_mount_check_remount(ctx, mp);
	if (error != 0) {
		goto out;
	}
#endif

out:
	if (error) {
		lck_rw_done(&mp->mnt_rwlock);
	}

	return error;
}
1350
/*
 * End a mount-update transaction started by mount_begin_update()
 * by releasing the mount's rwlock.
 */
static void
mount_end_update(mount_t mp)
{
	lck_rw_done(&mp->mnt_rwlock);
}
1356
1357static int
1358get_imgsrc_rootvnode(uint32_t height, vnode_t *rvpp)
1359{
1360 vnode_t vp;
1361
1362 if (height >= MAX_IMAGEBOOT_NESTING) {
1363 return EINVAL;
1364 }
1365
1366 vp = imgsrc_rootvnodes[height];
1367 if ((vp != NULLVP) && (vnode_get(vp) == 0)) {
1368 *rvpp = vp;
1369 return 0;
1370 } else {
1371 return ENOENT;
1372 }
1373}
1374
1375static int
1376relocate_imageboot_source(vnode_t pvp, vnode_t vp, struct componentname *cnp,
1377 const char *fsname, vfs_context_t ctx,
1378 boolean_t is64bit, user_addr_t fsmountargs, boolean_t by_index)
1379{
1380 int error;
1381 mount_t mp;
1382 boolean_t placed = FALSE;
1383 vnode_t devvp = NULLVP;
1384 struct vfstable *vfsp;
1385 user_addr_t devpath;
1386 char *old_mntonname;
1387 vnode_t rvp;
1388 uint32_t height;
1389 uint32_t flags;
1390
1391 /* If we didn't imageboot, nothing to move */
1392 if (imgsrc_rootvnodes[0] == NULLVP) {
1393 return EINVAL;
1394 }
1395
1396 /* Only root can do this */
1397 if (!vfs_context_issuser(ctx)) {
1398 return EPERM;
1399 }
1400
1401 IMGSRC_DEBUG("looking for root vnode.\n");
1402
1403 /*
1404 * Get root vnode of filesystem we're moving.
1405 */
1406 if (by_index) {
1407 if (is64bit) {
1408 struct user64_mnt_imgsrc_args mia64;
1409 error = copyin(fsmountargs, &mia64, sizeof(mia64));
1410 if (error != 0) {
1411 IMGSRC_DEBUG("Failed to copy in arguments.\n");
1412 return error;
1413 }
1414
1415 height = mia64.mi_height;
1416 flags = mia64.mi_flags;
1417 devpath = mia64.mi_devpath;
1418 } else {
1419 struct user32_mnt_imgsrc_args mia32;
1420 error = copyin(fsmountargs, &mia32, sizeof(mia32));
1421 if (error != 0) {
1422 IMGSRC_DEBUG("Failed to copy in arguments.\n");
1423 return error;
1424 }
1425
1426 height = mia32.mi_height;
1427 flags = mia32.mi_flags;
1428 devpath = mia32.mi_devpath;
1429 }
1430 } else {
1431 /*
1432 * For binary compatibility--assumes one level of nesting.
1433 */
1434 if (is64bit) {
1435 if ( (error = copyin(fsmountargs, (caddr_t)&devpath, sizeof(devpath))) )
1436 return error;
1437 } else {
1438 user32_addr_t tmp;
1439 if ( (error = copyin(fsmountargs, (caddr_t)&tmp, sizeof(tmp))) )
1440 return error;
1441
1442 /* munge into LP64 addr */
1443 devpath = CAST_USER_ADDR_T(tmp);
1444 }
1445
1446 height = 0;
1447 flags = 0;
1448 }
1449
1450 if (flags != 0) {
1451 IMGSRC_DEBUG("%s: Got nonzero flags.\n", __FUNCTION__);
1452 return EINVAL;
1453 }
1454
1455 error = get_imgsrc_rootvnode(height, &rvp);
1456 if (error != 0) {
1457 IMGSRC_DEBUG("getting root vnode failed with %d\n", error);
1458 return error;
1459 }
1460
1461 IMGSRC_DEBUG("got root vnode.\n");
1462
1463 MALLOC(old_mntonname, char*, MAXPATHLEN, M_TEMP, M_WAITOK);
1464
1465 /* Can only move once */
1466 mp = vnode_mount(rvp);
1467 if ((mp->mnt_kern_flag & MNTK_HAS_MOVED) == MNTK_HAS_MOVED) {
1468 IMGSRC_DEBUG("Already moved.\n");
1469 error = EBUSY;
1470 goto out0;
1471 }
1472
1473 IMGSRC_DEBUG("Starting updated.\n");
1474
1475 /* Get exclusive rwlock on mount, authorize update on mp */
1476 error = mount_begin_update(mp , ctx, 0);
1477 if (error != 0) {
1478 IMGSRC_DEBUG("Starting updated failed with %d\n", error);
1479 goto out0;
1480 }
1481
1482 /*
1483 * It can only be moved once. Flag is set under the rwlock,
1484 * so we're now safe to proceed.
1485 */
1486 if ((mp->mnt_kern_flag & MNTK_HAS_MOVED) == MNTK_HAS_MOVED) {
1487 IMGSRC_DEBUG("Already moved [2]\n");
1488 goto out1;
1489 }
1490
1491
1492 IMGSRC_DEBUG("Preparing coveredvp.\n");
1493
1494 /* Mark covered vnode as mount in progress, authorize placing mount on top */
1495 error = prepare_coveredvp(vp, ctx, cnp, fsname, FALSE);
1496 if (error != 0) {
1497 IMGSRC_DEBUG("Preparing coveredvp failed with %d.\n", error);
1498 goto out1;
1499 }
1500
1501 IMGSRC_DEBUG("Covered vp OK.\n");
1502
1503 /* Sanity check the name caller has provided */
1504 vfsp = mp->mnt_vtable;
1505 if (strncmp(vfsp->vfc_name, fsname, MFSNAMELEN) != 0) {
1506 IMGSRC_DEBUG("Wrong fs name.\n");
1507 error = EINVAL;
1508 goto out2;
1509 }
1510
1511 /* Check the device vnode and update mount-from name, for local filesystems */
1512 if (vfsp->vfc_vfsflags & VFC_VFSLOCALARGS) {
1513 IMGSRC_DEBUG("Local, doing device validation.\n");
1514
1515 if (devpath != USER_ADDR_NULL) {
1516 error = authorize_devpath_and_update_mntfromname(mp, devpath, &devvp, ctx);
1517 if (error) {
1518 IMGSRC_DEBUG("authorize_devpath_and_update_mntfromname() failed.\n");
1519 goto out2;
1520 }
1521
1522 vnode_put(devvp);
1523 }
1524 }
1525
1526 /*
1527 * Place mp on top of vnode, ref the vnode, call checkdirs(),
1528 * and increment the name cache's mount generation
1529 */
1530
1531 IMGSRC_DEBUG("About to call place_mount_and_checkdirs().\n");
1532 error = place_mount_and_checkdirs(mp, vp, ctx);
1533 if (error != 0) {
1534 goto out2;
1535 }
1536
1537 placed = TRUE;
1538
1539 strlcpy(old_mntonname, mp->mnt_vfsstat.f_mntonname, MAXPATHLEN);
1540 strlcpy(mp->mnt_vfsstat.f_mntonname, cnp->cn_pnbuf, MAXPATHLEN);
1541
1542 /* Forbid future moves */
1543 mount_lock(mp);
1544 mp->mnt_kern_flag |= MNTK_HAS_MOVED;
1545 mount_unlock(mp);
1546
1547 /* Finally, add to mount list, completely ready to go */
1548 if (mount_list_add(mp) != 0) {
1549 /*
1550 * The system is shutting down trying to umount
1551 * everything, so fail with a plausible errno.
1552 */
1553 error = EBUSY;
1554 goto out3;
1555 }
1556
1557 mount_end_update(mp);
1558 vnode_put(rvp);
1559 FREE(old_mntonname, M_TEMP);
1560
1561 vfs_notify_mount(pvp);
1562
1563 return 0;
1564out3:
1565 strlcpy(mp->mnt_vfsstat.f_mntonname, old_mntonname, MAXPATHLEN);
1566
1567 mount_lock(mp);
1568 mp->mnt_kern_flag &= ~(MNTK_HAS_MOVED);
1569 mount_unlock(mp);
1570
1571out2:
1572 /*
1573 * Placing the mp on the vnode clears VMOUNT,
1574 * so cleanup is different after that point
1575 */
1576 if (placed) {
1577 /* Rele the vp, clear VMOUNT and v_mountedhere */
1578 undo_place_on_covered_vp(mp, vp);
1579 } else {
1580 vnode_lock_spin(vp);
1581 CLR(vp->v_flag, VMOUNT);
1582 vnode_unlock(vp);
1583 }
1584out1:
1585 mount_end_update(mp);
1586
1587out0:
1588 vnode_put(rvp);
1589 FREE(old_mntonname, M_TEMP);
1590 return error;
1591}
1592
1593#endif /* CONFIG_IMGSRC_ACCESS */
1594
1595void
1596enablequotas(struct mount *mp, vfs_context_t ctx)
1597{
1598 struct nameidata qnd;
1599 int type;
1600 char qfpath[MAXPATHLEN];
1601 const char *qfname = QUOTAFILENAME;
1602 const char *qfopsname = QUOTAOPSNAME;
1603 const char *qfextension[] = INITQFNAMES;
1604
1605 /* XXX Shoulkd be an MNTK_ flag, instead of strncmp()'s */
1606 if (strncmp(mp->mnt_vfsstat.f_fstypename, "hfs", sizeof("hfs")) != 0 ) {
1607 return;
1608 }
1609 /*
1610 * Enable filesystem disk quotas if necessary.
1611 * We ignore errors as this should not interfere with final mount
1612 */
1613 for (type=0; type < MAXQUOTAS; type++) {
1614 snprintf(qfpath, sizeof(qfpath), "%s/%s.%s", mp->mnt_vfsstat.f_mntonname, qfopsname, qfextension[type]);
1615 NDINIT(&qnd, LOOKUP, OP_MOUNT, FOLLOW, UIO_SYSSPACE,
1616 CAST_USER_ADDR_T(qfpath), ctx);
1617 if (namei(&qnd) != 0)
1618 continue; /* option file to trigger quotas is not present */
1619 vnode_put(qnd.ni_vp);
1620 nameidone(&qnd);
1621 snprintf(qfpath, sizeof(qfpath), "%s/%s.%s", mp->mnt_vfsstat.f_mntonname, qfname, qfextension[type]);
1622
1623 (void) VFS_QUOTACTL(mp, QCMD(Q_QUOTAON, type), 0, qfpath, ctx);
1624 }
1625 return;
1626}
1627
1628
/*
 * Per-process callback for checkdirs(): if the process's current or
 * root directory is the vnode that was just covered by a new mount
 * (olddp), swap it for the new mount's root (newdp), transferring the
 * usecount accordingly.
 */
static int
checkdirs_callback(proc_t p, void * arg)
{
	struct cdirargs * cdrp = (struct cdirargs * )arg;
	vnode_t olddp = cdrp->olddp;
	vnode_t newdp = cdrp->newdp;
	struct filedesc *fdp;
	vnode_t tvp;
	vnode_t fdp_cvp;
	vnode_t fdp_rvp;
	int cdir_changed = 0;
	int rdir_changed = 0;

	/*
	 * XXX Also needs to iterate each thread in the process to see if it
	 * XXX is using a per-thread current working directory, and, if so,
	 * XXX update that as well.
	 */

	proc_fdlock(p);
	fdp = p->p_fd;
	if (fdp == (struct filedesc *)0) {
		proc_fdunlock(p);
		return(PROC_RETURNED);
	}
	/* Snapshot cdir/rdir under the fd lock, then drop it for the vnode ops */
	fdp_cvp = fdp->fd_cdir;
	fdp_rvp = fdp->fd_rdir;
	proc_fdunlock(p);

	/*
	 * NOTE(review): fdp->fd_cdir / fdp->fd_rdir are re-read below
	 * without the fd lock held; this appears to assume they cannot
	 * change concurrently here — confirm against proc_fdlock usage.
	 */
	if (fdp_cvp == olddp) {
		vnode_ref(newdp);
		tvp = fdp->fd_cdir;
		fdp_cvp = newdp;
		cdir_changed = 1;
		vnode_rele(tvp);
	}
	if (fdp_rvp == olddp) {
		vnode_ref(newdp);
		tvp = fdp->fd_rdir;
		fdp_rvp = newdp;
		rdir_changed = 1;
		vnode_rele(tvp);
	}
	/* Publish the updated directory vnodes back under the fd lock */
	if (cdir_changed || rdir_changed) {
		proc_fdlock(p);
		fdp->fd_cdir = fdp_cvp;
		fdp->fd_rdir = fdp_rvp;
		proc_fdunlock(p);
	}
	return(PROC_RETURNED);
}
1680
1681
1682
1683/*
1684 * Scan all active processes to see if any of them have a current
1685 * or root directory onto which the new filesystem has just been
1686 * mounted. If so, replace them with the new mount point.
1687 */
1688static int
1689checkdirs(vnode_t olddp, vfs_context_t ctx)
1690{
1691 vnode_t newdp;
1692 vnode_t tvp;
1693 int err;
1694 struct cdirargs cdr;
1695
1696 if (olddp->v_usecount == 1)
1697 return(0);
1698 err = VFS_ROOT(olddp->v_mountedhere, &newdp, ctx);
1699
1700 if (err != 0) {
1701#if DIAGNOSTIC
1702 panic("mount: lost mount: error %d", err);
1703#endif
1704 return(err);
1705 }
1706
1707 cdr.olddp = olddp;
1708 cdr.newdp = newdp;
1709 /* do not block for exec/fork trans as the vp in cwd & rootdir are not changing */
1710 proc_iterate(PROC_ALLPROCLIST | PROC_NOWAITTRANS, checkdirs_callback, (void *)&cdr, NULL, NULL);
1711
1712 if (rootvnode == olddp) {
1713 vnode_ref(newdp);
1714 tvp = rootvnode;
1715 rootvnode = newdp;
1716 vnode_rele(tvp);
1717 }
1718
1719 vnode_put(newdp);
1720 return(0);
1721}
1722
1723/*
1724 * Unmount a file system.
1725 *
1726 * Note: unmount takes a path to the vnode mounted on as argument,
1727 * not special file (as before).
1728 */
1729/* ARGSUSED */
1730int
1731unmount(__unused proc_t p, struct unmount_args *uap, __unused int32_t *retval)
1732{
1733 vnode_t vp;
1734 struct mount *mp;
1735 int error;
1736 struct nameidata nd;
1737 vfs_context_t ctx = vfs_context_current();
1738
1739 NDINIT(&nd, LOOKUP, OP_UNMOUNT, FOLLOW | AUDITVNPATH1,
1740 UIO_USERSPACE, uap->path, ctx);
1741 error = namei(&nd);
1742 if (error)
1743 return (error);
1744 vp = nd.ni_vp;
1745 mp = vp->v_mount;
1746 nameidone(&nd);
1747
1748#if CONFIG_MACF
1749 error = mac_mount_check_umount(ctx, mp);
1750 if (error != 0) {
1751 vnode_put(vp);
1752 return (error);
1753 }
1754#endif
1755 /*
1756 * Must be the root of the filesystem
1757 */
1758 if ((vp->v_flag & VROOT) == 0) {
1759 vnode_put(vp);
1760 return (EINVAL);
1761 }
1762 mount_ref(mp, 0);
1763 vnode_put(vp);
1764 /* safedounmount consumes the mount ref */
1765 return (safedounmount(mp, uap->flags, ctx));
1766}
1767
/*
 * Unmount the filesystem identified by fsid.  Returns ENOENT if no
 * mounted filesystem matches; otherwise takes a mount ref (consumed
 * by safedounmount()) and performs the unmount.
 */
int
vfs_unmountbyfsid(fsid_t * fsid, int flags, vfs_context_t ctx)
{
	mount_t mp;

	mp = mount_list_lookupby_fsid(fsid, 0, 1);
	if (mp == (mount_t)0) {
		return(ENOENT);
	}
	/* Trade the iterator reference from the lookup for a mount ref */
	mount_ref(mp, 0);
	mount_iterdrop(mp);
	/* safedounmount consumes the mount ref */
	return(safedounmount(mp, flags, ctx));
}
1782
1783
1784/*
1785 * The mount struct comes with a mount ref which will be consumed.
1786 * Do the actual file system unmount, prevent some common foot shooting.
1787 */
1788int
1789safedounmount(struct mount *mp, int flags, vfs_context_t ctx)
1790{
1791 int error;
1792 proc_t p = vfs_context_proc(ctx);
1793
1794 /*
1795 * If the file system is not responding and MNT_NOBLOCK
1796 * is set and not a forced unmount then return EBUSY.
1797 */
1798 if ((mp->mnt_kern_flag & MNT_LNOTRESP) &&
1799 (flags & MNT_NOBLOCK) && ((flags & MNT_FORCE) == 0)) {
1800 error = EBUSY;
1801 goto out;
1802 }
1803
1804 /*
1805 * Skip authorization if the mount is tagged as permissive and
1806 * this is not a forced-unmount attempt.
1807 */
1808 if (!(((mp->mnt_kern_flag & MNTK_PERMIT_UNMOUNT) != 0) && ((flags & MNT_FORCE) == 0))) {
1809 /*
1810 * Only root, or the user that did the original mount is
1811 * permitted to unmount this filesystem.
1812 */
1813 if ((mp->mnt_vfsstat.f_owner != kauth_cred_getuid(kauth_cred_get())) &&
1814 (error = suser(kauth_cred_get(), &p->p_acflag)))
1815 goto out;
1816 }
1817 /*
1818 * Don't allow unmounting the root file system.
1819 */
1820 if (mp->mnt_flag & MNT_ROOTFS) {
1821 error = EBUSY; /* the root is always busy */
1822 goto out;
1823 }
1824
1825#ifdef CONFIG_IMGSRC_ACCESS
1826 if (mp->mnt_kern_flag & MNTK_BACKS_ROOT) {
1827 error = EBUSY;
1828 goto out;
1829 }
1830#endif /* CONFIG_IMGSRC_ACCESS */
1831
1832 return (dounmount(mp, flags, 1, ctx));
1833
1834out:
1835 mount_drop(mp, 0);
1836 return(error);
1837}
1838
1839/*
1840 * Do the actual file system unmount.
1841 */
1842int
1843dounmount(struct mount *mp, int flags, int withref, vfs_context_t ctx)
1844{
1845 vnode_t coveredvp = (vnode_t)0;
1846 int error;
1847 int needwakeup = 0;
1848 int forcedunmount = 0;
1849 int lflags = 0;
1850 struct vnode *devvp = NULLVP;
1851#if CONFIG_TRIGGERS
1852 proc_t p = vfs_context_proc(ctx);
1853 int did_vflush = 0;
1854 int pflags_save = 0;
1855#endif /* CONFIG_TRIGGERS */
1856
1857 mount_lock(mp);
1858
1859 /*
1860 * If already an unmount in progress just return EBUSY.
1861 * Even a forced unmount cannot override.
1862 */
1863 if (mp->mnt_lflag & MNT_LUNMOUNT) {
1864 if (withref != 0)
1865 mount_drop(mp, 1);
1866 mount_unlock(mp);
1867 return (EBUSY);
1868 }
1869
1870 if (flags & MNT_FORCE) {
1871 forcedunmount = 1;
1872 mp->mnt_lflag |= MNT_LFORCE;
1873 }
1874
1875#if CONFIG_TRIGGERS
1876 if (flags & MNT_NOBLOCK && p != kernproc)
1877 pflags_save = OSBitOrAtomic(P_NOREMOTEHANG, &p->p_flag);
1878#endif
1879
1880 mp->mnt_kern_flag |= MNTK_UNMOUNT;
1881 mp->mnt_lflag |= MNT_LUNMOUNT;
1882 mp->mnt_flag &=~ MNT_ASYNC;
1883 /*
1884 * anyone currently in the fast path that
1885 * trips over the cached rootvp will be
1886 * dumped out and forced into the slow path
1887 * to regenerate a new cached value
1888 */
1889 mp->mnt_realrootvp = NULLVP;
1890 mount_unlock(mp);
1891
1892 if (forcedunmount && (flags & MNT_LNOSUB) == 0) {
1893 /*
1894 * Force unmount any mounts in this filesystem.
1895 * If any unmounts fail - just leave them dangling.
1896 * Avoids recursion.
1897 */
1898 (void) dounmount_submounts(mp, flags | MNT_LNOSUB, ctx);
1899 }
1900
1901 /*
1902 * taking the name_cache_lock exclusively will
1903 * insure that everyone is out of the fast path who
1904 * might be trying to use a now stale copy of
1905 * vp->v_mountedhere->mnt_realrootvp
1906 * bumping mount_generation causes the cached values
1907 * to be invalidated
1908 */
1909 name_cache_lock();
1910 mount_generation++;
1911 name_cache_unlock();
1912
1913
1914 lck_rw_lock_exclusive(&mp->mnt_rwlock);
1915 if (withref != 0)
1916 mount_drop(mp, 0);
1917#if CONFIG_FSE
1918 fsevent_unmount(mp); /* has to come first! */
1919#endif
1920 error = 0;
1921 if (forcedunmount == 0) {
1922 ubc_umount(mp); /* release cached vnodes */
1923 if ((mp->mnt_flag & MNT_RDONLY) == 0) {
1924 error = VFS_SYNC(mp, MNT_WAIT, ctx);
1925 if (error) {
1926 mount_lock(mp);
1927 mp->mnt_kern_flag &= ~MNTK_UNMOUNT;
1928 mp->mnt_lflag &= ~MNT_LUNMOUNT;
1929 mp->mnt_lflag &= ~MNT_LFORCE;
1930 goto out;
1931 }
1932 }
1933 }
1934
1935 IOBSDMountChange(mp, kIOMountChangeUnmount);
1936
1937#if CONFIG_TRIGGERS
1938 vfs_nested_trigger_unmounts(mp, flags, ctx);
1939 did_vflush = 1;
1940#endif
1941 if (forcedunmount)
1942 lflags |= FORCECLOSE;
1943 error = vflush(mp, NULLVP, SKIPSWAP | SKIPSYSTEM | SKIPROOT | lflags);
1944 if ((forcedunmount == 0) && error) {
1945 mount_lock(mp);
1946 mp->mnt_kern_flag &= ~MNTK_UNMOUNT;
1947 mp->mnt_lflag &= ~MNT_LUNMOUNT;
1948 mp->mnt_lflag &= ~MNT_LFORCE;
1949 goto out;
1950 }
1951
1952 /* make sure there are no one in the mount iterations or lookup */
1953 mount_iterdrain(mp);
1954
1955 error = VFS_UNMOUNT(mp, flags, ctx);
1956 if (error) {
1957 mount_iterreset(mp);
1958 mount_lock(mp);
1959 mp->mnt_kern_flag &= ~MNTK_UNMOUNT;
1960 mp->mnt_lflag &= ~MNT_LUNMOUNT;
1961 mp->mnt_lflag &= ~MNT_LFORCE;
1962 goto out;
1963 }
1964
1965 /* increment the operations count */
1966 if (!error)
1967 OSAddAtomic(1, &vfs_nummntops);
1968
1969 if ( mp->mnt_devvp && mp->mnt_vtable->vfc_vfsflags & VFC_VFSLOCALARGS) {
1970 /* hold an io reference and drop the usecount before close */
1971 devvp = mp->mnt_devvp;
1972 vnode_getalways(devvp);
1973 vnode_rele(devvp);
1974 VNOP_CLOSE(devvp, mp->mnt_flag & MNT_RDONLY ? FREAD : FREAD|FWRITE,
1975 ctx);
1976 vnode_clearmountedon(devvp);
1977 vnode_put(devvp);
1978 }
1979 lck_rw_done(&mp->mnt_rwlock);
1980 mount_list_remove(mp);
1981 lck_rw_lock_exclusive(&mp->mnt_rwlock);
1982
1983 /* mark the mount point hook in the vp but not drop the ref yet */
1984 if ((coveredvp = mp->mnt_vnodecovered) != NULLVP) {
1985 /*
1986 * The covered vnode needs special handling. Trying to get an
1987 * iocount must not block here as this may lead to deadlocks
1988 * if the Filesystem to which the covered vnode belongs is
1989 * undergoing forced unmounts. Since we hold a usecount, the
1990 * vnode cannot be reused (it can, however, still be terminated)
1991 */
1992 vnode_getalways(coveredvp);
1993 vnode_lock_spin(coveredvp);
1994
1995 mp->mnt_crossref++;
1996 coveredvp->v_mountedhere = (struct mount *)0;
1997 CLR(coveredvp->v_flag, VMOUNT);
1998
1999 vnode_unlock(coveredvp);
2000 vnode_put(coveredvp);
2001 }
2002
2003 mount_list_lock();
2004 mp->mnt_vtable->vfc_refcount--;
2005 mount_list_unlock();
2006
2007 cache_purgevfs(mp); /* remove cache entries for this file sys */
2008 vfs_event_signal(NULL, VQ_UNMOUNT, (intptr_t)NULL);
2009 mount_lock(mp);
2010 mp->mnt_lflag |= MNT_LDEAD;
2011
2012 if (mp->mnt_lflag & MNT_LWAIT) {
2013 /*
2014 * do the wakeup here
2015 * in case we block in mount_refdrain
2016 * which will drop the mount lock
2017 * and allow anyone blocked in vfs_busy
2018 * to wakeup and see the LDEAD state
2019 */
2020 mp->mnt_lflag &= ~MNT_LWAIT;
2021 wakeup((caddr_t)mp);
2022 }
2023 mount_refdrain(mp);
2024out:
2025 if (mp->mnt_lflag & MNT_LWAIT) {
2026 mp->mnt_lflag &= ~MNT_LWAIT;
2027 needwakeup = 1;
2028 }
2029
2030#if CONFIG_TRIGGERS
2031 if (flags & MNT_NOBLOCK && p != kernproc) {
2032 // Restore P_NOREMOTEHANG bit to its previous value
2033 if ((pflags_save & P_NOREMOTEHANG) == 0)
2034 OSBitAndAtomic(~((uint32_t) P_NOREMOTEHANG), &p->p_flag);
2035 }
2036
2037 /*
2038 * Callback and context are set together under the mount lock, and
2039 * never cleared, so we're safe to examine them here, drop the lock,
2040 * and call out.
2041 */
2042 if (mp->mnt_triggercallback != NULL) {
2043 mount_unlock(mp);
2044 if (error == 0) {
2045 mp->mnt_triggercallback(mp, VTC_RELEASE, mp->mnt_triggerdata, ctx);
2046 } else if (did_vflush) {
2047 mp->mnt_triggercallback(mp, VTC_REPLACE, mp->mnt_triggerdata, ctx);
2048 }
2049 } else {
2050 mount_unlock(mp);
2051 }
2052#else
2053 mount_unlock(mp);
2054#endif /* CONFIG_TRIGGERS */
2055
2056 lck_rw_done(&mp->mnt_rwlock);
2057
2058 if (needwakeup)
2059 wakeup((caddr_t)mp);
2060
2061 if (!error) {
2062 if ((coveredvp != NULLVP)) {
2063 vnode_t pvp = NULLVP;
2064
2065 /*
2066 * The covered vnode needs special handling. Trying to
2067 * get an iocount must not block here as this may lead
2068 * to deadlocks if the Filesystem to which the covered
2069 * vnode belongs is undergoing forced unmounts. Since we
2070 * hold a usecount, the vnode cannot be reused
2071 * (it can, however, still be terminated).
2072 */
2073 vnode_getalways(coveredvp);
2074
2075 mount_dropcrossref(mp, coveredvp, 0);
2076 /*
2077 * We'll _try_ to detect if this really needs to be
2078 * done. The coveredvp can only be in termination (or
2079 * terminated) if the coveredvp's mount point is in a
2080 * forced unmount (or has been) since we still hold the
2081 * ref.
2082 */
2083 if (!vnode_isrecycled(coveredvp)) {
2084 pvp = vnode_getparent(coveredvp);
2085#if CONFIG_TRIGGERS
2086 if (coveredvp->v_resolve) {
2087 vnode_trigger_rearm(coveredvp, ctx);
2088 }
2089#endif
2090 }
2091
2092 vnode_rele(coveredvp);
2093 vnode_put(coveredvp);
2094 coveredvp = NULLVP;
2095
2096 if (pvp) {
2097 lock_vnode_and_post(pvp, NOTE_WRITE);
2098 vnode_put(pvp);
2099 }
2100 } else if (mp->mnt_flag & MNT_ROOTFS) {
2101 mount_lock_destroy(mp);
2102#if CONFIG_MACF
2103 mac_mount_label_destroy(mp);
2104#endif
2105 FREE_ZONE((caddr_t)mp, sizeof (struct mount), M_MOUNT);
2106 } else
2107 panic("dounmount: no coveredvp");
2108 }
2109 return (error);
2110}
2111
2112/*
2113 * Unmount any mounts in this filesystem.
2114 */
2115void
2116dounmount_submounts(struct mount *mp, int flags, vfs_context_t ctx)
2117{
2118 mount_t smp;
2119 fsid_t *fsids, fsid;
2120 int fsids_sz;
2121 int count = 0, i, m = 0;
2122 vnode_t vp;
2123
2124 mount_list_lock();
2125
2126 // Get an array to hold the submounts fsids.
2127 TAILQ_FOREACH(smp, &mountlist, mnt_list)
2128 count++;
2129 fsids_sz = count * sizeof(fsid_t);
2130 MALLOC(fsids, fsid_t *, fsids_sz, M_TEMP, M_NOWAIT);
2131 if (fsids == NULL) {
2132 mount_list_unlock();
2133 goto out;
2134 }
2135 fsids[0] = mp->mnt_vfsstat.f_fsid; // Prime the pump
2136
2137 /*
2138 * Fill the array with submount fsids.
2139 * Since mounts are always added to the tail of the mount list, the
2140 * list is always in mount order.
2141 * For each mount check if the mounted-on vnode belongs to a
2142 * mount that's already added to our array of mounts to be unmounted.
2143 */
2144 for (smp = TAILQ_NEXT(mp, mnt_list); smp; smp = TAILQ_NEXT(smp, mnt_list)) {
2145 vp = smp->mnt_vnodecovered;
2146 if (vp == NULL)
2147 continue;
2148 fsid = vnode_mount(vp)->mnt_vfsstat.f_fsid; // Underlying fsid
2149 for (i = 0; i <= m; i++) {
2150 if (fsids[i].val[0] == fsid.val[0] &&
2151 fsids[i].val[1] == fsid.val[1]) {
2152 fsids[++m] = smp->mnt_vfsstat.f_fsid;
2153 break;
2154 }
2155 }
2156 }
2157 mount_list_unlock();
2158
2159 // Unmount the submounts in reverse order. Ignore errors.
2160 for (i = m; i > 0; i--) {
2161 smp = mount_list_lookupby_fsid(&fsids[i], 0, 1);
2162 if (smp) {
2163 mount_ref(smp, 0);
2164 mount_iterdrop(smp);
2165 (void) dounmount(smp, flags, 1, ctx);
2166 }
2167 }
2168out:
2169 if (fsids)
2170 FREE(fsids, M_TEMP);
2171}
2172
/*
 * Drop one cross-reference on mp taken through the covered vnode dp.
 * When the last crossref goes away and the mount is no longer attached
 * to dp (v_mountedhere), the mount structure itself is freed.
 * If need_put is set, the iocount held on dp is released as well.
 */
void
mount_dropcrossref(mount_t mp, vnode_t dp, int need_put)
{
	vnode_lock(dp);
	mp->mnt_crossref--;

	if (mp->mnt_crossref < 0)
		panic("mount cross refs -ve");

	if ((mp != dp->v_mountedhere) && (mp->mnt_crossref == 0)) {

		/* Last reference and detached from dp: safe to free the mount */
		if (need_put)
			vnode_put_locked(dp);
		vnode_unlock(dp);

		mount_lock_destroy(mp);
#if CONFIG_MACF
		mac_mount_label_destroy(mp);
#endif
		FREE_ZONE((caddr_t)mp, sizeof (struct mount), M_MOUNT);
		return;
	}
	if (need_put)
		vnode_put_locked(dp);
	vnode_unlock(dp);
}
2199
2200
2201/*
2202 * Sync each mounted filesystem.
2203 */
#if DIAGNOSTIC
int syncprt = 0;		/* when set, sync paths dump buffer statistics */
#endif

int print_vmpage_stat=0;	/* when set, sync paths report dirty VM page counts */
int sync_timeout = 60; // Sync time limit (sec)
2210
2211static int
2212sync_callback(mount_t mp, __unused void *arg)
2213{
2214 if ((mp->mnt_flag & MNT_RDONLY) == 0) {
2215 int asyncflag = mp->mnt_flag & MNT_ASYNC;
2216
2217 mp->mnt_flag &= ~MNT_ASYNC;
2218 VFS_SYNC(mp, arg ? MNT_WAIT : MNT_NOWAIT, vfs_context_kernel());
2219 if (asyncflag)
2220 mp->mnt_flag |= MNT_ASYNC;
2221 }
2222
2223 return (VFS_RETURNED);
2224}
2225
2226/* ARGSUSED */
2227int
2228sync(__unused proc_t p, __unused struct sync_args *uap, __unused int32_t *retval)
2229{
2230 vfs_iterate(LK_NOWAIT, sync_callback, NULL);
2231
2232 if (print_vmpage_stat) {
2233 vm_countdirtypages();
2234 }
2235
2236#if DIAGNOSTIC
2237 if (syncprt)
2238 vfs_bufstats();
2239#endif /* DIAGNOSTIC */
2240 return 0;
2241}
2242
/*
 * Body of the helper thread spawned by sync_async(): perform the same
 * per-mount flush as sync(2), then wake the waiter on the channel it
 * passed in.
 *
 * NOTE(review): 'arg' points into sync_async()'s stack frame.  It is used
 * only as a wakeup channel (its value), never dereferenced, so it remains
 * safe even after a timed-out waiter has returned.
 */
static void
sync_thread(void *arg, __unused wait_result_t wr)
{
	int *timeout = (int *) arg;

	vfs_iterate(LK_NOWAIT, sync_callback, NULL);

	if (timeout)
		wakeup((caddr_t) timeout);
	if (print_vmpage_stat) {
		vm_countdirtypages();
	}

#if DIAGNOSTIC
	if (syncprt)
		vfs_bufstats();
#endif /* DIAGNOSTIC */
}
2261
2262/*
2263 * Sync in a separate thread so we can time out if it blocks.
2264 */
/*
 * Run a full sync in a helper kernel thread and wait up to 'timeout'
 * seconds for it to finish.  Returns 0 whether the sync completed or
 * timed out; a timeout is only reported via printf.
 */
static int
sync_async(int timeout)
{
	thread_t thd;
	int error;
	struct timespec ts = {timeout, 0};

	/* The mutex pairs with msleep below; &timeout is the wakeup channel. */
	lck_mtx_lock(sync_mtx_lck);
	if (kernel_thread_start(sync_thread, &timeout, &thd) != KERN_SUCCESS) {
		printf("sync_thread failed\n");
		lck_mtx_unlock(sync_mtx_lck);
		return (0);
	}

	/* PDROP: msleep releases sync_mtx_lck on return (timeout or wakeup). */
	error = msleep((caddr_t) &timeout, sync_mtx_lck, (PVFS | PDROP | PCATCH), "sync_thread", &ts);
	if (error) {
		printf("sync timed out: %d sec\n", timeout);
	}
	/* Release the reference returned by kernel_thread_start. */
	thread_deallocate(thd);

	return (0);
}
2287
2288/*
2289 * An in-kernel sync for power management to call.
2290 */
2291__private_extern__ int
2292sync_internal(void)
2293{
2294 (void) sync_async(sync_timeout);
2295
2296 return 0;
2297} /* end of sync_internal call */
2298
2299/*
2300 * Change filesystem quotas.
2301 */
2302#if QUOTA
int
quotactl(proc_t p, struct quotactl_args *uap, __unused int32_t *retval)
{
	struct mount *mp;
	int error, quota_cmd, quota_status;
	caddr_t datap;
	size_t fnamelen;
	struct nameidata nd;
	vfs_context_t ctx = vfs_context_current();
	struct dqblk my_dqblk;

	AUDIT_ARG(uid, uap->uid);
	AUDIT_ARG(cmd, uap->cmd);
	/* uap->path identifies only the mount to operate on. */
	NDINIT(&nd, LOOKUP, OP_LOOKUP, FOLLOW | AUDITVNPATH1, UIO_USERSPACE,
	    uap->path, ctx);
	error = namei(&nd);
	if (error)
		return (error);
	mp = nd.ni_vp->v_mount;
	vnode_put(nd.ni_vp);
	nameidone(&nd);

	/* copyin any data we will need for downstream code */
	quota_cmd = uap->cmd >> SUBCMDSHIFT;

	/* 'error' is 0 here (namei succeeded); cases with no copyin leave it 0. */
	switch (quota_cmd) {
	case Q_QUOTAON:
		/* uap->arg specifies a file from which to take the quotas */
		fnamelen = MAXPATHLEN;
		datap = kalloc(MAXPATHLEN);
		error = copyinstr(uap->arg, datap, MAXPATHLEN, &fnamelen);
		break;
	case Q_GETQUOTA:
		/* uap->arg is a pointer to a dqblk structure. */
		datap = (caddr_t) &my_dqblk;
		break;
	case Q_SETQUOTA:
	case Q_SETUSE:
		/* uap->arg is a pointer to a dqblk structure. */
		datap = (caddr_t) &my_dqblk;
		if (proc_is64bit(p)) {
			/* 64-bit callers pass user_dqblk; munge to kernel layout. */
			struct user_dqblk my_dqblk64;
			error = copyin(uap->arg, (caddr_t)&my_dqblk64, sizeof (my_dqblk64));
			if (error == 0) {
				munge_dqblk(&my_dqblk, &my_dqblk64, FALSE);
			}
		}
		else {
			error = copyin(uap->arg, (caddr_t)&my_dqblk, sizeof (my_dqblk));
		}
		break;
	case Q_QUOTASTAT:
		/* uap->arg is a pointer to an integer */
		datap = (caddr_t) &quota_status;
		break;
	default:
		/* Unknown subcommands are passed through with no argument data. */
		datap = NULL;
		break;
	} /* switch */

	if (error == 0) {
		error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, datap, ctx);
	}

	/* Copy results back out / release resources per subcommand. */
	switch (quota_cmd) {
	case Q_QUOTAON:
		if (datap != NULL)
			kfree(datap, MAXPATHLEN);
		break;
	case Q_GETQUOTA:
		/* uap->arg is a pointer to a dqblk structure we need to copy out to */
		if (error == 0) {
			if (proc_is64bit(p)) {
				struct user_dqblk my_dqblk64 = {.dqb_bhardlimit = 0};
				munge_dqblk(&my_dqblk, &my_dqblk64, TRUE);
				error = copyout((caddr_t)&my_dqblk64, uap->arg, sizeof (my_dqblk64));
			}
			else {
				error = copyout(datap, uap->arg, sizeof (struct dqblk));
			}
		}
		break;
	case Q_QUOTASTAT:
		/* uap->arg is a pointer to an integer */
		if (error == 0) {
			error = copyout(datap, uap->arg, sizeof(quota_status));
		}
		break;
	default:
		break;
	} /* switch */

	return (error);
}
2397#else
int
quotactl(__unused proc_t p, __unused struct quotactl_args *uap, __unused int32_t *retval)
{
	/* Kernel built without QUOTA support. */
	return (EOPNOTSUPP);
}
2403#endif /* QUOTA */
2404
2405/*
2406 * Get filesystem statistics.
2407 *
2408 * Returns: 0 Success
2409 * namei:???
2410 * vfs_update_vfsstat:???
2411 * munge_statfs:EFAULT
2412 */
2413/* ARGSUSED */
int
statfs(__unused proc_t p, struct statfs_args *uap, __unused int32_t *retval)
{
	struct mount *mp;
	struct vfsstatfs *sp;
	int error;
	struct nameidata nd;
	vfs_context_t ctx = vfs_context_current();
	vnode_t vp;

	NDINIT(&nd, LOOKUP, OP_STATFS, FOLLOW | AUDITVNPATH1,
	    UIO_USERSPACE, uap->path, ctx);
	error = namei(&nd);
	if (error)
		return (error);
	/* Hold the lookup iocount across the stat so the mount stays pinned. */
	vp = nd.ni_vp;
	mp = vp->v_mount;
	sp = &mp->mnt_vfsstat;
	nameidone(&nd);

	/* Refresh the cached vfsstat from the filesystem before copying out. */
	error = vfs_update_vfsstat(mp, ctx, VFS_USER_EVENT);
	if (error != 0) {
		vnode_put(vp);
		return (error);
	}

	/* munge_statfs handles the 32/64-bit user layouts and the copyout. */
	error = munge_statfs(mp, sp, uap->buf, NULL, IS_64BIT_PROCESS(p), TRUE);
	vnode_put(vp);
	return (error);
}
2444
2445/*
2446 * Get filesystem statistics.
2447 */
2448/* ARGSUSED */
int
fstatfs(__unused proc_t p, struct fstatfs_args *uap, __unused int32_t *retval)
{
	vnode_t vp;
	struct mount *mp;
	struct vfsstatfs *sp;
	int error;

	AUDIT_ARG(fd, uap->fd);

	if ( (error = file_vnode(uap->fd, &vp)) )
		return (error);

	/* Convert the fd's reference into an iocount for the duration. */
	error = vnode_getwithref(vp);
	if (error) {
		file_drop(uap->fd);
		return (error);
	}

	AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);

	mp = vp->v_mount;
	if (!mp) {
		/* vnode no longer associated with a mount (e.g. force-unmounted) */
		error = EBADF;
		goto out;
	}
	sp = &mp->mnt_vfsstat;
	/* Refresh the cached vfsstat before copying it out. */
	if ((error = vfs_update_vfsstat(mp,vfs_context_current(),VFS_USER_EVENT)) != 0) {
		goto out;
	}

	error = munge_statfs(mp, sp, uap->buf, NULL, IS_64BIT_PROCESS(p), TRUE);

out:
	file_drop(uap->fd);
	vnode_put(vp);

	return (error);
}
2488
2489/*
2490 * Common routine to handle copying of statfs64 data to user space
2491 */
static int
statfs64_common(struct mount *mp, struct vfsstatfs *sfsp, user_addr_t bufp)
{
	int error;
	struct statfs64 sfs;

	/* Zero first so padding bytes never leak kernel stack to user space. */
	bzero(&sfs, sizeof(sfs));

	sfs.f_bsize = sfsp->f_bsize;
	sfs.f_iosize = (int32_t)sfsp->f_iosize;
	sfs.f_blocks = sfsp->f_blocks;
	sfs.f_bfree = sfsp->f_bfree;
	sfs.f_bavail = sfsp->f_bavail;
	sfs.f_files = sfsp->f_files;
	sfs.f_ffree = sfsp->f_ffree;
	sfs.f_fsid = sfsp->f_fsid;
	sfs.f_owner = sfsp->f_owner;
	sfs.f_type = mp->mnt_vtable->vfc_typenum;
	/* Expose only externally-visible mount flags. */
	sfs.f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
	sfs.f_fssubtype = sfsp->f_fssubtype;
	if (mp->mnt_kern_flag & MNTK_TYPENAME_OVERRIDE) {
		/* Some mounts report an overridden filesystem type name. */
		strlcpy(&sfs.f_fstypename[0], &mp->fstypename_override[0], MFSTYPENAMELEN);
	} else {
		strlcpy(&sfs.f_fstypename[0], &sfsp->f_fstypename[0], MFSTYPENAMELEN);
	}
	strlcpy(&sfs.f_mntonname[0], &sfsp->f_mntonname[0], MAXPATHLEN);
	strlcpy(&sfs.f_mntfromname[0], &sfsp->f_mntfromname[0], MAXPATHLEN);

	error = copyout((caddr_t)&sfs, bufp, sizeof(sfs));

	return(error);
}
2524
2525/*
2526 * Get file system statistics in 64-bit mode
2527 */
int
statfs64(__unused struct proc *p, struct statfs64_args *uap, __unused int32_t *retval)
{
	struct mount *mp;
	struct vfsstatfs *sp;
	int error;
	struct nameidata nd;
	vfs_context_t ctxp = vfs_context_current();
	vnode_t vp;

	NDINIT(&nd, LOOKUP, OP_STATFS, FOLLOW | AUDITVNPATH1,
	    UIO_USERSPACE, uap->path, ctxp);
	error = namei(&nd);
	if (error)
		return (error);
	/* Hold the lookup iocount so the mount cannot disappear under us. */
	vp = nd.ni_vp;
	mp = vp->v_mount;
	sp = &mp->mnt_vfsstat;
	nameidone(&nd);

	/* Refresh the cached vfsstat from the filesystem before copying out. */
	error = vfs_update_vfsstat(mp, ctxp, VFS_USER_EVENT);
	if (error != 0) {
		vnode_put(vp);
		return (error);
	}

	/* statfs64 has a single layout for 32- and 64-bit callers. */
	error = statfs64_common(mp, sp, uap->buf);
	vnode_put(vp);

	return (error);
}
2559
2560/*
2561 * Get file system statistics in 64-bit mode
2562 */
int
fstatfs64(__unused struct proc *p, struct fstatfs64_args *uap, __unused int32_t *retval)
{
	struct vnode *vp;
	struct mount *mp;
	struct vfsstatfs *sp;
	int error;

	AUDIT_ARG(fd, uap->fd);

	if ( (error = file_vnode(uap->fd, &vp)) )
		return (error);

	/* Convert the fd's reference into an iocount for the duration. */
	error = vnode_getwithref(vp);
	if (error) {
		file_drop(uap->fd);
		return (error);
	}

	AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);

	mp = vp->v_mount;
	if (!mp) {
		/* vnode no longer associated with a mount (e.g. force-unmounted) */
		error = EBADF;
		goto out;
	}
	sp = &mp->mnt_vfsstat;
	/* Refresh the cached vfsstat before copying it out. */
	if ((error = vfs_update_vfsstat(mp, vfs_context_current(), VFS_USER_EVENT)) != 0) {
		goto out;
	}

	error = statfs64_common(mp, sp, uap->buf);

out:
	file_drop(uap->fd);
	vnode_put(vp);

	return (error);
}
2602
/*
 * Iteration state shared by getfsstat/getfsstat64/__mac_getfsstat and
 * their vfs_iterate() callbacks.
 */
struct getfsstat_struct {
	user_addr_t sfsp;	/* user buffer cursor, advanced per entry (0 = count only) */
	user_addr_t *mp;	/* optional per-mount MAC label destinations (may be NULL) */
	int count;		/* mounts copied/counted so far */
	int maxcount;		/* number of entries that fit in the user buffer */
	int flags;		/* MNT_NOWAIT / MNT_WAIT / MNT_DWAIT from the caller */
	int error;		/* first copyout/MAC error; terminates the iteration */
};
2611
2612
/*
 * vfs_iterate() callback for getfsstat/__mac_getfsstat: copy one mount's
 * statistics (and optionally its MAC label) to user space, or just count
 * the mount once the buffer is full / absent.
 */
static int
getfsstat_callback(mount_t mp, void * arg)
{

	struct getfsstat_struct *fstp = (struct getfsstat_struct *)arg;
	struct vfsstatfs *sp;
	int error, my_size;
	vfs_context_t ctx = vfs_context_current();

	if (fstp->sfsp && fstp->count < fstp->maxcount) {
		sp = &mp->mnt_vfsstat;
		/*
		 * If MNT_NOWAIT is specified, do not refresh the
		 * fsstat cache. MNT_WAIT/MNT_DWAIT overrides MNT_NOWAIT.
		 */
		if (((fstp->flags & MNT_NOWAIT) == 0 || (fstp->flags & (MNT_WAIT | MNT_DWAIT))) &&
		    (error = vfs_update_vfsstat(mp, ctx,
		    VFS_USER_EVENT))) {
			/* A refresh failure skips (and does not count) this mount. */
			KAUTH_DEBUG("vfs_update_vfsstat returned %d", error);
			return(VFS_RETURNED);
		}

		/*
		 * Need to handle LP64 version of struct statfs
		 */
		error = munge_statfs(mp, sp, fstp->sfsp, &my_size, IS_64BIT_PROCESS(vfs_context_proc(ctx)), FALSE);
		if (error) {
			fstp->error = error;
			return(VFS_RETURNED_DONE);
		}
		/* Advance by however many bytes munge_statfs actually wrote. */
		fstp->sfsp += my_size;

		if (fstp->mp) {
#if CONFIG_MACF
			/* Copy this mount's MAC label to the caller-supplied slot. */
			error = mac_mount_label_get(mp, *fstp->mp);
			if (error) {
				fstp->error = error;
				return(VFS_RETURNED_DONE);
			}
#endif
			fstp->mp++;
		}
	}
	fstp->count++;
	return(VFS_RETURNED);
}
2659
2660/*
2661 * Get statistics on all filesystems.
2662 */
2663int
2664getfsstat(__unused proc_t p, struct getfsstat_args *uap, int *retval)
2665{
2666 struct __mac_getfsstat_args muap;
2667
2668 muap.buf = uap->buf;
2669 muap.bufsize = uap->bufsize;
2670 muap.mac = USER_ADDR_NULL;
2671 muap.macsize = 0;
2672 muap.flags = uap->flags;
2673
2674 return (__mac_getfsstat(p, &muap, retval));
2675}
2676
2677/*
2678 * __mac_getfsstat: Get MAC-related file system statistics
2679 *
2680 * Parameters: p (ignored)
2681 * uap User argument descriptor (see below)
2682 * retval Count of file system statistics (N stats)
2683 *
2684 * Indirect: uap->bufsize Buffer size
2685 * uap->macsize MAC info size
2686 * uap->buf Buffer where information will be returned
2687 * uap->mac MAC info
2688 * uap->flags File system flags
2689 *
2690 *
2691 * Returns: 0 Success
2692 * !0 Not success
2693 *
2694 */
int
__mac_getfsstat(__unused proc_t p, struct __mac_getfsstat_args *uap, int *retval)
{
	user_addr_t sfsp;
	user_addr_t *mp;
	size_t count, maxcount, bufsize, macsize;
	struct getfsstat_struct fst;

	bufsize = (size_t) uap->bufsize;
	macsize = (size_t) uap->macsize;

	/* The user struct statfs layout differs between 32- and 64-bit callers. */
	if (IS_64BIT_PROCESS(p)) {
		maxcount = bufsize / sizeof(struct user64_statfs);
	}
	else {
		maxcount = bufsize / sizeof(struct user32_statfs);
	}
	sfsp = uap->buf;
	count = 0;

	mp = NULL;

#if CONFIG_MACF
	if (uap->mac != USER_ADDR_NULL) {
		u_int32_t *mp0;
		int error;
		unsigned int i;

		/* Caller must supply exactly one MAC label slot per statfs slot. */
		count = (macsize / (IS_64BIT_PROCESS(p) ? 8 : 4));
		if (count != maxcount)
			return (EINVAL);

		/* Copy in the array */
		MALLOC(mp0, u_int32_t *, macsize, M_MACTEMP, M_WAITOK);
		if (mp0 == NULL) {
			return (ENOMEM);
		}

		error = copyin(uap->mac, mp0, macsize);
		if (error) {
			FREE(mp0, M_MACTEMP);
			return (error);
		}

		/* Normalize to an array of user_addr_t */
		/* NOTE(review): 'count' derives from the user-supplied macsize;
		 * verify count * sizeof(user_addr_t) cannot overflow on ILP32
		 * kernels before trusting this allocation size. */
		MALLOC(mp, user_addr_t *, count * sizeof(user_addr_t), M_MACTEMP, M_WAITOK);
		if (mp == NULL) {
			FREE(mp0, M_MACTEMP);
			return (ENOMEM);
		}

		for (i = 0; i < count; i++) {
			if (IS_64BIT_PROCESS(p))
				mp[i] = ((user_addr_t *)mp0)[i];
			else
				mp[i] = (user_addr_t)mp0[i];
		}
		FREE(mp0, M_MACTEMP);
	}
#endif


	fst.sfsp = sfsp;
	fst.mp = mp;
	fst.flags = uap->flags;
	fst.count = 0;
	fst.error = 0;
	fst.maxcount = maxcount;


	vfs_iterate(0, getfsstat_callback, &fst);

	if (mp)
		FREE(mp, M_MACTEMP);

	if (fst.error ) {
		KAUTH_DEBUG("ERROR - %s gets %d", p->p_comm, fst.error);
		return(fst.error);
	}

	/* Report how many entries fit, or the total mount count if no buffer. */
	if (fst.sfsp && fst.count > fst.maxcount)
		*retval = fst.maxcount;
	else
		*retval = fst.count;
	return (0);
}
2781
/*
 * vfs_iterate() callback for getfsstat64: copy one mount's statfs64 to
 * user space, or just count the mount once the buffer is full / absent.
 */
static int
getfsstat64_callback(mount_t mp, void * arg)
{
	struct getfsstat_struct *fstp = (struct getfsstat_struct *)arg;
	struct vfsstatfs *sp;
	int error;

	if (fstp->sfsp && fstp->count < fstp->maxcount) {
		sp = &mp->mnt_vfsstat;
		/*
		 * If MNT_NOWAIT is specified, do not refresh the fsstat
		 * cache. MNT_WAIT overrides MNT_NOWAIT.
		 *
		 * We treat MNT_DWAIT as MNT_WAIT for all instances of
		 * getfsstat, since the constants are out of the same
		 * namespace.
		 */
		if (((fstp->flags & MNT_NOWAIT) == 0 ||
		    (fstp->flags & (MNT_WAIT | MNT_DWAIT))) &&
		    (error = vfs_update_vfsstat(mp, vfs_context_current(), VFS_USER_EVENT))) {
			/* A refresh failure skips (and does not count) this mount. */
			KAUTH_DEBUG("vfs_update_vfsstat returned %d", error);
			return(VFS_RETURNED);
		}

		error = statfs64_common(mp, sp, fstp->sfsp);
		if (error) {
			fstp->error = error;
			return(VFS_RETURNED_DONE);
		}
		/* statfs64 has a single layout for all user ABIs. */
		fstp->sfsp += sizeof(struct statfs64);
	}
	fstp->count++;
	return(VFS_RETURNED);
}
2816
2817/*
2818 * Get statistics on all file systems in 64 bit mode.
2819 */
2820int
2821getfsstat64(__unused proc_t p, struct getfsstat64_args *uap, int *retval)
2822{
2823 user_addr_t sfsp;
2824 int count, maxcount;
2825 struct getfsstat_struct fst;
2826
2827 maxcount = uap->bufsize / sizeof(struct statfs64);
2828
2829 sfsp = uap->buf;
2830 count = 0;
2831
2832 fst.sfsp = sfsp;
2833 fst.flags = uap->flags;
2834 fst.count = 0;
2835 fst.error = 0;
2836 fst.maxcount = maxcount;
2837
2838 vfs_iterate(0, getfsstat64_callback, &fst);
2839
2840 if (fst.error ) {
2841 KAUTH_DEBUG("ERROR - %s gets %d", p->p_comm, fst.error);
2842 return(fst.error);
2843 }
2844
2845 if (fst.sfsp && fst.count > fst.maxcount)
2846 *retval = fst.maxcount;
2847 else
2848 *retval = fst.count;
2849
2850 return (0);
2851}
2852
2853/*
2854 * gets the associated vnode with the file descriptor passed.
2855 * as input
2856 *
2857 * INPUT
2858 * ctx - vfs context of caller
2859 * fd - file descriptor for which vnode is required.
2860 * vpp - Pointer to pointer to vnode to be returned.
2861 *
2862 * The vnode is returned with an iocount so any vnode obtained
2863 * by this call needs a vnode_put
2864 *
2865 */
static int
vnode_getfromfd(vfs_context_t ctx, int fd, vnode_t *vpp)
{
	int error;
	vnode_t vp;
	struct fileproc *fp;
	proc_t p = vfs_context_proc(ctx);

	*vpp = NULLVP;

	/* Look up the fileproc and its vnode; fp is held until fp_drop. */
	error = fp_getfvp(p, fd, &fp, &vp);
	if (error)
		return (error);

	/* Take an iocount for the caller (released via vnode_put). */
	error = vnode_getwithref(vp);
	if (error) {
		(void)fp_drop(p, fd, fp, 0);
		return (error);
	}

	(void)fp_drop(p, fd, fp, 0);
	*vpp = vp;
	return (error);
}
2890
2891/*
2892 * Wrapper function around namei to start lookup from a directory
2893 * specified by a file descriptor ni_dirfd.
2894 *
2895 * In addition to all the errors returned by namei, this call can
2896 * return ENOTDIR if the file descriptor does not refer to a directory.
2897 * and EBADF if the file descriptor is not valid.
2898 */
int
nameiat(struct nameidata *ndp, int dirfd)
{
	/* Only honor dirfd for fresh lookups of relative paths that have not
	 * already been handed a starting directory (USEDVP). */
	if ((dirfd != AT_FDCWD) &&
	    !(ndp->ni_flag & NAMEI_CONTLOOKUP) &&
	    !(ndp->ni_cnd.cn_flags & USEDVP)) {
		int error = 0;
		char c;

		/* Peek at the first byte of the path to detect absolute paths,
		 * which ignore dirfd entirely. */
		if (UIO_SEG_IS_USER_SPACE(ndp->ni_segflg)) {
			error = copyin(ndp->ni_dirp, &c, sizeof(char));
			if (error)
				return (error);
		} else {
			c = *((char *)(ndp->ni_dirp));
		}

		if (c != '/') {
			vnode_t dvp_at;

			error = vnode_getfromfd(ndp->ni_cnd.cn_context, dirfd,
			    &dvp_at);
			if (error)
				return (error);

			if (vnode_vtype(dvp_at) != VDIR) {
				vnode_put(dvp_at);
				return (ENOTDIR);
			}

			/* Start the lookup at dvp_at instead of the cwd. */
			ndp->ni_dvp = dvp_at;
			ndp->ni_cnd.cn_flags |= USEDVP;
			error = namei(ndp);
			ndp->ni_cnd.cn_flags &= ~USEDVP;
			vnode_put(dvp_at);
			return (error);
		}
	}

	return (namei(ndp));
}
2940
2941/*
2942 * Change current working directory to a given file descriptor.
2943 */
2944/* ARGSUSED */
static int
common_fchdir(proc_t p, struct fchdir_args *uap, int per_thread)
{
	struct filedesc *fdp = p->p_fd;
	vnode_t vp;
	vnode_t tdp;
	vnode_t tvp;
	struct mount *mp;
	int error;
	vfs_context_t ctx = vfs_context_current();

	AUDIT_ARG(fd, uap->fd);
	if (per_thread && uap->fd == -1) {
		/*
		 * Switching back from per-thread to per process CWD; verify we
		 * in fact have one before proceeding. The only success case
		 * for this code path is to return 0 preemptively after zapping
		 * the thread structure contents.
		 */
		thread_t th = vfs_context_thread(ctx);
		if (th) {
			uthread_t uth = get_bsdthread_info(th);
			tvp = uth->uu_cdir;
			uth->uu_cdir = NULLVP;
			if (tvp != NULLVP) {
				vnode_rele(tvp);
				return (0);
			}
		}
		return (EBADF);
	}

	if ( (error = file_vnode(uap->fd, &vp)) )
		return(error);
	if ( (error = vnode_getwithref(vp)) ) {
		file_drop(uap->fd);
		return(error);
	}

	AUDIT_ARG(vnpath, vp, ARG_VNODE1);

	if (vp->v_type != VDIR) {
		error = ENOTDIR;
		goto out;
	}

#if CONFIG_MACF
	error = mac_vnode_check_chdir(ctx, vp);
	if (error)
		goto out;
#endif
	/* The caller needs search permission on the new directory. */
	error = vnode_authorize(vp, NULL, KAUTH_VNODE_SEARCH, ctx);
	if (error)
		goto out;

	/* If the directory is a mount point, descend to the root of the
	 * mounted filesystem, repeatedly for stacked mounts. */
	while (!error && (mp = vp->v_mountedhere) != NULL) {
		if (vfs_busy(mp, LK_NOWAIT)) {
			/* Mount in transition; refuse rather than block. */
			error = EACCES;
			goto out;
		}
		error = VFS_ROOT(mp, &tdp, ctx);
		vfs_unbusy(mp);
		if (error)
			break;
		vnode_put(vp);
		vp = tdp;
	}
	if (error)
		goto out;
	/* Trade the transient iocount for a long-lived usecount on the new
	 * cwd; vp stays valid past the vnode_put because of this reference. */
	if ( (error = vnode_ref(vp)) )
		goto out;
	vnode_put(vp);

	if (per_thread) {
		thread_t th = vfs_context_thread(ctx);
		if (th) {
			uthread_t uth = get_bsdthread_info(th);
			tvp = uth->uu_cdir;
			uth->uu_cdir = vp;
			OSBitOrAtomic(P_THCWD, &p->p_flag);
		} else {
			vnode_rele(vp);
			return (ENOENT);
		}
	} else {
		proc_fdlock(p);
		tvp = fdp->fd_cdir;
		fdp->fd_cdir = vp;
		proc_fdunlock(p);
	}

	/* Release the previous cwd, if any. */
	if (tvp)
		vnode_rele(tvp);
	file_drop(uap->fd);

	return (0);
out:
	vnode_put(vp);
	file_drop(uap->fd);

	return(error);
}
3047
3048int
3049fchdir(proc_t p, struct fchdir_args *uap, __unused int32_t *retval)
3050{
3051 return common_fchdir(p, uap, 0);
3052}
3053
3054int
3055__pthread_fchdir(proc_t p, struct __pthread_fchdir_args *uap, __unused int32_t *retval)
3056{
3057 return common_fchdir(p, (void *)uap, 1);
3058}
3059
3060/*
3061 * Change current working directory (".").
3062 *
3063 * Returns: 0 Success
3064 * change_dir:ENOTDIR
3065 * change_dir:???
3066 * vnode_ref:ENOENT No such file or directory
3067 */
3068/* ARGSUSED */
static int
common_chdir(proc_t p, struct chdir_args *uap, int per_thread)
{
	struct filedesc *fdp = p->p_fd;
	int error;
	struct nameidata nd;
	vnode_t tvp;
	vfs_context_t ctx = vfs_context_current();

	NDINIT(&nd, LOOKUP, OP_CHDIR, FOLLOW | AUDITVNPATH1,
	    UIO_USERSPACE, uap->path, ctx);
	/* change_dir() performs the lookup plus VDIR/MAC/search checks and
	 * returns nd.ni_vp with an iocount on success. */
	error = change_dir(&nd, ctx);
	if (error)
		return (error);
	/* Take a long-lived usecount before dropping the lookup iocount;
	 * this reference keeps nd.ni_vp valid for the assignments below. */
	if ( (error = vnode_ref(nd.ni_vp)) ) {
		vnode_put(nd.ni_vp);
		return (error);
	}
	/*
	 * drop the iocount we picked up in change_dir
	 */
	vnode_put(nd.ni_vp);

	if (per_thread) {
		thread_t th = vfs_context_thread(ctx);
		if (th) {
			uthread_t uth = get_bsdthread_info(th);
			tvp = uth->uu_cdir;
			uth->uu_cdir = nd.ni_vp;
			OSBitOrAtomic(P_THCWD, &p->p_flag);
		} else {
			vnode_rele(nd.ni_vp);
			return (ENOENT);
		}
	} else {
		proc_fdlock(p);
		tvp = fdp->fd_cdir;
		fdp->fd_cdir = nd.ni_vp;
		proc_fdunlock(p);
	}

	/* Release the previous cwd, if any. */
	if (tvp)
		vnode_rele(tvp);

	return (0);
}
3115
3116
3117/*
3118 * chdir
3119 *
3120 * Change current working directory (".") for the entire process
3121 *
3122 * Parameters: p Process requesting the call
3123 * uap User argument descriptor (see below)
3124 * retval (ignored)
3125 *
3126 * Indirect parameters: uap->path Directory path
3127 *
3128 * Returns: 0 Success
3129 * common_chdir: ENOTDIR
3130 * common_chdir: ENOENT No such file or directory
3131 * common_chdir: ???
3132 *
3133 */
3134int
3135chdir(proc_t p, struct chdir_args *uap, __unused int32_t *retval)
3136{
3137 return common_chdir(p, (void *)uap, 0);
3138}
3139
3140/*
3141 * __pthread_chdir
3142 *
3143 * Change current working directory (".") for a single thread
3144 *
3145 * Parameters: p Process requesting the call
3146 * uap User argument descriptor (see below)
3147 * retval (ignored)
3148 *
3149 * Indirect parameters: uap->path Directory path
3150 *
3151 * Returns: 0 Success
3152 * common_chdir: ENOTDIR
3153 * common_chdir: ENOENT No such file or directory
3154 * common_chdir: ???
3155 *
3156 */
3157int
3158__pthread_chdir(proc_t p, struct __pthread_chdir_args *uap, __unused int32_t *retval)
3159{
3160 return common_chdir(p, (void *)uap, 1);
3161}
3162
3163
3164/*
3165 * Change notion of root (``/'') directory.
3166 */
3167/* ARGSUSED */
int
chroot(proc_t p, struct chroot_args *uap, __unused int32_t *retval)
{
	struct filedesc *fdp = p->p_fd;
	int error;
	struct nameidata nd;
	vnode_t tvp;
	vfs_context_t ctx = vfs_context_current();

	/* chroot(2) requires superuser. */
	if ((error = suser(kauth_cred_get(), &p->p_acflag)))
		return (error);

	NDINIT(&nd, LOOKUP, OP_CHROOT, FOLLOW | AUDITVNPATH1,
	    UIO_USERSPACE, uap->path, ctx);
	/* change_dir() validates VDIR, MAC chdir, and search permission. */
	error = change_dir(&nd, ctx);
	if (error)
		return (error);

#if CONFIG_MACF
	error = mac_vnode_check_chroot(ctx, nd.ni_vp,
	    &nd.ni_cnd);
	if (error) {
		vnode_put(nd.ni_vp);
		return (error);
	}
#endif

	/* Hold a usecount for fd_rdir before dropping the lookup iocount;
	 * the reference keeps nd.ni_vp valid for the assignment below. */
	if ( (error = vnode_ref(nd.ni_vp)) ) {
		vnode_put(nd.ni_vp);
		return (error);
	}
	vnode_put(nd.ni_vp);

	proc_fdlock(p);
	tvp = fdp->fd_rdir;
	fdp->fd_rdir = nd.ni_vp;
	fdp->fd_flags |= FD_CHROOT;
	proc_fdunlock(p);

	/* Release the previous root's reference, if there was one. */
	if (tvp != NULL)
		vnode_rele(tvp);

	return (0);
}
3212
3213/*
3214 * Common routine for chroot and chdir.
3215 *
3216 * Returns: 0 Success
3217 * ENOTDIR Not a directory
3218 * namei:??? [anything namei can return]
3219 * vnode_authorize:??? [anything vnode_authorize can return]
3220 */
static int
change_dir(struct nameidata *ndp, vfs_context_t ctx)
{
	vnode_t vp;
	int error;

	if ((error = namei(ndp)))
		return (error);
	nameidone(ndp);
	vp = ndp->ni_vp;

	/* The target must be a directory... */
	if (vp->v_type != VDIR) {
		vnode_put(vp);
		return (ENOTDIR);
	}

#if CONFIG_MACF
	error = mac_vnode_check_chdir(ctx, vp);
	if (error) {
		vnode_put(vp);
		return (error);
	}
#endif

	/* ...on which the caller has search permission. */
	error = vnode_authorize(vp, NULL, KAUTH_VNODE_SEARCH, ctx);
	if (error) {
		vnode_put(vp);
		return (error);
	}

	/* Success: ndp->ni_vp is returned to the caller with an iocount held. */
	return (error);
}
3253
3254/*
3255 * Free the vnode data (for directories) associated with the file glob.
3256 */
struct fd_vn_data *
fg_vn_data_alloc(void)
{
	struct fd_vn_data *fvdata;

	/* Allocate per fd vnode data */
	MALLOC(fvdata, struct fd_vn_data *, (sizeof(struct fd_vn_data)),
	    M_FD_VN_DATA, M_WAITOK | M_ZERO);
	/* fv_lock serializes the directory-read state kept on this fd. */
	lck_mtx_init(&fvdata->fv_lock, fd_vn_lck_grp, fd_vn_lck_attr);
	return fvdata;
}
3268
3269/*
3270 * Free the vnode data (for directories) associated with the file glob.
3271 */
void
fg_vn_data_free(void *fgvndata)
{
	struct fd_vn_data *fvdata = (struct fd_vn_data *)fgvndata;

	/* Release any directory-read buffer still attached. */
	if (fvdata->fv_buf)
		FREE(fvdata->fv_buf, M_FD_DIRBUF);
	lck_mtx_destroy(&fvdata->fv_lock, fd_vn_lck_grp);
	FREE(fvdata, M_FD_VN_DATA);
}
3282
3283/*
3284 * Check permissions, allocate an open file structure,
3285 * and call the device open routine if any.
3286 *
3287 * Returns: 0 Success
3288 * EINVAL
3289 * EINTR
3290 * falloc:ENFILE
3291 * falloc:EMFILE
3292 * falloc:ENOMEM
3293 * vn_open_auth:???
3294 * dupfdopen:???
3295 * VNOP_ADVLOCK:???
3296 * vnode_setsize:???
3297 *
3298 * XXX Need to implement uid, gid
3299 */
int
open1(vfs_context_t ctx, struct nameidata *ndp, int uflags,
    struct vnode_attr *vap, fp_allocfn_t fp_zalloc, void *cra,
    int32_t *retval)
{
	proc_t p = vfs_context_proc(ctx);
	uthread_t uu = get_bsdthread_info(vfs_context_thread(ctx));
	struct fileproc *fp;
	vnode_t vp;
	int flags, oflags;
	int type, indx, error;
	struct flock lf;
	struct vfs_context context;

	oflags = uflags;

	/* O_RDONLY / O_WRONLY / O_RDWR are mutually exclusive. */
	if ((oflags & O_ACCMODE) == O_ACCMODE)
		return(EINVAL);

	flags = FFLAGS(uflags);
	/* Callers may not request raw-(un)encrypted semantics directly;
	 * those bits are set by open_dprotected_np via dpflags. */
	CLR(flags, FENCRYPTED);
	CLR(flags, FUNENCRYPTED);

	AUDIT_ARG(fflags, oflags);
	AUDIT_ARG(mode, vap->va_mode);

	/* Reserve the descriptor slot and fileproc up front. */
	if ((error = falloc_withalloc(p,
	    &fp, &indx, ctx, fp_zalloc, cra)) != 0) {
		return (error);
	}
	/* Encode the reserved fd so fdopen (/dev/fd opens) can find it. */
	uu->uu_dupfd = -indx - 1;

	if ((error = vn_open_auth(ndp, &flags, vap))) {
		if ((error == ENODEV || error == ENXIO) && (uu->uu_dupfd >= 0)){	/* XXX from fdopen */
			/* /dev/fd/N open: duplicate the existing descriptor. */
			if ((error = dupfdopen(p->p_fd, indx, uu->uu_dupfd, flags, error)) == 0) {
				fp_drop(p, indx, NULL, 0);
			        *retval = indx;
				return (0);
			}
		}
		if (error == ERESTART)
			error = EINTR;
		fp_free(p, indx, fp);
		return (error);
	}
	uu->uu_dupfd = 0;
	vp = ndp->ni_vp;

	fp->f_fglob->fg_flag = flags & (FMASK | O_EVTONLY | FENCRYPTED | FUNENCRYPTED);
	fp->f_fglob->fg_ops = &vnops;
	fp->f_fglob->fg_data = (caddr_t)vp;

	if (flags & (O_EXLOCK | O_SHLOCK)) {
		/* BSD open-time flock(): shared or exclusive per the flag. */
		lf.l_whence = SEEK_SET;
		lf.l_start = 0;
		lf.l_len = 0;
		if (flags & O_EXLOCK)
			lf.l_type = F_WRLCK;
		else
			lf.l_type = F_RDLCK;
		type = F_FLOCK;
		if ((flags & FNONBLOCK) == 0)
			type |= F_WAIT;
#if CONFIG_MACF
		error = mac_file_check_lock(vfs_context_ucred(ctx), fp->f_fglob,
		    F_SETLK, &lf);
		if (error)
			goto bad;
#endif
		if ((error = VNOP_ADVLOCK(vp, (caddr_t)fp->f_fglob, F_SETLK, &lf, type, ctx, NULL)))
			goto bad;
		fp->f_fglob->fg_flag |= FHASLOCK;
	}

	/* try to truncate by setting the size attribute */
	if ((flags & O_TRUNC) && ((error = vnode_setsize(vp, (off_t)0, 0, ctx)) != 0))
		goto bad;

	/*
	 * For directories we hold some additional information in the fd.
	 */
	if (vnode_vtype(vp) == VDIR) {
		fp->f_fglob->fg_vn_data = fg_vn_data_alloc();
	} else {
		fp->f_fglob->fg_vn_data = NULL;
	}

	vnode_put(vp);

	/*
	 * The first terminal open (without a O_NOCTTY) by a session leader
	 * results in it being set as the controlling terminal.
	 *
	 * NOTE(review): vp is referenced here after the vnode_put() above;
	 * presumably the reference held via fg_data keeps it valid -- confirm.
	 */
	if (vnode_istty(vp) && !(p->p_flag & P_CONTROLT) &&
	    !(flags & O_NOCTTY)) {
		int tmp = 0;

		(void)(*fp->f_fglob->fg_ops->fo_ioctl)(fp, (int)TIOCSCTTY,
		    (caddr_t)&tmp, ctx);
	}

	/* Publish the descriptor: set close-on-exec/fork flags and unblock it. */
	proc_fdlock(p);
	if (flags & O_CLOEXEC)
		*fdflags(p, indx) |= UF_EXCLOSE;
	if (flags & O_CLOFORK)
		*fdflags(p, indx) |= UF_FORKCLOSE;
	procfdtbl_releasefd(p, indx, NULL);
	fp_drop(p, indx, fp, 1);
	proc_fdunlock(p);

	*retval = indx;

	return (0);
bad:
	/* Undo open-time state: drop any lock taken, close, free the fd. */
	context = *vfs_context_current();
	context.vc_ucred = fp->f_fglob->fg_cred;

	if ((fp->f_fglob->fg_flag & FHASLOCK) &&
	    (FILEGLOB_DTYPE(fp->f_fglob) == DTYPE_VNODE)) {
		lf.l_whence = SEEK_SET;
		lf.l_start = 0;
		lf.l_len = 0;
		lf.l_type = F_UNLCK;

		(void)VNOP_ADVLOCK(
			vp, (caddr_t)fp->f_fglob, F_UNLCK, &lf, F_FLOCK, ctx, NULL);
	}

	vn_close(vp, fp->f_fglob->fg_flag, &context);
	vnode_put(vp);
	fp_free(p, indx, fp);

	return (error);
}
3434
3435/*
3436 * While most of the *at syscall handlers can call nameiat() which
3437 * is a wrapper around namei, the use of namei and initialisation
3438 * of nameidata are far removed and in different functions - namei
3439 * gets called in vn_open_auth for open1. So we'll just do here what
3440 * nameiat() does.
3441 */
static int
open1at(vfs_context_t ctx, struct nameidata *ndp, int uflags,
    struct vnode_attr *vap, fp_allocfn_t fp_zalloc, void *cra, int32_t *retval,
    int dirfd)
{
	/* Mirror nameiat(): honor dirfd only for relative paths with no
	 * starting directory already supplied (USEDVP). */
	if ((dirfd != AT_FDCWD) && !(ndp->ni_cnd.cn_flags & USEDVP)) {
		int error;
		char c;

		/* Peek at the first byte of the path to detect absolute paths,
		 * which ignore dirfd entirely. */
		if (UIO_SEG_IS_USER_SPACE(ndp->ni_segflg)) {
			error = copyin(ndp->ni_dirp, &c, sizeof(char));
			if (error)
				return (error);
		} else {
			c = *((char *)(ndp->ni_dirp));
		}

		if (c != '/') {
			vnode_t dvp_at;

			error = vnode_getfromfd(ndp->ni_cnd.cn_context, dirfd,
			    &dvp_at);
			if (error)
				return (error);

			if (vnode_vtype(dvp_at) != VDIR) {
				vnode_put(dvp_at);
				return (ENOTDIR);
			}

			/* Start the lookup at dvp_at; open1 -> vn_open_auth
			 * performs the actual namei. */
			ndp->ni_dvp = dvp_at;
			ndp->ni_cnd.cn_flags |= USEDVP;
			error = open1(ctx, ndp, uflags, vap, fp_zalloc, cra,
			    retval);
			vnode_put(dvp_at);
			return (error);
		}
	}

	return (open1(ctx, ndp, uflags, vap, fp_zalloc, cra, retval));
}
3483
3484/*
3485 * open_extended: open a file given a path name; with extended argument list (including extended security (ACL)).
3486 *
3487 * Parameters: p Process requesting the open
3488 * uap User argument descriptor (see below)
3489 * retval Pointer to an area to receive the
3490 * return calue from the system call
3491 *
3492 * Indirect: uap->path Path to open (same as 'open')
3493 * uap->flags Flags to open (same as 'open'
3494 * uap->uid UID to set, if creating
3495 * uap->gid GID to set, if creating
3496 * uap->mode File mode, if creating (same as 'open')
3497 * uap->xsecurity ACL to set, if creating
3498 *
3499 * Returns: 0 Success
3500 * !0 errno value
3501 *
3502 * Notes: The kauth_filesec_t in 'va', if any, is in host byte order.
3503 *
3504 * XXX: We should enummerate the possible errno values here, and where
3505 * in the code they originated.
3506 */
int
open_extended(proc_t p, struct open_extended_args *uap, int32_t *retval)
{
	struct filedesc *fdp = p->p_fd;
	int ciferror;
	kauth_filesec_t xsecdst;
	struct vnode_attr va;
	struct nameidata nd;
	int cmode;

	AUDIT_ARG(owner, uap->uid, uap->gid);

	xsecdst = NULL;
	/* Copy in the optional ACL (host byte order after this). */
	if ((uap->xsecurity != USER_ADDR_NULL) &&
	    ((ciferror = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0))
		return ciferror;

	VATTR_INIT(&va);
	/* Apply the process umask; the sticky bit may not be set at create. */
	cmode = ((uap->mode &~ fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT;
	VATTR_SET(&va, va_mode, cmode);
	if (uap->uid != KAUTH_UID_NONE)
		VATTR_SET(&va, va_uid, uap->uid);
	if (uap->gid != KAUTH_GID_NONE)
		VATTR_SET(&va, va_gid, uap->gid);
	if (xsecdst != NULL)
		VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);

	NDINIT(&nd, LOOKUP, OP_OPEN, FOLLOW | AUDITVNPATH1, UIO_USERSPACE,
	    uap->path, vfs_context_current());

	ciferror = open1(vfs_context_current(), &nd, uap->flags, &va,
	    fileproc_alloc_init, NULL, retval);
	if (xsecdst != NULL)
		kauth_filesec_free(xsecdst);

	return ciferror;
}
3544
3545/*
3546 * Go through the data-protected atomically controlled open (2)
3547 *
3548 * int open_dprotected_np(user_addr_t path, int flags, int class, int dpflags, int mode)
3549 */
3550int open_dprotected_np (__unused proc_t p, struct open_dprotected_np_args *uap, int32_t *retval) {
3551 int flags = uap->flags;
3552 int class = uap->class;
3553 int dpflags = uap->dpflags;
3554
3555 /*
3556 * Follow the same path as normal open(2)
3557 * Look up the item if it exists, and acquire the vnode.
3558 */
3559 struct filedesc *fdp = p->p_fd;
3560 struct vnode_attr va;
3561 struct nameidata nd;
3562 int cmode;
3563 int error;
3564
3565 VATTR_INIT(&va);
3566 /* Mask off all but regular access permissions */
3567 cmode = ((uap->mode &~ fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT;
3568 VATTR_SET(&va, va_mode, cmode & ACCESSPERMS);
3569
3570 NDINIT(&nd, LOOKUP, OP_OPEN, FOLLOW | AUDITVNPATH1, UIO_USERSPACE,
3571 uap->path, vfs_context_current());
3572
3573 /*
3574 * Initialize the extra fields in vnode_attr to pass down our
3575 * extra fields.
3576 * 1. target cprotect class.
3577 * 2. set a flag to mark it as requiring open-raw-encrypted semantics.
3578 */
3579 if (flags & O_CREAT) {
3580 /* lower level kernel code validates that the class is valid before applying it. */
3581 if (class != PROTECTION_CLASS_DEFAULT) {
3582 /*
3583 * PROTECTION_CLASS_DEFAULT implies that we make the class for this
3584 * file behave the same as open (2)
3585 */
3586 VATTR_SET(&va, va_dataprotect_class, class);
3587 }
3588 }
3589
3590 if (dpflags & (O_DP_GETRAWENCRYPTED|O_DP_GETRAWUNENCRYPTED)) {
3591 if ( flags & (O_RDWR | O_WRONLY)) {
3592 /* Not allowed to write raw encrypted bytes */
3593 return EINVAL;
3594 }
3595 if (uap->dpflags & O_DP_GETRAWENCRYPTED) {
3596 VATTR_SET(&va, va_dataprotect_flags, VA_DP_RAWENCRYPTED);
3597 }
3598 if (uap->dpflags & O_DP_GETRAWUNENCRYPTED) {
3599 VATTR_SET(&va, va_dataprotect_flags, VA_DP_RAWUNENCRYPTED);
3600 }
3601 }
3602
3603 error = open1(vfs_context_current(), &nd, uap->flags, &va,
3604 fileproc_alloc_init, NULL, retval);
3605
3606 return error;
3607}
3608
3609static int
3610openat_internal(vfs_context_t ctx, user_addr_t path, int flags, int mode,
3611 int fd, enum uio_seg segflg, int *retval)
3612{
3613 struct filedesc *fdp = (vfs_context_proc(ctx))->p_fd;
3614 struct vnode_attr va;
3615 struct nameidata nd;
3616 int cmode;
3617
3618 VATTR_INIT(&va);
3619 /* Mask off all but regular access permissions */
3620 cmode = ((mode &~ fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT;
3621 VATTR_SET(&va, va_mode, cmode & ACCESSPERMS);
3622
3623 NDINIT(&nd, LOOKUP, OP_OPEN, FOLLOW | AUDITVNPATH1,
3624 segflg, path, ctx);
3625
3626 return (open1at(ctx, &nd, flags, &va, fileproc_alloc_init, NULL,
3627 retval, fd));
3628}
3629
3630int
3631open(proc_t p, struct open_args *uap, int32_t *retval)
3632{
3633 __pthread_testcancel(1);
3634 return(open_nocancel(p, (struct open_nocancel_args *)uap, retval));
3635}
3636
3637int
3638open_nocancel(__unused proc_t p, struct open_nocancel_args *uap,
3639 int32_t *retval)
3640{
3641 return (openat_internal(vfs_context_current(), uap->path, uap->flags,
3642 uap->mode, AT_FDCWD, UIO_USERSPACE, retval));
3643}
3644
3645int
3646openat_nocancel(__unused proc_t p, struct openat_nocancel_args *uap,
3647 int32_t *retval)
3648{
3649 return (openat_internal(vfs_context_current(), uap->path, uap->flags,
3650 uap->mode, uap->fd, UIO_USERSPACE, retval));
3651}
3652
3653int
3654openat(proc_t p, struct openat_args *uap, int32_t *retval)
3655{
3656 __pthread_testcancel(1);
3657 return(openat_nocancel(p, (struct openat_nocancel_args *)uap, retval));
3658}
3659
3660/*
3661 * openbyid_np: open a file given a file system id and a file system object id
3662 * the hfs file system object id is an fsobj_id_t {uint32, uint32}
3663 * file systems that don't support object ids it is a node id (uint64_t).
3664 *
3665 * Parameters: p Process requesting the open
3666 * uap User argument descriptor (see below)
3667 * retval Pointer to an area to receive the
3668 * return calue from the system call
3669 *
3670 * Indirect: uap->path Path to open (same as 'open')
3671 *
3672 * uap->fsid id of target file system
3673 * uap->objid id of target file system object
3674 * uap->flags Flags to open (same as 'open')
3675 *
3676 * Returns: 0 Success
3677 * !0 errno value
3678 *
3679 *
3680 * XXX: We should enummerate the possible errno values here, and where
3681 * in the code they originated.
3682 */
int
openbyid_np(__unused proc_t p, struct openbyid_np_args *uap, int *retval)
{
	fsid_t fsid;
	uint64_t objid;
	int error;
	char *buf = NULL;		/* kernel buffer holding the resolved path */
	int buflen = MAXPATHLEN;
	int pathlen = 0;
	vfs_context_t ctx = vfs_context_current();

	/* Copy in the target filesystem id. */
	if ((error = copyin(uap->fsid, (caddr_t)&fsid, sizeof(fsid)))) {
		return (error);
	}

	/*uap->obj is an fsobj_id_t defined as struct {uint32_t, uint32_t} */
	if ((error = copyin(uap->objid, (caddr_t)&objid, sizeof(uint64_t)))) {
		return (error);
	}

	AUDIT_ARG(value32, fsid.val[0]);
	AUDIT_ARG(value64, objid);

	/*
	 * Resolve a path from (fsid, objid).  If the buffer is too small
	 * (ENOSPC) grow it by MAXPATHLEN and retry; on any failure the
	 * buffer is freed before looping/returning.
	 * NOTE(review): buflen growth has no explicit upper bound here —
	 * presumably bounded in practice by the filesystem's maximum path
	 * depth; confirm fsgetpath_internal cannot ENOSPC indefinitely.
	 */
	do {
		MALLOC(buf, char *, buflen + 1, M_TEMP, M_WAITOK);
		if (buf == NULL) {
			return (ENOMEM);
		}

		error = fsgetpath_internal(
			ctx, fsid.val[0], objid,
			buflen, buf, &pathlen);

		if (error) {
			FREE(buf, M_TEMP);
			buf = NULL;
		}
	} while (error == ENOSPC && (buflen += MAXPATHLEN));

	if (error) {
		return error;
	}

	/* NUL-terminate the resolved path before using it as a string. */
	buf[pathlen] = 0;

	/* Open via the normal path-based machinery, path in kernel space. */
	error = openat_internal(
		ctx, (user_addr_t)buf, uap->oflags, 0, AT_FDCWD, UIO_SYSSPACE, retval);

	FREE(buf, M_TEMP);

	return error;
}
3736
3737
3738/*
3739 * Create a special file.
3740 */
3741static int mkfifo1(vfs_context_t ctx, user_addr_t upath, struct vnode_attr *vap);
3742
/*
 * mknod: create a character or block special file (or, via mkfifo1(), a
 * FIFO).  Requires superuser for the device-node cases.
 *
 * Returns 0 on success, otherwise an errno value (EEXIST, EINVAL, plus
 * anything namei/vnode_authorize/vn_create can return).
 */
int
mknod(proc_t p, struct mknod_args *uap, __unused int32_t *retval)
{
	struct vnode_attr va;
	vfs_context_t ctx = vfs_context_current();
	int error;
	struct nameidata nd;
	vnode_t	vp, dvp;

	VATTR_INIT(&va);
	VATTR_SET(&va, va_mode, (uap->mode & ALLPERMS) & ~p->p_fd->fd_cmask);
	VATTR_SET(&va, va_rdev, uap->dev);

	/* If it's a mknod() of a FIFO, call mkfifo1() instead */
	if ((uap->mode & S_IFMT) == S_IFIFO)
		return(mkfifo1(ctx, uap->path, &va));

	AUDIT_ARG(mode, uap->mode);
	AUDIT_ARG(value32, uap->dev);

	/* Only the superuser may create device nodes. */
	if ((error = suser(vfs_context_ucred(ctx), &p->p_acflag)))
		return (error);
	NDINIT(&nd, CREATE, OP_MKNOD, LOCKPARENT | AUDITVNPATH1,
	    UIO_USERSPACE, uap->path, ctx);
	error = namei(&nd);
	if (error)
		return (error);
	dvp = nd.ni_dvp;
	vp = nd.ni_vp;

	/* The target name must not already exist. */
	if (vp != NULL) {
		error = EEXIST;
		goto out;
	}

	/* Only character and block specials are valid here (FIFO handled above). */
	switch (uap->mode & S_IFMT) {
	case S_IFCHR:
		VATTR_SET(&va, va_type, VCHR);
		break;
	case S_IFBLK:
		VATTR_SET(&va, va_type, VBLK);
		break;
	default:
		error = EINVAL;
		goto out;
	}

#if CONFIG_MACF
	error = mac_vnode_check_create(ctx,
	    nd.ni_dvp, &nd.ni_cnd, &va);
	if (error)
		goto out;
#endif

	if ((error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, ctx)) != 0)
		goto out;

	if ((error = vn_create(dvp, &vp, &nd, &va, 0, 0, NULL, ctx)) != 0)
		goto out;

	if (vp) {
		int	update_flags = 0;

		// Make sure the name & parent pointers are hooked up
		if (vp->v_name == NULL)
			update_flags |= VNODE_UPDATE_NAME;
		if (vp->v_parent == NULLVP)
			update_flags |= VNODE_UPDATE_PARENT;

		if (update_flags)
			vnode_update_identity(vp, dvp, nd.ni_cnd.cn_nameptr, nd.ni_cnd.cn_namelen, nd.ni_cnd.cn_hash, update_flags);

#if CONFIG_FSE
		add_fsevent(FSE_CREATE_FILE, ctx,
		    FSE_ARG_VNODE, vp,
		    FSE_ARG_DONE);
#endif
	}

out:
	/*
	 * nameidone has to happen before we vnode_put(dvp)
	 * since it may need to release the fs_nodelock on the dvp
	 */
	nameidone(&nd);

	if (vp)
		vnode_put(vp);
	vnode_put(dvp);

	return (error);
}
3835
3836/*
3837 * Create a named pipe.
3838 *
3839 * Returns: 0 Success
3840 * EEXIST
3841 * namei:???
3842 * vnode_authorize:???
3843 * vn_create:???
3844 */
3845static int
3846mkfifo1(vfs_context_t ctx, user_addr_t upath, struct vnode_attr *vap)
3847{
3848 vnode_t vp, dvp;
3849 int error;
3850 struct nameidata nd;
3851
3852 NDINIT(&nd, CREATE, OP_MKFIFO, LOCKPARENT | AUDITVNPATH1,
3853 UIO_USERSPACE, upath, ctx);
3854 error = namei(&nd);
3855 if (error)
3856 return (error);
3857 dvp = nd.ni_dvp;
3858 vp = nd.ni_vp;
3859
3860 /* check that this is a new file and authorize addition */
3861 if (vp != NULL) {
3862 error = EEXIST;
3863 goto out;
3864 }
3865 VATTR_SET(vap, va_type, VFIFO);
3866
3867 if ((error = vn_authorize_create(dvp, &nd.ni_cnd, vap, ctx, NULL)) != 0)
3868 goto out;
3869
3870 error = vn_create(dvp, &vp, &nd, vap, 0, 0, NULL, ctx);
3871out:
3872 /*
3873 * nameidone has to happen before we vnode_put(dvp)
3874 * since it may need to release the fs_nodelock on the dvp
3875 */
3876 nameidone(&nd);
3877
3878 if (vp)
3879 vnode_put(vp);
3880 vnode_put(dvp);
3881
3882 return error;
3883}
3884
3885
3886/*
3887 * mkfifo_extended: Create a named pipe; with extended argument list (including extended security (ACL)).
3888 *
3889 * Parameters: p Process requesting the open
3890 * uap User argument descriptor (see below)
3891 * retval (Ignored)
3892 *
3893 * Indirect: uap->path Path to fifo (same as 'mkfifo')
3894 * uap->uid UID to set
3895 * uap->gid GID to set
3896 * uap->mode File mode to set (same as 'mkfifo')
3897 * uap->xsecurity ACL to set, if creating
3898 *
3899 * Returns: 0 Success
3900 * !0 errno value
3901 *
3902 * Notes: The kauth_filesec_t in 'va', if any, is in host byte order.
3903 *
3904 * XXX: We should enummerate the possible errno values here, and where
3905 * in the code they originated.
3906 */
3907int
3908mkfifo_extended(proc_t p, struct mkfifo_extended_args *uap, __unused int32_t *retval)
3909{
3910 int ciferror;
3911 kauth_filesec_t xsecdst;
3912 struct vnode_attr va;
3913
3914 AUDIT_ARG(owner, uap->uid, uap->gid);
3915
3916 xsecdst = KAUTH_FILESEC_NONE;
3917 if (uap->xsecurity != USER_ADDR_NULL) {
3918 if ((ciferror = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0)
3919 return ciferror;
3920 }
3921
3922 VATTR_INIT(&va);
3923 VATTR_SET(&va, va_mode, (uap->mode & ALLPERMS) & ~p->p_fd->fd_cmask);
3924 if (uap->uid != KAUTH_UID_NONE)
3925 VATTR_SET(&va, va_uid, uap->uid);
3926 if (uap->gid != KAUTH_GID_NONE)
3927 VATTR_SET(&va, va_gid, uap->gid);
3928 if (xsecdst != KAUTH_FILESEC_NONE)
3929 VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
3930
3931 ciferror = mkfifo1(vfs_context_current(), uap->path, &va);
3932
3933 if (xsecdst != KAUTH_FILESEC_NONE)
3934 kauth_filesec_free(xsecdst);
3935 return ciferror;
3936}
3937
3938/* ARGSUSED */
3939int
3940mkfifo(proc_t p, struct mkfifo_args *uap, __unused int32_t *retval)
3941{
3942 struct vnode_attr va;
3943
3944 VATTR_INIT(&va);
3945 VATTR_SET(&va, va_mode, (uap->mode & ALLPERMS) & ~p->p_fd->fd_cmask);
3946
3947 return(mkfifo1(vfs_context_current(), uap->path, &va));
3948}
3949
3950
3951static char *
3952my_strrchr(char *p, int ch)
3953{
3954 char *save;
3955
3956 for (save = NULL;; ++p) {
3957 if (*p == ch)
3958 save = p;
3959 if (!*p)
3960 return(save);
3961 }
3962 /* NOTREACHED */
3963}
3964
extern int safe_getpath(struct vnode *dvp, char *leafname, char *path, int _len, int *truncated_path);

/*
 * Build a best-effort path string for dvp (optionally with 'leafname'
 * appended) into 'path' (capacity _len bytes).  Never fails outright: if
 * the exact path cannot be obtained, it falls back to an ancestor's path
 * or the mount point and sets *truncated_path.  Returns the length of the
 * string placed in 'path', including the NUL terminator.
 * NOTE(review): when dvp is the filesystem root ("/"), appending a leaf
 * appears to produce a leading "//" — presumably tolerated by consumers
 * (fsevents); confirm before relying on the exact string.
 */
int
safe_getpath(struct vnode *dvp, char *leafname, char *path, int _len, int *truncated_path)
{
	int ret, len = _len;

	*truncated_path = 0;
	ret = vn_getpath(dvp, path, &len);
	if (ret == 0 && len < (MAXPATHLEN - 1)) {
		if (leafname) {
			/* overwrite the NUL with a separator, then append the leaf */
			path[len-1] = '/';
			len += strlcpy(&path[len], leafname, MAXPATHLEN-len) + 1;
			if (len > MAXPATHLEN) {
				char *ptr;

				// the string got truncated!
				*truncated_path = 1;
				ptr = my_strrchr(path, '/');
				if (ptr) {
					*ptr = '\0';   // chop off the string at the last directory component
				}
				len = strlen(path) + 1;
			}
		}
	} else if (ret == 0) {
		/* got a path but no room to append the leaf — report truncation */
		*truncated_path = 1;
	} else if (ret != 0) {
		struct vnode *mydvp=dvp;

		if (ret != ENOSPC) {
			printf("safe_getpath: failed to get the path for vp %p (%s) : err %d\n",
			       dvp, dvp->v_name ? dvp->v_name : "no-name", ret);
		}
		*truncated_path = 1;

		/* walk up the parent chain until some ancestor's path fits */
		do {
			if (mydvp->v_parent != NULL) {
				mydvp = mydvp->v_parent;
			} else if (mydvp->v_mount) {
				strlcpy(path, mydvp->v_mount->mnt_vfsstat.f_mntonname, _len);
				break;
			} else {
				// no parent and no mount point?  only thing is to punt and say "/" changed
				strlcpy(path, "/", _len);
				len = 2;
				mydvp = NULL;
			}

			if (mydvp == NULL) {
				break;
			}

			len = _len;
			ret = vn_getpath(mydvp, path, &len);
		} while (ret == ENOSPC);
	}

	return len;
}
4025
4026
4027/*
4028 * Make a hard file link.
4029 *
4030 * Returns: 0 Success
4031 * EPERM
4032 * EEXIST
4033 * EXDEV
4034 * namei:???
4035 * vnode_authorize:???
4036 * VNOP_LINK:???
4037 */
4038/* ARGSUSED */
/*
 * Common implementation for link(2)/linkat(2): look up the existing object
 * (relative to fd1), then look up the new name (relative to fd2), authorize,
 * and perform VNOP_LINK.  'flag' may contain AT_SYMLINK_FOLLOW.  Emits
 * fsevents and kauth fileop notifications on success.
 */
static int
linkat_internal(vfs_context_t ctx, int fd1, user_addr_t path, int fd2,
    user_addr_t link, int flag, enum uio_seg segflg)
{
	vnode_t vp, dvp, lvp;
	struct nameidata nd;
	int follow;
	int error;
#if CONFIG_FSE
	fse_info finfo;
#endif
	int need_event, has_listeners;
	char *target_path = NULL;
	int truncated=0;

	vp = dvp = lvp = NULLVP;

	/* look up the object we are linking to */
	follow = (flag & AT_SYMLINK_FOLLOW) ? FOLLOW : NOFOLLOW;
	NDINIT(&nd, LOOKUP, OP_LOOKUP, AUDITVNPATH1 | follow,
	    segflg, path, ctx);

	error = nameiat(&nd, fd1);
	if (error)
		return (error);
	vp = nd.ni_vp;

	nameidone(&nd);

	/*
	 * Normally, linking to directories is not supported.
	 * However, some file systems may have limited support.
	 */
	if (vp->v_type == VDIR) {
		if (!(vp->v_mount->mnt_vtable->vfc_vfsflags & VFC_VFSDIRLINKS)) {
			error = EPERM;   /* POSIX */
			goto out;
		}
		/* Linking to a directory requires ownership. */
		if (!kauth_cred_issuser(vfs_context_ucred(ctx))) {
			struct vnode_attr dva;

			VATTR_INIT(&dva);
			VATTR_WANTED(&dva, va_uid);
			if (vnode_getattr(vp, &dva, ctx) != 0 ||
			    !VATTR_IS_SUPPORTED(&dva, va_uid) ||
			    (dva.va_uid != kauth_cred_getuid(vfs_context_ucred(ctx)))) {
				error = EACCES;
				goto out;
			}
		}
	}

	/* lookup the target node — the nameidata is reused for the second lookup */
#if CONFIG_TRIGGERS
	nd.ni_op = OP_LINK;
#endif
	nd.ni_cnd.cn_nameiop = CREATE;
	nd.ni_cnd.cn_flags = LOCKPARENT | AUDITVNPATH2 | CN_NBMOUNTLOOK;
	nd.ni_dirp = link;
	error = nameiat(&nd, fd2);
	if (error != 0)
		goto out;
	dvp = nd.ni_dvp;
	lvp = nd.ni_vp;

#if CONFIG_MACF
	if ((error = mac_vnode_check_link(ctx, dvp, vp, &nd.ni_cnd)) != 0)
		goto out2;
#endif

	/* or to anything that kauth doesn't want us to (eg. immutable items) */
	if ((error = vnode_authorize(vp, NULL, KAUTH_VNODE_LINKTARGET, ctx)) != 0)
		goto out2;

	/* target node must not exist */
	if (lvp != NULLVP) {
		error = EEXIST;
		goto out2;
	}
	/* cannot link across mountpoints */
	if (vnode_mount(vp) != vnode_mount(dvp)) {
		error = EXDEV;
		goto out2;
	}

	/* authorize creation of the target node */
	if ((error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, ctx)) != 0)
		goto out2;

	/* and finally make the link */
	error = VNOP_LINK(vp, dvp, &nd.ni_cnd, ctx);
	if (error)
		goto out2;

#if CONFIG_MACF
	(void)mac_vnode_notify_link(ctx, vp, dvp, &nd.ni_cnd);
#endif

#if CONFIG_FSE
	need_event = need_fsevent(FSE_CREATE_FILE, dvp);
#else
	need_event = 0;
#endif
	has_listeners = kauth_authorize_fileop_has_listeners();

	if (need_event || has_listeners) {
		char *link_to_path = NULL;
		int len, link_name_len;

		/* build the path to the new link file */
		GET_PATH(target_path);
		if (target_path == NULL) {
			error = ENOMEM;
			goto out2;
		}

		len = safe_getpath(dvp, nd.ni_cnd.cn_nameptr, target_path, MAXPATHLEN, &truncated);

		if (has_listeners) {
			/* build the path to file we are linking to */
			GET_PATH(link_to_path);
			if (link_to_path == NULL) {
				error = ENOMEM;
				goto out2;
			}

			link_name_len = MAXPATHLEN;
			if (vn_getpath(vp, link_to_path, &link_name_len) == 0) {
				/*
				 * Call out to allow 3rd party notification of the link.
				 * Ignore result of kauth_authorize_fileop call.
				 */
				kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_LINK,
						       (uintptr_t)link_to_path,
						       (uintptr_t)target_path);
			}
			if (link_to_path != NULL) {
				RELEASE_PATH(link_to_path);
			}
		}
#if CONFIG_FSE
		if (need_event) {
			/* construct fsevent */
			if (get_fse_info(vp, &finfo, ctx) == 0) {
				if (truncated) {
					finfo.mode |= FSE_TRUNCATED_PATH;
				}

				// build the path to the destination of the link
				add_fsevent(FSE_CREATE_FILE, ctx,
					    FSE_ARG_STRING, len, target_path,
					    FSE_ARG_FINFO, &finfo,
					    FSE_ARG_DONE);
			}
			if (vp->v_parent) {
				add_fsevent(FSE_STAT_CHANGED, ctx,
					    FSE_ARG_VNODE, vp->v_parent,
					    FSE_ARG_DONE);
			}
		}
#endif
	}
out2:
	/*
	 * nameidone has to happen before we vnode_put(dvp)
	 * since it may need to release the fs_nodelock on the dvp
	 */
	nameidone(&nd);
	if (target_path != NULL) {
		RELEASE_PATH(target_path);
	}
out:
	if (lvp)
		vnode_put(lvp);
	if (dvp)
		vnode_put(dvp);
	vnode_put(vp);
	return (error);
}
4219
4220int
4221link(__unused proc_t p, struct link_args *uap, __unused int32_t *retval)
4222{
4223 return (linkat_internal(vfs_context_current(), AT_FDCWD, uap->path,
4224 AT_FDCWD, uap->link, AT_SYMLINK_FOLLOW, UIO_USERSPACE));
4225}
4226
4227int
4228linkat(__unused proc_t p, struct linkat_args *uap, __unused int32_t *retval)
4229{
4230 if (uap->flag & ~AT_SYMLINK_FOLLOW)
4231 return (EINVAL);
4232
4233 return (linkat_internal(vfs_context_current(), uap->fd1, uap->path,
4234 uap->fd2, uap->link, uap->flag, UIO_USERSPACE));
4235}
4236
4237/*
4238 * Make a symbolic link.
4239 *
4240 * We could add support for ACLs here too...
4241 */
4242/* ARGSUSED */
/*
 * Common implementation for symlink(2)/symlinkat(2): copy in the link
 * contents ('path_data'), look up the new name ('link', relative to fd),
 * authorize, and create the symlink via VNOP_SYMLINK.  Inherits the
 * SF_RESTRICTED flag from the parent directory.
 */
static int
symlinkat_internal(vfs_context_t ctx, user_addr_t path_data, int fd,
    user_addr_t link, enum uio_seg segflg)
{
	struct vnode_attr va;
	char *path;		/* the link contents, in kernel space */
	int error;
	struct nameidata nd;
	vnode_t vp, dvp;
	uint32_t dfflags;	// Directory file flags
	size_t dummy=0;
	proc_t p;

	error = 0;
	if (UIO_SEG_IS_USER_SPACE(segflg)) {
		/* M_WAITOK allocation — does not fail */
		MALLOC_ZONE(path, char *, MAXPATHLEN, M_NAMEI, M_WAITOK);
		error = copyinstr(path_data, path, MAXPATHLEN, &dummy);
	} else {
		path = (char *)path_data;
	}
	if (error)
		goto out;
	AUDIT_ARG(text, path);	/* This is the link string */

	NDINIT(&nd, CREATE, OP_SYMLINK, LOCKPARENT | AUDITVNPATH1,
	    segflg, link, ctx);

	error = nameiat(&nd, fd);
	if (error)
		goto out;
	dvp = nd.ni_dvp;
	vp = nd.ni_vp;

	p = vfs_context_proc(ctx);
	VATTR_INIT(&va);
	VATTR_SET(&va, va_type, VLNK);
	VATTR_SET(&va, va_mode, ACCESSPERMS & ~p->p_fd->fd_cmask);

	/*
	 * Handle inheritance of restricted flag
	 */
	error = vnode_flags(dvp, &dfflags, ctx);
	if (error)
		goto skipit;
	if (dfflags & SF_RESTRICTED)
		VATTR_SET(&va, va_flags, SF_RESTRICTED);

#if CONFIG_MACF
	error = mac_vnode_check_create(ctx,
	    dvp, &nd.ni_cnd, &va);
#endif
	if (error != 0) {
		goto skipit;
	}

	/* the new name must not already exist */
	if (vp != NULL) {
		error = EEXIST;
		goto skipit;
	}

	/* authorize */
	if (error == 0)
		error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, ctx);
	/* get default ownership, etc. */
	if (error == 0)
		error = vnode_authattr_new(dvp, &va, 0, ctx);
	if (error == 0)
		error = VNOP_SYMLINK(dvp, &vp, &nd.ni_cnd, &va, path, ctx);

#if CONFIG_MACF
	if (error == 0 && vp)
		error = vnode_label(vnode_mount(vp), dvp, vp, &nd.ni_cnd, VNODE_LABEL_CREATE, ctx);
#endif

	/* do fallback attribute handling */
	if (error == 0 && vp)
		error = vnode_setattr_fallback(vp, &va, ctx);

	if (error == 0) {
		int update_flags = 0;

		/* check if a new vnode was created, else try to get one */
		if (vp == NULL) {
			/* VNOP_SYMLINK may not return the new vnode; look it up */
			nd.ni_cnd.cn_nameiop = LOOKUP;
#if CONFIG_TRIGGERS
			nd.ni_op = OP_LOOKUP;
#endif
			nd.ni_cnd.cn_flags = 0;
			error = nameiat(&nd, fd);
			vp = nd.ni_vp;

			if (vp == NULL)
				goto skipit;
		}

#if 0  /* XXX - kauth_todo - is KAUTH_FILEOP_SYMLINK needed? */
		/* call out to allow 3rd party notification of rename.
		 * Ignore result of kauth_authorize_fileop call.
		 */
		if (kauth_authorize_fileop_has_listeners() &&
		    namei(&nd) == 0) {
			char *new_link_path = NULL;
			int len;

			/* build the path to the new link file */
			new_link_path = get_pathbuff();
			len = MAXPATHLEN;
			vn_getpath(dvp, new_link_path, &len);
			if ((len + 1 + nd.ni_cnd.cn_namelen + 1) < MAXPATHLEN) {
				new_link_path[len - 1] = '/';
				strlcpy(&new_link_path[len], nd.ni_cnd.cn_nameptr, MAXPATHLEN-len);
			}

			kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_SYMLINK,
					       (uintptr_t)path, (uintptr_t)new_link_path);
			if (new_link_path != NULL)
				release_pathbuff(new_link_path);
		}
#endif
		// Make sure the name & parent pointers are hooked up
		if (vp->v_name == NULL)
			update_flags |= VNODE_UPDATE_NAME;
		if (vp->v_parent == NULLVP)
			update_flags |= VNODE_UPDATE_PARENT;

		if (update_flags)
			vnode_update_identity(vp, dvp, nd.ni_cnd.cn_nameptr, nd.ni_cnd.cn_namelen, nd.ni_cnd.cn_hash, update_flags);

#if CONFIG_FSE
		add_fsevent(FSE_CREATE_FILE, ctx,
			    FSE_ARG_VNODE, vp,
			    FSE_ARG_DONE);
#endif
	}

skipit:
	/*
	 * nameidone has to happen before we vnode_put(dvp)
	 * since it may need to release the fs_nodelock on the dvp
	 */
	nameidone(&nd);

	if (vp)
		vnode_put(vp);
	vnode_put(dvp);
out:
	/* free the copied-in link string if we allocated it */
	if (path && (path != (char *)path_data))
		FREE_ZONE(path, MAXPATHLEN, M_NAMEI);

	return (error);
}
4394
4395int
4396symlink(__unused proc_t p, struct symlink_args *uap, __unused int32_t *retval)
4397{
4398 return (symlinkat_internal(vfs_context_current(), uap->path, AT_FDCWD,
4399 uap->link, UIO_USERSPACE));
4400}
4401
4402int
4403symlinkat(__unused proc_t p, struct symlinkat_args *uap,
4404 __unused int32_t *retval)
4405{
4406 return (symlinkat_internal(vfs_context_current(), uap->path1, uap->fd,
4407 uap->path2, UIO_USERSPACE));
4408}
4409
4410/*
4411 * Delete a whiteout from the filesystem.
4412 * No longer supported.
4413 */
4414int
4415undelete(__unused proc_t p, __unused struct undelete_args *uap, __unused int32_t *retval)
4416{
4417 return (ENOTSUP);
4418}
4419
4420/*
4421 * Delete a name from the filesystem.
4422 */
4423/* ARGSUSED */
4424static int
4425unlinkat_internal(vfs_context_t ctx, int fd, vnode_t start_dvp,
4426 user_addr_t path_arg, enum uio_seg segflg, int unlink_flags)
4427{
4428 struct nameidata nd;
4429 vnode_t vp, dvp;
4430 int error;
4431 struct componentname *cnp;
4432 char *path = NULL;
4433 int len=0;
4434#if CONFIG_FSE
4435 fse_info finfo;
4436 struct vnode_attr va;
4437#endif
4438 int flags;
4439 int need_event;
4440 int has_listeners;
4441 int truncated_path;
4442 int batched;
4443 struct vnode_attr *vap;
4444 int do_retry;
4445 int retry_count = 0;
4446 int cn_flags;
4447
4448 cn_flags = LOCKPARENT;
4449 if (!(unlink_flags & VNODE_REMOVE_NO_AUDIT_PATH))
4450 cn_flags |= AUDITVNPATH1;
4451 /* If a starting dvp is passed, it trumps any fd passed. */
4452 if (start_dvp)
4453 cn_flags |= USEDVP;
4454
4455#if NAMEDRSRCFORK
4456 /* unlink or delete is allowed on rsrc forks and named streams */
4457 cn_flags |= CN_ALLOWRSRCFORK;
4458#endif
4459
4460retry:
4461 do_retry = 0;
4462 flags = 0;
4463 need_event = 0;
4464 has_listeners = 0;
4465 truncated_path = 0;
4466 vap = NULL;
4467
4468 NDINIT(&nd, DELETE, OP_UNLINK, cn_flags, segflg, path_arg, ctx);
4469
4470 nd.ni_dvp = start_dvp;
4471 nd.ni_flag |= NAMEI_COMPOUNDREMOVE;
4472 cnp = &nd.ni_cnd;
4473
4474lookup_continue:
4475 error = nameiat(&nd, fd);
4476 if (error)
4477 return (error);
4478
4479 dvp = nd.ni_dvp;
4480 vp = nd.ni_vp;
4481
4482
4483 /* With Carbon delete semantics, busy files cannot be deleted */
4484 if (unlink_flags & VNODE_REMOVE_NODELETEBUSY) {
4485 flags |= VNODE_REMOVE_NODELETEBUSY;
4486 }
4487
4488 /* Skip any potential upcalls if told to. */
4489 if (unlink_flags & VNODE_REMOVE_SKIP_NAMESPACE_EVENT) {
4490 flags |= VNODE_REMOVE_SKIP_NAMESPACE_EVENT;
4491 }
4492
4493 if (vp) {
4494 batched = vnode_compound_remove_available(vp);
4495 /*
4496 * The root of a mounted filesystem cannot be deleted.
4497 */
4498 if (vp->v_flag & VROOT) {
4499 error = EBUSY;
4500 }
4501
4502 if (!batched) {
4503 error = vn_authorize_unlink(dvp, vp, cnp, ctx, NULL);
4504 if (error) {
4505 if (error == ENOENT) {
4506 assert(retry_count < MAX_AUTHORIZE_ENOENT_RETRIES);
4507 if (retry_count < MAX_AUTHORIZE_ENOENT_RETRIES) {
4508 do_retry = 1;
4509 retry_count++;
4510 }
4511 }
4512 goto out;
4513 }
4514 }
4515 } else {
4516 batched = 1;
4517
4518 if (!vnode_compound_remove_available(dvp)) {
4519 panic("No vp, but no compound remove?");
4520 }
4521 }
4522
4523#if CONFIG_FSE
4524 need_event = need_fsevent(FSE_DELETE, dvp);
4525 if (need_event) {
4526 if (!batched) {
4527 if ((vp->v_flag & VISHARDLINK) == 0) {
4528 /* XXX need to get these data in batched VNOP */
4529 get_fse_info(vp, &finfo, ctx);
4530 }
4531 } else {
4532 error = vfs_get_notify_attributes(&va);
4533 if (error) {
4534 goto out;
4535 }
4536
4537 vap = &va;
4538 }
4539 }
4540#endif
4541 has_listeners = kauth_authorize_fileop_has_listeners();
4542 if (need_event || has_listeners) {
4543 if (path == NULL) {
4544 GET_PATH(path);
4545 if (path == NULL) {
4546 error = ENOMEM;
4547 goto out;
4548 }
4549 }
4550 len = safe_getpath(dvp, nd.ni_cnd.cn_nameptr, path, MAXPATHLEN, &truncated_path);
4551 }
4552
4553#if NAMEDRSRCFORK
4554 if (nd.ni_cnd.cn_flags & CN_WANTSRSRCFORK)
4555 error = vnode_removenamedstream(dvp, vp, XATTR_RESOURCEFORK_NAME, 0, ctx);
4556 else
4557#endif
4558 {
4559 error = vn_remove(dvp, &nd.ni_vp, &nd, flags, vap, ctx);
4560 vp = nd.ni_vp;
4561 if (error == EKEEPLOOKING) {
4562 if (!batched) {
4563 panic("EKEEPLOOKING, but not a filesystem that supports compound VNOPs?");
4564 }
4565
4566 if ((nd.ni_flag & NAMEI_CONTLOOKUP) == 0) {
4567 panic("EKEEPLOOKING, but continue flag not set?");
4568 }
4569
4570 if (vnode_isdir(vp)) {
4571 error = EISDIR;
4572 goto out;
4573 }
4574 goto lookup_continue;
4575 } else if (error == ENOENT && batched) {
4576 assert(retry_count < MAX_AUTHORIZE_ENOENT_RETRIES);
4577 if (retry_count < MAX_AUTHORIZE_ENOENT_RETRIES) {
4578 /*
4579 * For compound VNOPs, the authorization callback may
4580 * return ENOENT in case of racing hardlink lookups
4581 * hitting the name cache, redrive the lookup.
4582 */
4583 do_retry = 1;
4584 retry_count += 1;
4585 goto out;
4586 }
4587 }
4588 }
4589
4590 /*
4591 * Call out to allow 3rd party notification of delete.
4592 * Ignore result of kauth_authorize_fileop call.
4593 */
4594 if (!error) {
4595 if (has_listeners) {
4596 kauth_authorize_fileop(vfs_context_ucred(ctx),
4597 KAUTH_FILEOP_DELETE,
4598 (uintptr_t)vp,
4599 (uintptr_t)path);
4600 }
4601
4602 if (vp->v_flag & VISHARDLINK) {
4603 //
4604 // if a hardlink gets deleted we want to blow away the
4605 // v_parent link because the path that got us to this
4606 // instance of the link is no longer valid. this will
4607 // force the next call to get the path to ask the file
4608 // system instead of just following the v_parent link.
4609 //
4610 vnode_update_identity(vp, NULL, NULL, 0, 0, VNODE_UPDATE_PARENT);
4611 }
4612
4613#if CONFIG_FSE
4614 if (need_event) {
4615 if (vp->v_flag & VISHARDLINK) {
4616 get_fse_info(vp, &finfo, ctx);
4617 } else if (vap) {
4618 vnode_get_fse_info_from_vap(vp, &finfo, vap);
4619 }
4620 if (truncated_path) {
4621 finfo.mode |= FSE_TRUNCATED_PATH;
4622 }
4623 add_fsevent(FSE_DELETE, ctx,
4624 FSE_ARG_STRING, len, path,
4625 FSE_ARG_FINFO, &finfo,
4626 FSE_ARG_DONE);
4627 }
4628#endif
4629 }
4630
4631out:
4632 if (path != NULL)
4633 RELEASE_PATH(path);
4634
4635#if NAMEDRSRCFORK
4636 /* recycle the deleted rsrc fork vnode to force a reclaim, which
4637 * will cause its shadow file to go away if necessary.
4638 */
4639 if (vp && (vnode_isnamedstream(vp)) &&
4640 (vp->v_parent != NULLVP) &&
4641 vnode_isshadow(vp)) {
4642 vnode_recycle(vp);
4643 }
4644#endif
4645 /*
4646 * nameidone has to happen before we vnode_put(dvp)
4647 * since it may need to release the fs_nodelock on the dvp
4648 */
4649 nameidone(&nd);
4650 vnode_put(dvp);
4651 if (vp) {
4652 vnode_put(vp);
4653 }
4654
4655 if (do_retry) {
4656 goto retry;
4657 }
4658
4659 return (error);
4660}
4661
4662int
4663unlink1(vfs_context_t ctx, vnode_t start_dvp, user_addr_t path_arg,
4664 enum uio_seg segflg, int unlink_flags)
4665{
4666 return (unlinkat_internal(ctx, AT_FDCWD, start_dvp, path_arg, segflg,
4667 unlink_flags));
4668}
4669
4670/*
4671 * Delete a name from the filesystem using Carbon semantics.
4672 */
4673int
4674delete(__unused proc_t p, struct delete_args *uap, __unused int32_t *retval)
4675{
4676 return (unlinkat_internal(vfs_context_current(), AT_FDCWD, NULLVP,
4677 uap->path, UIO_USERSPACE, VNODE_REMOVE_NODELETEBUSY));
4678}
4679
4680/*
4681 * Delete a name from the filesystem using POSIX semantics.
4682 */
4683int
4684unlink(__unused proc_t p, struct unlink_args *uap, __unused int32_t *retval)
4685{
4686 return (unlinkat_internal(vfs_context_current(), AT_FDCWD, NULLVP,
4687 uap->path, UIO_USERSPACE, 0));
4688}
4689
4690int
4691unlinkat(__unused proc_t p, struct unlinkat_args *uap, __unused int32_t *retval)
4692{
4693 if (uap->flag & ~AT_REMOVEDIR)
4694 return (EINVAL);
4695
4696 if (uap->flag & AT_REMOVEDIR)
4697 return (rmdirat_internal(vfs_context_current(), uap->fd,
4698 uap->path, UIO_USERSPACE));
4699 else
4700 return (unlinkat_internal(vfs_context_current(), uap->fd,
4701 NULLVP, uap->path, UIO_USERSPACE, 0));
4702}
4703
4704/*
4705 * Reposition read/write file offset.
4706 */
/*
 * lseek: reposition the read/write offset of the open file 'uap->fd'.
 * Returns the new offset via *retval.  FIFOs/pipes yield ESPIPE; negative
 * resulting offsets are allowed only on character devices.
 */
int
lseek(proc_t p, struct lseek_args *uap, off_t *retval)
{
	struct fileproc *fp;
	vnode_t vp;
	struct vfs_context *ctx;
	off_t offset = uap->offset, file_size;
	int error;

	if ( (error = fp_getfvp(p,uap->fd, &fp, &vp)) ) {
		/* fd does not refer to a vnode (e.g. a pipe) — POSIX says ESPIPE */
		if (error == ENOTSUP)
			return (ESPIPE);
		return (error);
	}
	if (vnode_isfifo(vp)) {
		file_drop(uap->fd);
		return(ESPIPE);
	}


	ctx = vfs_context_current();
#if CONFIG_MACF
	/* lseek(fd, 0, SEEK_CUR) is a pure query; check "get" not "change" */
	if (uap->whence == L_INCR && uap->offset == 0)
		error = mac_file_check_get_offset(vfs_context_ucred(ctx),
		    fp->f_fglob);
	else
		error = mac_file_check_change_offset(vfs_context_ucred(ctx),
		    fp->f_fglob);
	if (error) {
		file_drop(uap->fd);
		return (error);
	}
#endif
	if ( (error = vnode_getwithref(vp)) ) {
		file_drop(uap->fd);
		return(error);
	}

	switch (uap->whence) {
	case L_INCR:
		/* relative to current offset; overflow caught by sign check below */
		offset += fp->f_fglob->fg_offset;
		break;
	case L_XTND:
		/* relative to end of file */
		if ((error = vnode_size(vp, &file_size, ctx)) != 0)
			break;
		offset += file_size;
		break;
	case L_SET:
		break;
	default:
		error = EINVAL;
	}
	if (error == 0) {
		if (uap->offset > 0 && offset < 0) {
			/* Incremented/relative move past max size */
			error = EOVERFLOW;
		} else {
			/*
			 * Allow negative offsets on character devices, per
			 * POSIX 1003.1-2001.  Most likely for writing disk
			 * labels.
			 */
			if (offset < 0 && vp->v_type != VCHR) {
				/* Decremented/relative move before start */
				error = EINVAL;
			} else {
				/* Success */
				fp->f_fglob->fg_offset = offset;
				*retval = fp->f_fglob->fg_offset;
			}
		}
	}

	/*
	 * An lseek can affect whether data is "available to read."  Use
	 * hint of NOTE_NONE so no EVFILT_VNODE events fire
	 */
	post_event_if_success(vp, error, NOTE_NONE);
	(void)vnode_put(vp);
	file_drop(uap->fd);
	return (error);
}
4789
4790
4791/*
4792 * Check access permissions.
4793 *
4794 * Returns: 0 Success
4795 * vnode_authorize:???
4796 */
4797static int
4798access1(vnode_t vp, vnode_t dvp, int uflags, vfs_context_t ctx)
4799{
4800 kauth_action_t action;
4801 int error;
4802
4803 /*
4804 * If just the regular access bits, convert them to something
4805 * that vnode_authorize will understand.
4806 */
4807 if (!(uflags & _ACCESS_EXTENDED_MASK)) {
4808 action = 0;
4809 if (uflags & R_OK)
4810 action |= KAUTH_VNODE_READ_DATA; /* aka KAUTH_VNODE_LIST_DIRECTORY */
4811 if (uflags & W_OK) {
4812 if (vnode_isdir(vp)) {
4813 action |= KAUTH_VNODE_ADD_FILE |
4814 KAUTH_VNODE_ADD_SUBDIRECTORY;
4815 /* might want delete rights here too */
4816 } else {
4817 action |= KAUTH_VNODE_WRITE_DATA;
4818 }
4819 }
4820 if (uflags & X_OK) {
4821 if (vnode_isdir(vp)) {
4822 action |= KAUTH_VNODE_SEARCH;
4823 } else {
4824 action |= KAUTH_VNODE_EXECUTE;
4825 }
4826 }
4827 } else {
4828 /* take advantage of definition of uflags */
4829 action = uflags >> 8;
4830 }
4831
4832#if CONFIG_MACF
4833 error = mac_vnode_check_access(ctx, vp, uflags);
4834 if (error)
4835 return (error);
4836#endif /* MAC */
4837
4838 /* action == 0 means only check for existence */
4839 if (action != 0) {
4840 error = vnode_authorize(vp, dvp, action | KAUTH_VNODE_ACCESS, ctx);
4841 } else {
4842 error = 0;
4843 }
4844
4845 return(error);
4846}
4847
4848
4849
4850/*
4851 * access_extended: Check access permissions in bulk.
4852 *
4853 * Description: uap->entries Pointer to an array of accessx
4854 * descriptor structs, plus one or
4855 * more NULL terminated strings (see
4856 * "Notes" section below).
4857 * uap->size Size of the area pointed to by
4858 * uap->entries.
4859 * uap->results Pointer to the results array.
4860 *
4861 * Returns: 0 Success
4862 * ENOMEM Insufficient memory
4863 * EINVAL Invalid arguments
4864 * namei:EFAULT Bad address
4865 * namei:ENAMETOOLONG Filename too long
4866 * namei:ENOENT No such file or directory
4867 * namei:ELOOP Too many levels of symbolic links
4868 * namei:EBADF Bad file descriptor
4869 * namei:ENOTDIR Not a directory
4870 * namei:???
4871 * access1:
4872 *
4873 * Implicit returns:
4874 * uap->results Array contents modified
4875 *
4876 * Notes: The uap->entries are structured as an arbitrary length array
4877 * of accessx descriptors, followed by one or more NULL terminated
4878 * strings
4879 *
4880 * struct accessx_descriptor[0]
4881 * ...
4882 * struct accessx_descriptor[n]
4883 * char name_data[0];
4884 *
4885 * We determine the entry count by walking the buffer containing
4886 * the uap->entries argument descriptor. For each descriptor we
4887 * see, the valid values for the offset ad_name_offset will be
4888 * in the byte range:
4889 *
4890 * [ uap->entries + sizeof(struct accessx_descriptor) ]
4891 * to
4892 * [ uap->entries + uap->size - 2 ]
4893 *
4894 * since we must have at least one string, and the string must
4895 * be at least one character plus the NULL terminator in length.
4896 *
4897 * XXX: Need to support the check-as uid argument
4898 */
int
access_extended(__unused proc_t p, struct access_extended_args *uap, __unused int32_t *retval)
{
	struct accessx_descriptor *input = NULL;
	errno_t *result = NULL;
	errno_t error = 0;
	int wantdelete = 0;
	unsigned int desc_max, desc_actual, i, j;
	struct vfs_context context;
	struct nameidata nd;
	int niopts;
	vnode_t vp = NULL;
	vnode_t dvp = NULL;
#define ACCESSX_MAX_DESCR_ON_STACK 10
	struct accessx_descriptor stack_input[ACCESSX_MAX_DESCR_ON_STACK];

	/* must be NULL so the cred unref at 'out:' is safe on early error */
	context.vc_ucred = NULL;

	/*
	 * Validate parameters; if valid, copy the descriptor array and string
	 * arguments into local memory.  Before proceeding, the following
	 * conditions must have been met:
	 *
	 * o The total size is not permitted to exceed ACCESSX_MAX_TABLESIZE
	 * o There must be sufficient room in the request for at least one
	 *   descriptor and a one yte NUL terminated string.
	 * o The allocation of local storage must not fail.
	 */
	if (uap->size > ACCESSX_MAX_TABLESIZE)
		return(ENOMEM);
	if (uap->size < (sizeof(struct accessx_descriptor) + 2))
		return(EINVAL);
	/* small requests are served from the stack buffer, no allocation */
	if (uap->size <= sizeof (stack_input)) {
		input = stack_input;
	} else {
		MALLOC(input, struct accessx_descriptor *, uap->size, M_TEMP, M_WAITOK);
		if (input == NULL) {
			error = ENOMEM;
			goto out;
		}
	}
	error = copyin(uap->entries, input, uap->size);
	if (error)
		goto out;

	AUDIT_ARG(opaque, input, uap->size);

	/*
	 * Force NUL termination of the copyin buffer to avoid nami() running
	 * off the end.  If the caller passes us bogus data, they may get a
	 * bogus result.
	 */
	((char *)input)[uap->size - 1] = 0;

	/*
	 * Access is defined as checking against the process' real identity,
	 * even if operations are checking the effective identity.  This
	 * requires that we use a local vfs context.
	 */
	context.vc_ucred = kauth_cred_copy_real(kauth_cred_get());
	context.vc_thread = current_thread();

	/*
	 * Find out how many entries we have, so we can allocate the result
	 * array by walking the list and adjusting the count downward by the
	 * earliest string offset we see.
	 */
	desc_max = (uap->size - 2) / sizeof(struct accessx_descriptor);
	desc_actual = desc_max;
	for (i = 0; i < desc_actual; i++) {
		/*
		 * Take the offset to the name string for this entry and
		 * convert to an input array index, which would be one off
		 * the end of the array if this entry was the lowest-addressed
		 * name string.
		 */
		j = input[i].ad_name_offset / sizeof(struct accessx_descriptor);

		/*
		 * An offset greater than the max allowable offset is an error.
		 * It is also an error for any valid entry to point
		 * to a location prior to the end of the current entry, if
		 * it's not a reference to the string of the previous entry.
		 */
		if (j > desc_max || (j != 0 && j <= i)) {
			error = EINVAL;
			goto out;
		}

		/*
		 * An offset of 0 means use the previous descriptor's offset;
		 * this is used to chain multiple requests for the same file
		 * to avoid multiple lookups.
		 */
		if (j == 0) {
			/* This is not valid for the first entry */
			if (i == 0) {
				error = EINVAL;
				goto out;
			}
			continue;
		}

		/*
		 * If the offset of the string for this descriptor is before
		 * what we believe is the current actual last descriptor,
		 * then we need to adjust our estimate downward; this permits
		 * the string table following the last descriptor to be out
		 * of order relative to the descriptor list.
		 */
		if (j < desc_actual)
			desc_actual = j;
	}

	/*
	 * We limit the actual number of descriptors we are willing to process
	 * to a hard maximum of ACCESSX_MAX_DESCRIPTORS.  If the number being
	 * requested does not exceed this limit,
	 */
	if (desc_actual > ACCESSX_MAX_DESCRIPTORS) {
		error = ENOMEM;
		goto out;
	}
	MALLOC(result, errno_t *, desc_actual * sizeof(errno_t), M_TEMP, M_WAITOK);
	if (result == NULL) {
		error = ENOMEM;
		goto out;
	}

	/*
	 * Do the work by iterating over the descriptor entries we know to
	 * at least appear to contain valid data.
	 */
	error = 0;
	for (i = 0; i < desc_actual; i++) {
		/*
		 * If the ad_name_offset is 0, then we use the previous
		 * results to make the check; otherwise, we are looking up
		 * a new file name.
		 */
		if (input[i].ad_name_offset != 0) {
			/* discard old vnodes */
			if (vp) {
				vnode_put(vp);
				vp = NULL;
			}
			if (dvp) {
				vnode_put(dvp);
				dvp = NULL;
			}

			/*
			 * Scan forward in the descriptor list to see if we
			 * need the parent vnode.  We will need it if we are
			 * deleting, since we must have rights  to remove
			 * entries in the parent directory, as well as the
			 * rights to delete the object itself.
			 */
			wantdelete = input[i].ad_flags & _DELETE_OK;
			for (j = i + 1; (j < desc_actual) && (input[j].ad_name_offset == 0); j++)
				if (input[j].ad_flags & _DELETE_OK)
					wantdelete = 1;

			niopts = FOLLOW | AUDITVNPATH1;

			/* need parent for vnode_authorize for deletion test */
			if (wantdelete)
				niopts |= WANTPARENT;

			/* do the lookup */
			NDINIT(&nd, LOOKUP, OP_ACCESS, niopts, UIO_SYSSPACE,
			    CAST_USER_ADDR_T(((const char *)input) + input[i].ad_name_offset),
			    &context);
			error = namei(&nd);
			if (!error) {
				vp = nd.ni_vp;
				if (wantdelete)
					dvp = nd.ni_dvp;
			}
			nameidone(&nd);
		}

		/*
		 * Handle lookup errors.
		 */
		switch(error) {
		case ENOENT:
		case EACCES:
		case EPERM:
		case ENOTDIR:
			/* per-entry failure: record it and continue */
			result[i] = error;
			break;
		case 0:
			/* run this access check */
			result[i] = access1(vp, dvp, input[i].ad_flags, &context);
			break;
		default:
			/* fatal lookup error */

			goto out;
		}
	}

	AUDIT_ARG(data, result, sizeof(errno_t), desc_actual);

	/* copy out results */
	error = copyout(result, uap->results, desc_actual * sizeof(errno_t));

out:
	if (input && input != stack_input)
		FREE(input, M_TEMP);
	if (result)
		FREE(result, M_TEMP);
	if (vp)
		vnode_put(vp);
	if (dvp)
		vnode_put(dvp);
	if (IS_VALID_CRED(context.vc_ucred))
		kauth_cred_unref(&context.vc_ucred);
	return(error);
}
5120
5121
5122/*
5123 * Returns: 0 Success
5124 * namei:EFAULT Bad address
5125 * namei:ENAMETOOLONG Filename too long
5126 * namei:ENOENT No such file or directory
5127 * namei:ELOOP Too many levels of symbolic links
5128 * namei:EBADF Bad file descriptor
5129 * namei:ENOTDIR Not a directory
5130 * namei:???
5131 * access1:
5132 */
5133static int
5134faccessat_internal(vfs_context_t ctx, int fd, user_addr_t path, int amode,
5135 int flag, enum uio_seg segflg)
5136{
5137 int error;
5138 struct nameidata nd;
5139 int niopts;
5140 struct vfs_context context;
5141#if NAMEDRSRCFORK
5142 int is_namedstream = 0;
5143#endif
5144
5145 /*
5146 * Unless the AT_EACCESS option is used, Access is defined as checking
5147 * against the process' real identity, even if operations are checking
5148 * the effective identity. So we need to tweak the credential
5149 * in the context for that case.
5150 */
5151 if (!(flag & AT_EACCESS))
5152 context.vc_ucred = kauth_cred_copy_real(kauth_cred_get());
5153 else
5154 context.vc_ucred = ctx->vc_ucred;
5155 context.vc_thread = ctx->vc_thread;
5156
5157
5158 niopts = FOLLOW | AUDITVNPATH1;
5159 /* need parent for vnode_authorize for deletion test */
5160 if (amode & _DELETE_OK)
5161 niopts |= WANTPARENT;
5162 NDINIT(&nd, LOOKUP, OP_ACCESS, niopts, segflg,
5163 path, &context);
5164
5165#if NAMEDRSRCFORK
5166 /* access(F_OK) calls are allowed for resource forks. */
5167 if (amode == F_OK)
5168 nd.ni_cnd.cn_flags |= CN_ALLOWRSRCFORK;
5169#endif
5170 error = nameiat(&nd, fd);
5171 if (error)
5172 goto out;
5173
5174#if NAMEDRSRCFORK
5175 /* Grab reference on the shadow stream file vnode to
5176 * force an inactive on release which will mark it
5177 * for recycle.
5178 */
5179 if (vnode_isnamedstream(nd.ni_vp) &&
5180 (nd.ni_vp->v_parent != NULLVP) &&
5181 vnode_isshadow(nd.ni_vp)) {
5182 is_namedstream = 1;
5183 vnode_ref(nd.ni_vp);
5184 }
5185#endif
5186
5187 error = access1(nd.ni_vp, nd.ni_dvp, amode, &context);
5188
5189#if NAMEDRSRCFORK
5190 if (is_namedstream) {
5191 vnode_rele(nd.ni_vp);
5192 }
5193#endif
5194
5195 vnode_put(nd.ni_vp);
5196 if (amode & _DELETE_OK)
5197 vnode_put(nd.ni_dvp);
5198 nameidone(&nd);
5199
5200out:
5201 if (!(flag & AT_EACCESS))
5202 kauth_cred_unref(&context.vc_ucred);
5203 return (error);
5204}
5205
5206int
5207access(__unused proc_t p, struct access_args *uap, __unused int32_t *retval)
5208{
5209 return (faccessat_internal(vfs_context_current(), AT_FDCWD,
5210 uap->path, uap->flags, 0, UIO_USERSPACE));
5211}
5212
5213int
5214faccessat(__unused proc_t p, struct faccessat_args *uap,
5215 __unused int32_t *retval)
5216{
5217 if (uap->flag & ~AT_EACCESS)
5218 return (EINVAL);
5219
5220 return (faccessat_internal(vfs_context_current(), uap->fd,
5221 uap->path, uap->amode, uap->flag, UIO_USERSPACE));
5222}
5223
5224/*
5225 * Returns: 0 Success
5226 * EFAULT
5227 * copyout:EFAULT
5228 * namei:???
5229 * vn_stat:???
5230 */
5231static int
5232fstatat_internal(vfs_context_t ctx, user_addr_t path, user_addr_t ub,
5233 user_addr_t xsecurity, user_addr_t xsecurity_size, int isstat64,
5234 enum uio_seg segflg, int fd, int flag)
5235{
5236 struct nameidata nd;
5237 int follow;
5238 union {
5239 struct stat sb;
5240 struct stat64 sb64;
5241 } source;
5242 union {
5243 struct user64_stat user64_sb;
5244 struct user32_stat user32_sb;
5245 struct user64_stat64 user64_sb64;
5246 struct user32_stat64 user32_sb64;
5247 } dest;
5248 caddr_t sbp;
5249 int error, my_size;
5250 kauth_filesec_t fsec;
5251 size_t xsecurity_bufsize;
5252 void * statptr;
5253
5254 follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
5255 NDINIT(&nd, LOOKUP, OP_GETATTR, follow | AUDITVNPATH1,
5256 segflg, path, ctx);
5257
5258#if NAMEDRSRCFORK
5259 int is_namedstream = 0;
5260 /* stat calls are allowed for resource forks. */
5261 nd.ni_cnd.cn_flags |= CN_ALLOWRSRCFORK;
5262#endif
5263 error = nameiat(&nd, fd);
5264 if (error)
5265 return (error);
5266 fsec = KAUTH_FILESEC_NONE;
5267
5268 statptr = (void *)&source;
5269
5270#if NAMEDRSRCFORK
5271 /* Grab reference on the shadow stream file vnode to
5272 * force an inactive on release which will mark it
5273 * for recycle.
5274 */
5275 if (vnode_isnamedstream(nd.ni_vp) &&
5276 (nd.ni_vp->v_parent != NULLVP) &&
5277 vnode_isshadow(nd.ni_vp)) {
5278 is_namedstream = 1;
5279 vnode_ref(nd.ni_vp);
5280 }
5281#endif
5282
5283 error = vn_stat(nd.ni_vp, statptr, (xsecurity != USER_ADDR_NULL ? &fsec : NULL), isstat64, ctx);
5284
5285#if NAMEDRSRCFORK
5286 if (is_namedstream) {
5287 vnode_rele(nd.ni_vp);
5288 }
5289#endif
5290 vnode_put(nd.ni_vp);
5291 nameidone(&nd);
5292
5293 if (error)
5294 return (error);
5295 /* Zap spare fields */
5296 if (isstat64 != 0) {
5297 source.sb64.st_lspare = 0;
5298 source.sb64.st_qspare[0] = 0LL;
5299 source.sb64.st_qspare[1] = 0LL;
5300 if (IS_64BIT_PROCESS(vfs_context_proc(ctx))) {
5301 munge_user64_stat64(&source.sb64, &dest.user64_sb64);
5302 my_size = sizeof(dest.user64_sb64);
5303 sbp = (caddr_t)&dest.user64_sb64;
5304 } else {
5305 munge_user32_stat64(&source.sb64, &dest.user32_sb64);
5306 my_size = sizeof(dest.user32_sb64);
5307 sbp = (caddr_t)&dest.user32_sb64;
5308 }
5309 /*
5310 * Check if we raced (post lookup) against the last unlink of a file.
5311 */
5312 if ((source.sb64.st_nlink == 0) && S_ISREG(source.sb64.st_mode)) {
5313 source.sb64.st_nlink = 1;
5314 }
5315 } else {
5316 source.sb.st_lspare = 0;
5317 source.sb.st_qspare[0] = 0LL;
5318 source.sb.st_qspare[1] = 0LL;
5319 if (IS_64BIT_PROCESS(vfs_context_proc(ctx))) {
5320 munge_user64_stat(&source.sb, &dest.user64_sb);
5321 my_size = sizeof(dest.user64_sb);
5322 sbp = (caddr_t)&dest.user64_sb;
5323 } else {
5324 munge_user32_stat(&source.sb, &dest.user32_sb);
5325 my_size = sizeof(dest.user32_sb);
5326 sbp = (caddr_t)&dest.user32_sb;
5327 }
5328
5329 /*
5330 * Check if we raced (post lookup) against the last unlink of a file.
5331 */
5332 if ((source.sb.st_nlink == 0) && S_ISREG(source.sb.st_mode)) {
5333 source.sb.st_nlink = 1;
5334 }
5335 }
5336 if ((error = copyout(sbp, ub, my_size)) != 0)
5337 goto out;
5338
5339 /* caller wants extended security information? */
5340 if (xsecurity != USER_ADDR_NULL) {
5341
5342 /* did we get any? */
5343 if (fsec == KAUTH_FILESEC_NONE) {
5344 if (susize(xsecurity_size, 0) != 0) {
5345 error = EFAULT;
5346 goto out;
5347 }
5348 } else {
5349 /* find the user buffer size */
5350 xsecurity_bufsize = fusize(xsecurity_size);
5351
5352 /* copy out the actual data size */
5353 if (susize(xsecurity_size, KAUTH_FILESEC_COPYSIZE(fsec)) != 0) {
5354 error = EFAULT;
5355 goto out;
5356 }
5357
5358 /* if the caller supplied enough room, copy out to it */
5359 if (xsecurity_bufsize >= KAUTH_FILESEC_COPYSIZE(fsec))
5360 error = copyout(fsec, xsecurity, KAUTH_FILESEC_COPYSIZE(fsec));
5361 }
5362 }
5363out:
5364 if (fsec != KAUTH_FILESEC_NONE)
5365 kauth_filesec_free(fsec);
5366 return (error);
5367}
5368
5369/*
5370 * stat_extended: Get file status; with extended security (ACL).
5371 *
5372 * Parameters: p (ignored)
5373 * uap User argument descriptor (see below)
5374 * retval (ignored)
5375 *
5376 * Indirect: uap->path Path of file to get status from
5377 * uap->ub User buffer (holds file status info)
5378 * uap->xsecurity ACL to get (extended security)
5379 * uap->xsecurity_size Size of ACL
5380 *
5381 * Returns: 0 Success
5382 * !0 errno value
5383 *
5384 */
5385int
5386stat_extended(__unused proc_t p, struct stat_extended_args *uap,
5387 __unused int32_t *retval)
5388{
5389 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5390 uap->xsecurity, uap->xsecurity_size, 0, UIO_USERSPACE, AT_FDCWD,
5391 0));
5392}
5393
5394/*
5395 * Returns: 0 Success
5396 * fstatat_internal:??? [see fstatat_internal() in this file]
5397 */
5398int
5399stat(__unused proc_t p, struct stat_args *uap, __unused int32_t *retval)
5400{
5401 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5402 0, 0, 0, UIO_USERSPACE, AT_FDCWD, 0));
5403}
5404
5405int
5406stat64(__unused proc_t p, struct stat64_args *uap, __unused int32_t *retval)
5407{
5408 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5409 0, 0, 1, UIO_USERSPACE, AT_FDCWD, 0));
5410}
5411
5412/*
5413 * stat64_extended: Get file status; can handle large inode numbers; with extended security (ACL).
5414 *
5415 * Parameters: p (ignored)
5416 * uap User argument descriptor (see below)
5417 * retval (ignored)
5418 *
5419 * Indirect: uap->path Path of file to get status from
5420 * uap->ub User buffer (holds file status info)
5421 * uap->xsecurity ACL to get (extended security)
5422 * uap->xsecurity_size Size of ACL
5423 *
5424 * Returns: 0 Success
5425 * !0 errno value
5426 *
5427 */
5428int
5429stat64_extended(__unused proc_t p, struct stat64_extended_args *uap, __unused int32_t *retval)
5430{
5431 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5432 uap->xsecurity, uap->xsecurity_size, 1, UIO_USERSPACE, AT_FDCWD,
5433 0));
5434}
5435
5436/*
5437 * lstat_extended: Get file status; does not follow links; with extended security (ACL).
5438 *
5439 * Parameters: p (ignored)
5440 * uap User argument descriptor (see below)
5441 * retval (ignored)
5442 *
5443 * Indirect: uap->path Path of file to get status from
5444 * uap->ub User buffer (holds file status info)
5445 * uap->xsecurity ACL to get (extended security)
5446 * uap->xsecurity_size Size of ACL
5447 *
5448 * Returns: 0 Success
5449 * !0 errno value
5450 *
5451 */
5452int
5453lstat_extended(__unused proc_t p, struct lstat_extended_args *uap, __unused int32_t *retval)
5454{
5455 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5456 uap->xsecurity, uap->xsecurity_size, 0, UIO_USERSPACE, AT_FDCWD,
5457 AT_SYMLINK_NOFOLLOW));
5458}
5459
5460/*
5461 * Get file status; this version does not follow links.
5462 */
5463int
5464lstat(__unused proc_t p, struct lstat_args *uap, __unused int32_t *retval)
5465{
5466 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5467 0, 0, 0, UIO_USERSPACE, AT_FDCWD, AT_SYMLINK_NOFOLLOW));
5468}
5469
5470int
5471lstat64(__unused proc_t p, struct lstat64_args *uap, __unused int32_t *retval)
5472{
5473 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5474 0, 0, 1, UIO_USERSPACE, AT_FDCWD, AT_SYMLINK_NOFOLLOW));
5475}
5476
5477/*
5478 * lstat64_extended: Get file status; can handle large inode numbers; does not
5479 * follow links; with extended security (ACL).
5480 *
5481 * Parameters: p (ignored)
5482 * uap User argument descriptor (see below)
5483 * retval (ignored)
5484 *
5485 * Indirect: uap->path Path of file to get status from
5486 * uap->ub User buffer (holds file status info)
5487 * uap->xsecurity ACL to get (extended security)
5488 * uap->xsecurity_size Size of ACL
5489 *
5490 * Returns: 0 Success
5491 * !0 errno value
5492 *
5493 */
5494int
5495lstat64_extended(__unused proc_t p, struct lstat64_extended_args *uap, __unused int32_t *retval)
5496{
5497 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5498 uap->xsecurity, uap->xsecurity_size, 1, UIO_USERSPACE, AT_FDCWD,
5499 AT_SYMLINK_NOFOLLOW));
5500}
5501
5502int
5503fstatat(__unused proc_t p, struct fstatat_args *uap, __unused int32_t *retval)
5504{
5505 if (uap->flag & ~AT_SYMLINK_NOFOLLOW)
5506 return (EINVAL);
5507
5508 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5509 0, 0, 0, UIO_USERSPACE, uap->fd, uap->flag));
5510}
5511
5512int
5513fstatat64(__unused proc_t p, struct fstatat64_args *uap,
5514 __unused int32_t *retval)
5515{
5516 if (uap->flag & ~AT_SYMLINK_NOFOLLOW)
5517 return (EINVAL);
5518
5519 return (fstatat_internal(vfs_context_current(), uap->path, uap->ub,
5520 0, 0, 1, UIO_USERSPACE, uap->fd, uap->flag));
5521}
5522
5523/*
5524 * Get configurable pathname variables.
5525 *
5526 * Returns: 0 Success
5527 * namei:???
5528 * vn_pathconf:???
5529 *
5530 * Notes: Global implementation constants are intended to be
5531 * implemented in this function directly; all other constants
5532 * are per-FS implementation, and therefore must be handled in
5533 * each respective FS, instead.
5534 *
5535 * XXX We implement some things globally right now that should actually be
5536 * XXX per-FS; we will need to deal with this at some point.
5537 */
5538/* ARGSUSED */
5539int
5540pathconf(__unused proc_t p, struct pathconf_args *uap, int32_t *retval)
5541{
5542 int error;
5543 struct nameidata nd;
5544 vfs_context_t ctx = vfs_context_current();
5545
5546 NDINIT(&nd, LOOKUP, OP_PATHCONF, FOLLOW | AUDITVNPATH1,
5547 UIO_USERSPACE, uap->path, ctx);
5548 error = namei(&nd);
5549 if (error)
5550 return (error);
5551
5552 error = vn_pathconf(nd.ni_vp, uap->name, retval, ctx);
5553
5554 vnode_put(nd.ni_vp);
5555 nameidone(&nd);
5556 return (error);
5557}
5558
5559/*
5560 * Return target name of a symbolic link.
5561 */
5562/* ARGSUSED */
5563static int
5564readlinkat_internal(vfs_context_t ctx, int fd, user_addr_t path,
5565 enum uio_seg seg, user_addr_t buf, size_t bufsize, enum uio_seg bufseg,
5566 int *retval)
5567{
5568 vnode_t vp;
5569 uio_t auio;
5570 int error;
5571 struct nameidata nd;
5572 char uio_buf[ UIO_SIZEOF(1) ];
5573
5574 NDINIT(&nd, LOOKUP, OP_READLINK, NOFOLLOW | AUDITVNPATH1,
5575 seg, path, ctx);
5576
5577 error = nameiat(&nd, fd);
5578 if (error)
5579 return (error);
5580 vp = nd.ni_vp;
5581
5582 nameidone(&nd);
5583
5584 auio = uio_createwithbuffer(1, 0, bufseg, UIO_READ,
5585 &uio_buf[0], sizeof(uio_buf));
5586 uio_addiov(auio, buf, bufsize);
5587 if (vp->v_type != VLNK) {
5588 error = EINVAL;
5589 } else {
5590#if CONFIG_MACF
5591 error = mac_vnode_check_readlink(ctx, vp);
5592#endif
5593 if (error == 0)
5594 error = vnode_authorize(vp, NULL, KAUTH_VNODE_READ_DATA,
5595 ctx);
5596 if (error == 0)
5597 error = VNOP_READLINK(vp, auio, ctx);
5598 }
5599 vnode_put(vp);
5600
5601 *retval = bufsize - (int)uio_resid(auio);
5602 return (error);
5603}
5604
5605int
5606readlink(proc_t p, struct readlink_args *uap, int32_t *retval)
5607{
5608 enum uio_seg procseg;
5609
5610 procseg = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
5611 return (readlinkat_internal(vfs_context_current(), AT_FDCWD,
5612 CAST_USER_ADDR_T(uap->path), procseg, CAST_USER_ADDR_T(uap->buf),
5613 uap->count, procseg, retval));
5614}
5615
5616int
5617readlinkat(proc_t p, struct readlinkat_args *uap, int32_t *retval)
5618{
5619 enum uio_seg procseg;
5620
5621 procseg = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
5622 return (readlinkat_internal(vfs_context_current(), uap->fd, uap->path,
5623 procseg, uap->buf, uap->bufsize, procseg, retval));
5624}
5625
5626/*
5627 * Change file flags.
5628 */
5629static int
5630chflags1(vnode_t vp, int flags, vfs_context_t ctx)
5631{
5632 struct vnode_attr va;
5633 kauth_action_t action;
5634 int error;
5635
5636 VATTR_INIT(&va);
5637 VATTR_SET(&va, va_flags, flags);
5638
5639#if CONFIG_MACF
5640 error = mac_vnode_check_setflags(ctx, vp, flags);
5641 if (error)
5642 goto out;
5643#endif
5644
5645 /* request authorisation, disregard immutability */
5646 if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0)
5647 goto out;
5648 /*
5649 * Request that the auth layer disregard those file flags it's allowed to when
5650 * authorizing this operation; we need to do this in order to be able to
5651 * clear immutable flags.
5652 */
5653 if (action && ((error = vnode_authorize(vp, NULL, action | KAUTH_VNODE_NOIMMUTABLE, ctx)) != 0))
5654 goto out;
5655 error = vnode_setattr(vp, &va, ctx);
5656
5657 if ((error == 0) && !VATTR_IS_SUPPORTED(&va, va_flags)) {
5658 error = ENOTSUP;
5659 }
5660out:
5661 vnode_put(vp);
5662 return(error);
5663}
5664
5665/*
5666 * Change flags of a file given a path name.
5667 */
5668/* ARGSUSED */
5669int
5670chflags(__unused proc_t p, struct chflags_args *uap, __unused int32_t *retval)
5671{
5672 vnode_t vp;
5673 vfs_context_t ctx = vfs_context_current();
5674 int error;
5675 struct nameidata nd;
5676
5677 AUDIT_ARG(fflags, uap->flags);
5678 NDINIT(&nd, LOOKUP, OP_SETATTR, FOLLOW | AUDITVNPATH1,
5679 UIO_USERSPACE, uap->path, ctx);
5680 error = namei(&nd);
5681 if (error)
5682 return (error);
5683 vp = nd.ni_vp;
5684 nameidone(&nd);
5685
5686 error = chflags1(vp, uap->flags, ctx);
5687
5688 return(error);
5689}
5690
5691/*
5692 * Change flags of a file given a file descriptor.
5693 */
5694/* ARGSUSED */
5695int
5696fchflags(__unused proc_t p, struct fchflags_args *uap, __unused int32_t *retval)
5697{
5698 vnode_t vp;
5699 int error;
5700
5701 AUDIT_ARG(fd, uap->fd);
5702 AUDIT_ARG(fflags, uap->flags);
5703 if ( (error = file_vnode(uap->fd, &vp)) )
5704 return (error);
5705
5706 if ((error = vnode_getwithref(vp))) {
5707 file_drop(uap->fd);
5708 return(error);
5709 }
5710
5711 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
5712
5713 error = chflags1(vp, uap->flags, vfs_context_current());
5714
5715 file_drop(uap->fd);
5716 return (error);
5717}
5718
5719/*
5720 * Change security information on a filesystem object.
5721 *
5722 * Returns: 0 Success
5723 * EPERM Operation not permitted
5724 * vnode_authattr:??? [anything vnode_authattr can return]
5725 * vnode_authorize:??? [anything vnode_authorize can return]
5726 * vnode_setattr:??? [anything vnode_setattr can return]
5727 *
5728 * Notes: If vnode_authattr or vnode_authorize return EACCES, it will be
5729 * translated to EPERM before being returned.
5730 */
5731static int
5732chmod_vnode(vfs_context_t ctx, vnode_t vp, struct vnode_attr *vap)
5733{
5734 kauth_action_t action;
5735 int error;
5736
5737 AUDIT_ARG(mode, vap->va_mode);
5738 /* XXX audit new args */
5739
5740#if NAMEDSTREAMS
5741 /* chmod calls are not allowed for resource forks. */
5742 if (vp->v_flag & VISNAMEDSTREAM) {
5743 return (EPERM);
5744 }
5745#endif
5746
5747#if CONFIG_MACF
5748 if (VATTR_IS_ACTIVE(vap, va_mode) &&
5749 (error = mac_vnode_check_setmode(ctx, vp, (mode_t)vap->va_mode)) != 0)
5750 return (error);
5751#endif
5752
5753 /* make sure that the caller is allowed to set this security information */
5754 if (((error = vnode_authattr(vp, vap, &action, ctx)) != 0) ||
5755 ((error = vnode_authorize(vp, NULL, action, ctx)) != 0)) {
5756 if (error == EACCES)
5757 error = EPERM;
5758 return(error);
5759 }
5760
5761 error = vnode_setattr(vp, vap, ctx);
5762
5763 return (error);
5764}
5765
5766
5767/*
5768 * Change mode of a file given a path name.
5769 *
5770 * Returns: 0 Success
5771 * namei:??? [anything namei can return]
5772 * chmod_vnode:??? [anything chmod_vnode can return]
5773 */
5774static int
5775chmodat(vfs_context_t ctx, user_addr_t path, struct vnode_attr *vap,
5776 int fd, int flag, enum uio_seg segflg)
5777{
5778 struct nameidata nd;
5779 int follow, error;
5780
5781 follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
5782 NDINIT(&nd, LOOKUP, OP_SETATTR, follow | AUDITVNPATH1,
5783 segflg, path, ctx);
5784 if ((error = nameiat(&nd, fd)))
5785 return (error);
5786 error = chmod_vnode(ctx, nd.ni_vp, vap);
5787 vnode_put(nd.ni_vp);
5788 nameidone(&nd);
5789 return(error);
5790}
5791
5792/*
5793 * chmod_extended: Change the mode of a file given a path name; with extended
5794 * argument list (including extended security (ACL)).
5795 *
5796 * Parameters: p Process requesting the open
5797 * uap User argument descriptor (see below)
5798 * retval (ignored)
5799 *
5800 * Indirect: uap->path Path to object (same as 'chmod')
5801 * uap->uid UID to set
5802 * uap->gid GID to set
5803 * uap->mode File mode to set (same as 'chmod')
5804 * uap->xsecurity ACL to set (or delete)
5805 *
5806 * Returns: 0 Success
5807 * !0 errno value
5808 *
5809 * Notes: The kauth_filesec_t in 'va', if any, is in host byte order.
5810 *
5811 * XXX: We should enummerate the possible errno values here, and where
5812 * in the code they originated.
5813 */
int
chmod_extended(__unused proc_t p, struct chmod_extended_args *uap, __unused int32_t *retval)
{
	int error;
	struct vnode_attr va;
	kauth_filesec_t xsecdst;

	AUDIT_ARG(owner, uap->uid, uap->gid);

	/*
	 * Build the attribute set; a mode of -1 and KAUTH_*_NONE ids mean
	 * "leave that attribute alone".
	 */
	VATTR_INIT(&va);
	if (uap->mode != -1)
		VATTR_SET(&va, va_mode, uap->mode & ALLPERMS);
	if (uap->uid != KAUTH_UID_NONE)
		VATTR_SET(&va, va_uid, uap->uid);
	if (uap->gid != KAUTH_GID_NONE)
		VATTR_SET(&va, va_gid, uap->gid);

	xsecdst = NULL;
	switch(uap->xsecurity) {
	/* explicit remove request */
	case CAST_USER_ADDR_T((void *)1):	/* _FILESEC_REMOVE_ACL */
		/* a NULL va_acl asks the filesystem to delete the ACL */
		VATTR_SET(&va, va_acl, NULL);
		break;
	/* not being set */
	case USER_ADDR_NULL:
		break;
	default:
		/* copy the caller's filesec (ACL) in from user space */
		if ((error = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0)
			return(error);
		VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
		KAUTH_DEBUG("CHMOD - setting ACL with %d entries", va.va_acl->acl_entrycount);
	}

	error = chmodat(vfs_context_current(), uap->path, &va, AT_FDCWD, 0,
	    UIO_USERSPACE);

	/* the copied-in filesec is ours to free once applied */
	if (xsecdst != NULL)
		kauth_filesec_free(xsecdst);
	return(error);
}
5854
5855/*
5856 * Returns: 0 Success
5857 * chmodat:??? [anything chmodat can return]
5858 */
5859static int
5860fchmodat_internal(vfs_context_t ctx, user_addr_t path, int mode, int fd,
5861 int flag, enum uio_seg segflg)
5862{
5863 struct vnode_attr va;
5864
5865 VATTR_INIT(&va);
5866 VATTR_SET(&va, va_mode, mode & ALLPERMS);
5867
5868 return (chmodat(ctx, path, &va, fd, flag, segflg));
5869}
5870
5871int
5872chmod(__unused proc_t p, struct chmod_args *uap, __unused int32_t *retval)
5873{
5874 return (fchmodat_internal(vfs_context_current(), uap->path, uap->mode,
5875 AT_FDCWD, 0, UIO_USERSPACE));
5876}
5877
5878int
5879fchmodat(__unused proc_t p, struct fchmodat_args *uap, __unused int32_t *retval)
5880{
5881 if (uap->flag & ~AT_SYMLINK_NOFOLLOW)
5882 return (EINVAL);
5883
5884 return (fchmodat_internal(vfs_context_current(), uap->path, uap->mode,
5885 uap->fd, uap->flag, UIO_USERSPACE));
5886}
5887
5888/*
5889 * Change mode of a file given a file descriptor.
5890 */
5891static int
5892fchmod1(__unused proc_t p, int fd, struct vnode_attr *vap)
5893{
5894 vnode_t vp;
5895 int error;
5896
5897 AUDIT_ARG(fd, fd);
5898
5899 if ((error = file_vnode(fd, &vp)) != 0)
5900 return (error);
5901 if ((error = vnode_getwithref(vp)) != 0) {
5902 file_drop(fd);
5903 return(error);
5904 }
5905 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
5906
5907 error = chmod_vnode(vfs_context_current(), vp, vap);
5908 (void)vnode_put(vp);
5909 file_drop(fd);
5910
5911 return (error);
5912}
5913
5914/*
5915 * fchmod_extended: Change mode of a file given a file descriptor; with
5916 * extended argument list (including extended security (ACL)).
5917 *
5918 * Parameters: p Process requesting to change file mode
5919 * uap User argument descriptor (see below)
5920 * retval (ignored)
5921 *
5922 * Indirect: uap->mode File mode to set (same as 'chmod')
5923 * uap->uid UID to set
5924 * uap->gid GID to set
5925 * uap->xsecurity ACL to set (or delete)
5926 * uap->fd File descriptor of file to change mode
5927 *
5928 * Returns: 0 Success
5929 * !0 errno value
5930 *
5931 */
int
fchmod_extended(proc_t p, struct fchmod_extended_args *uap, __unused int32_t *retval)
{
	int error;
	struct vnode_attr va;
	kauth_filesec_t xsecdst;	/* copied-in filesec, if any */

	AUDIT_ARG(owner, uap->uid, uap->gid);

	/*
	 * -1, KAUTH_UID_NONE and KAUTH_GID_NONE are "don't change"
	 * sentinels for mode, uid and gid respectively.
	 */
	VATTR_INIT(&va);
	if (uap->mode != -1)
		VATTR_SET(&va, va_mode, uap->mode & ALLPERMS);
	if (uap->uid != KAUTH_UID_NONE)
		VATTR_SET(&va, va_uid, uap->uid);
	if (uap->gid != KAUTH_GID_NONE)
		VATTR_SET(&va, va_gid, uap->gid);

	xsecdst = NULL;
	/*
	 * NOTE(review): the xsecurity sentinels here differ from
	 * chmod_extended(), where USER_ADDR_NULL means "not being set".
	 * Here both NULL and address 1 remove the ACL, while -1 means
	 * "not being set" -- presumably for binary compatibility; confirm
	 * before unifying.
	 */
	switch(uap->xsecurity) {
	case USER_ADDR_NULL:
		VATTR_SET(&va, va_acl, NULL);
		break;
	case CAST_USER_ADDR_T((void *)1):	/* _FILESEC_REMOVE_ACL */
		VATTR_SET(&va, va_acl, NULL);
		break;
	/* not being set */
	case CAST_USER_ADDR_T(-1):
		break;
	default:
		if ((error = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0)
			return(error);
		VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
	}

	error = fchmod1(p, uap->fd, &va);


	/* free the copied-in filesec, if one was allocated above */
	switch(uap->xsecurity) {
	case USER_ADDR_NULL:
	case CAST_USER_ADDR_T(-1):
		break;
	default:
		if (xsecdst != NULL)
			kauth_filesec_free(xsecdst);
	}
	return(error);
}
5979
5980int
5981fchmod(proc_t p, struct fchmod_args *uap, __unused int32_t *retval)
5982{
5983 struct vnode_attr va;
5984
5985 VATTR_INIT(&va);
5986 VATTR_SET(&va, va_mode, uap->mode & ALLPERMS);
5987
5988 return(fchmod1(p, uap->fd, &va));
5989}
5990
5991
5992/*
5993 * Set ownership given a path name.
5994 */
5995/* ARGSUSED */
static int
fchownat_internal(vfs_context_t ctx, int fd, user_addr_t path, uid_t uid,
    gid_t gid, int flag, enum uio_seg segflg)
{
	vnode_t vp;
	struct vnode_attr va;
	int error;
	struct nameidata nd;
	int follow;
	kauth_action_t action;

	AUDIT_ARG(owner, uid, gid);

	/* AT_SYMLINK_NOFOLLOW gives lchown()-style behaviour */
	follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
	NDINIT(&nd, LOOKUP, OP_SETATTR, follow | AUDITVNPATH1, segflg,
	    path, ctx);
	error = nameiat(&nd, fd);
	if (error)
		return (error);
	vp = nd.ni_vp;

	nameidone(&nd);

	/* VNOVAL is the "don't change" sentinel for both uid and gid */
	VATTR_INIT(&va);
	if (uid != (uid_t)VNOVAL)
		VATTR_SET(&va, va_uid, uid);
	if (gid != (gid_t)VNOVAL)
		VATTR_SET(&va, va_gid, gid);

#if CONFIG_MACF
	error = mac_vnode_check_setowner(ctx, vp, uid, gid);
	if (error)
		goto out;
#endif

	/* preflight and authorize attribute changes */
	if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0)
		goto out;
	if (action && ((error = vnode_authorize(vp, NULL, action, ctx)) != 0))
		goto out;
	error = vnode_setattr(vp, &va, ctx);

out:
	/*
	 * EACCES is only allowed from namei(); permissions failure should
	 * return EPERM, so we need to translate the error code.
	 */
	if (error == EACCES)
		error = EPERM;

	/* drop the iocount taken by nameiat() */
	vnode_put(vp);
	return (error);
}
6049
6050int
6051chown(__unused proc_t p, struct chown_args *uap, __unused int32_t *retval)
6052{
6053 return (fchownat_internal(vfs_context_current(), AT_FDCWD, uap->path,
6054 uap->uid, uap->gid, 0, UIO_USERSPACE));
6055}
6056
6057int
6058lchown(__unused proc_t p, struct lchown_args *uap, __unused int32_t *retval)
6059{
6060 return (fchownat_internal(vfs_context_current(), AT_FDCWD, uap->path,
6061 uap->owner, uap->group, AT_SYMLINK_NOFOLLOW, UIO_USERSPACE));
6062}
6063
6064int
6065fchownat(__unused proc_t p, struct fchownat_args *uap, __unused int32_t *retval)
6066{
6067 if (uap->flag & ~AT_SYMLINK_NOFOLLOW)
6068 return (EINVAL);
6069
6070 return (fchownat_internal(vfs_context_current(), uap->fd, uap->path,
6071 uap->uid, uap->gid, uap->flag, UIO_USERSPACE));
6072}
6073
6074/*
6075 * Set ownership given a file descriptor.
6076 */
6077/* ARGSUSED */
int
fchown(__unused proc_t p, struct fchown_args *uap, __unused int32_t *retval)
{
	struct vnode_attr va;
	vfs_context_t ctx = vfs_context_current();
	vnode_t vp;
	int error;
	kauth_action_t action;

	AUDIT_ARG(owner, uap->uid, uap->gid);
	AUDIT_ARG(fd, uap->fd);

	/* take a use count on the fd, then an iocount on its vnode */
	if ( (error = file_vnode(uap->fd, &vp)) )
		return (error);

	if ( (error = vnode_getwithref(vp)) ) {
		file_drop(uap->fd);
		return(error);
	}
	AUDIT_ARG(vnpath, vp, ARG_VNODE1);

	/* VNOVAL is the "don't change" sentinel for both uid and gid */
	VATTR_INIT(&va);
	if (uap->uid != VNOVAL)
		VATTR_SET(&va, va_uid, uap->uid);
	if (uap->gid != VNOVAL)
		VATTR_SET(&va, va_gid, uap->gid);

#if NAMEDSTREAMS
	/* chown calls are not allowed for resource forks. */
	if (vp->v_flag & VISNAMEDSTREAM) {
		error = EPERM;
		goto out;
	}
#endif

#if CONFIG_MACF
	error = mac_vnode_check_setowner(ctx, vp, uap->uid, uap->gid);
	if (error)
		goto out;
#endif

	/* preflight and authorize attribute changes */
	if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0)
		goto out;
	if (action && ((error = vnode_authorize(vp, NULL, action, ctx)) != 0)) {
		/* permissions failure is reported as EPERM, not EACCES */
		if (error == EACCES)
			error = EPERM;
		goto out;
	}
	error = vnode_setattr(vp, &va, ctx);

out:
	(void)vnode_put(vp);
	file_drop(uap->fd);
	return (error);
}
6134
6135static int
6136getutimes(user_addr_t usrtvp, struct timespec *tsp)
6137{
6138 int error;
6139
6140 if (usrtvp == USER_ADDR_NULL) {
6141 struct timeval old_tv;
6142 /* XXX Y2038 bug because of microtime argument */
6143 microtime(&old_tv);
6144 TIMEVAL_TO_TIMESPEC(&old_tv, &tsp[0]);
6145 tsp[1] = tsp[0];
6146 } else {
6147 if (IS_64BIT_PROCESS(current_proc())) {
6148 struct user64_timeval tv[2];
6149 error = copyin(usrtvp, (void *)tv, sizeof(tv));
6150 if (error)
6151 return (error);
6152 TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]);
6153 TIMEVAL_TO_TIMESPEC(&tv[1], &tsp[1]);
6154 } else {
6155 struct user32_timeval tv[2];
6156 error = copyin(usrtvp, (void *)tv, sizeof(tv));
6157 if (error)
6158 return (error);
6159 TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]);
6160 TIMEVAL_TO_TIMESPEC(&tv[1], &tsp[1]);
6161 }
6162 }
6163 return 0;
6164}
6165
static int
setutimes(vfs_context_t ctx, vnode_t vp, const struct timespec *ts,
    int nullflag)
{
	int error;
	struct vnode_attr va;
	kauth_action_t action;

	AUDIT_ARG(vnpath, vp, ARG_VNODE1);

	/* ts[0] is the access time, ts[1] the modification time */
	VATTR_INIT(&va);
	VATTR_SET(&va, va_access_time, ts[0]);
	VATTR_SET(&va, va_modify_time, ts[1]);
	if (nullflag)
		va.va_vaflags |= VA_UTIMES_NULL;	/* caller supplied no explicit times */

#if NAMEDSTREAMS
	/* utimes calls are not allowed for resource forks. */
	if (vp->v_flag & VISNAMEDSTREAM) {
		error = EPERM;
		goto out;
	}
#endif

#if CONFIG_MACF
	error = mac_vnode_check_setutimes(ctx, vp, ts[0], ts[1]);
	if (error)
		goto out;
#endif
	/*
	 * With explicit times, a permissions failure is reported as
	 * EPERM rather than EACCES.
	 */
	if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0) {
		if (!nullflag && error == EACCES)
			error = EPERM;
		goto out;
	}

	/* since we may not need to auth anything, check here */
	if ((action != 0) && ((error = vnode_authorize(vp, NULL, action, ctx)) != 0)) {
		if (!nullflag && error == EACCES)
			error = EPERM;
		goto out;
	}
	error = vnode_setattr(vp, &va, ctx);

out:
	return error;
}
6212
6213/*
6214 * Set the access and modification times of a file.
6215 */
6216/* ARGSUSED */
int
utimes(__unused proc_t p, struct utimes_args *uap, __unused int32_t *retval)
{
	struct timespec ts[2];
	user_addr_t usrtvp;
	int error;
	struct nameidata nd;
	vfs_context_t ctx = vfs_context_current();

	/*
	 * AUDIT: Needed to change the order of operations to do the
	 * name lookup first because auditing wants the path.
	 */
	NDINIT(&nd, LOOKUP, OP_SETATTR, FOLLOW | AUDITVNPATH1,
	    UIO_USERSPACE, uap->path, ctx);
	error = namei(&nd);
	if (error)
		return (error);
	nameidone(&nd);

	/*
	 * Fetch the user-supplied time.  If usrtvp is USER_ADDR_NULL, we fetch
	 * the current time instead.
	 */
	usrtvp = uap->tptr;
	if ((error = getutimes(usrtvp, ts)) != 0)
		goto out;

	error = setutimes(ctx, nd.ni_vp, ts, usrtvp == USER_ADDR_NULL);

out:
	/* drop the iocount taken by namei() */
	vnode_put(nd.ni_vp);
	return (error);
}
6251
6252/*
6253 * Set the access and modification times of a file.
6254 */
6255/* ARGSUSED */
6256int
6257futimes(__unused proc_t p, struct futimes_args *uap, __unused int32_t *retval)
6258{
6259 struct timespec ts[2];
6260 vnode_t vp;
6261 user_addr_t usrtvp;
6262 int error;
6263
6264 AUDIT_ARG(fd, uap->fd);
6265 usrtvp = uap->tptr;
6266 if ((error = getutimes(usrtvp, ts)) != 0)
6267 return (error);
6268 if ((error = file_vnode(uap->fd, &vp)) != 0)
6269 return (error);
6270 if((error = vnode_getwithref(vp))) {
6271 file_drop(uap->fd);
6272 return(error);
6273 }
6274
6275 error = setutimes(vfs_context_current(), vp, ts, usrtvp == 0);
6276 vnode_put(vp);
6277 file_drop(uap->fd);
6278 return(error);
6279}
6280
6281/*
6282 * Truncate a file given its path name.
6283 */
6284/* ARGSUSED */
int
truncate(__unused proc_t p, struct truncate_args *uap, __unused int32_t *retval)
{
	vnode_t vp;
	struct vnode_attr va;
	vfs_context_t ctx = vfs_context_current();
	int error;
	struct nameidata nd;
	kauth_action_t action;

	/* negative lengths are invalid */
	if (uap->length < 0)
		return(EINVAL);
	NDINIT(&nd, LOOKUP, OP_TRUNCATE, FOLLOW | AUDITVNPATH1,
	    UIO_USERSPACE, uap->path, ctx);
	if ((error = namei(&nd)))
		return (error);
	vp = nd.ni_vp;

	nameidone(&nd);

	/* truncation is a data-size attribute change */
	VATTR_INIT(&va);
	VATTR_SET(&va, va_data_size, uap->length);

#if CONFIG_MACF
	/* path-based truncate has no file credential; pass NOCRED */
	error = mac_vnode_check_truncate(ctx, NOCRED, vp);
	if (error)
		goto out;
#endif

	/* preflight and authorize the attribute change before applying it */
	if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0)
		goto out;
	if ((action != 0) && ((error = vnode_authorize(vp, NULL, action, ctx)) != 0))
		goto out;
	error = vnode_setattr(vp, &va, ctx);
out:
	/* drop the iocount taken by namei() */
	vnode_put(vp);
	return (error);
}
6323
6324/*
6325 * Truncate a file given a file descriptor.
6326 */
6327/* ARGSUSED */
int
ftruncate(proc_t p, struct ftruncate_args *uap, int32_t *retval)
{
	vfs_context_t ctx = vfs_context_current();
	struct vnode_attr va;
	vnode_t vp;
	struct fileproc *fp;
	int error ;
	int fd = uap->fd;

	AUDIT_ARG(fd, uap->fd);
	/* negative lengths are invalid */
	if (uap->length < 0)
		return(EINVAL);

	/* fp_lookup() takes a reference dropped by file_drop() at "out" */
	if ( (error = fp_lookup(p,fd,&fp,0)) ) {
		return(error);
	}

	/* only POSIX shared memory objects and vnodes can be truncated */
	switch (FILEGLOB_DTYPE(fp->f_fglob)) {
	case DTYPE_PSXSHM:
		error = pshm_truncate(p, fp, uap->fd, uap->length, retval);
		goto out;
	case DTYPE_VNODE:
		break;
	default:
		error = EINVAL;
		goto out;
	}

	vp = (vnode_t)fp->f_fglob->fg_data;

	/* the descriptor must have been opened for writing */
	if ((fp->f_fglob->fg_flag & FWRITE) == 0) {
		AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
		error = EINVAL;
		goto out;
	}

	if ((error = vnode_getwithref(vp)) != 0) {
		goto out;
	}

	AUDIT_ARG(vnpath, vp, ARG_VNODE1);

#if CONFIG_MACF
	/* unlike truncate(2), check against the descriptor's credential */
	error = mac_vnode_check_truncate(ctx,
	    fp->f_fglob->fg_cred, vp);
	if (error) {
		(void)vnode_put(vp);
		goto out;
	}
#endif
	VATTR_INIT(&va);
	VATTR_SET(&va, va_data_size, uap->length);
	error = vnode_setattr(vp, &va, ctx);
	(void)vnode_put(vp);
out:
	file_drop(fd);
	return (error);
}
6387
6388
6389/*
6390 * Sync an open file with synchronized I/O _file_ integrity completion
6391 */
6392/* ARGSUSED */
6393int
6394fsync(proc_t p, struct fsync_args *uap, __unused int32_t *retval)
6395{
6396 __pthread_testcancel(1);
6397 return(fsync_common(p, uap, MNT_WAIT));
6398}
6399
6400
6401/*
6402 * Sync an open file with synchronized I/O _file_ integrity completion
6403 *
6404 * Notes: This is a legacy support function that does not test for
6405 * thread cancellation points.
6406 */
6407/* ARGSUSED */
6408int
6409fsync_nocancel(proc_t p, struct fsync_nocancel_args *uap, __unused int32_t *retval)
6410{
6411 return(fsync_common(p, (struct fsync_args *)uap, MNT_WAIT));
6412}
6413
6414
6415/*
6416 * Sync an open file with synchronized I/O _data_ integrity completion
6417 */
6418/* ARGSUSED */
6419int
6420fdatasync(proc_t p, struct fdatasync_args *uap, __unused int32_t *retval)
6421{
6422 __pthread_testcancel(1);
6423 return(fsync_common(p, (struct fsync_args *)uap, MNT_DWAIT));
6424}
6425
6426
6427/*
6428 * fsync_common
6429 *
6430 * Common fsync code to support both synchronized I/O file integrity completion
6431 * (normal fsync) and synchronized I/O data integrity completion (fdatasync).
6432 *
6433 * If 'flags' is MNT_DWAIT, the caller is requesting data integrity, which
6434 * will only guarantee that the file data contents are retrievable. If
 * 'flags' is MNT_WAIT, the caller is requesting file integrity, which
 * additionally requires that metadata unnecessary for retrieving the file
 * data contents, such as atime, mtime, ctime, etc., also be committed to
 * stable storage.
6439 *
6440 * Parameters: p The process
6441 * uap->fd The descriptor to synchronize
6442 * flags The data integrity flags
6443 *
6444 * Returns: int Success
6445 * fp_getfvp:EBADF Bad file descriptor
6446 * fp_getfvp:ENOTSUP fd does not refer to a vnode
6447 * VNOP_FSYNC:??? unspecified
6448 *
6449 * Notes: We use struct fsync_args because it is a short name, and all
6450 * caller argument structures are otherwise identical.
6451 */
static int
fsync_common(proc_t p, struct fsync_args *uap, int flags)
{
	vnode_t vp;
	struct fileproc *fp;
	vfs_context_t ctx = vfs_context_current();
	int error;

	AUDIT_ARG(fd, uap->fd);

	/* resolve the fd to a vnode and pin the vnode with an iocount */
	if ( (error = fp_getfvp(p, uap->fd, &fp, &vp)) )
		return (error);
	if ( (error = vnode_getwithref(vp)) ) {
		file_drop(uap->fd);
		return(error);
	}

	AUDIT_ARG(vnpath, vp, ARG_VNODE1);

	/* flags is MNT_WAIT (fsync) or MNT_DWAIT (fdatasync) */
	error = VNOP_FSYNC(vp, flags, ctx);

#if NAMEDRSRCFORK
	/* Sync resource fork shadow file if necessary. */
	if ((error == 0) &&
	    (vp->v_flag & VISNAMEDSTREAM) &&
	    (vp->v_parent != NULLVP) &&
	    vnode_isshadow(vp) &&
	    (fp->f_flags & FP_WRITTEN)) {
		(void) vnode_flushnamedstream(vp->v_parent, vp, ctx);
	}
#endif

	(void)vnode_put(vp);
	file_drop(uap->fd);
	return (error);
}
6488
6489/*
6490 * Duplicate files. Source must be a file, target must be a file or
6491 * must not exist.
6492 *
6493 * XXX Copyfile authorisation checking is woefully inadequate, and will not
6494 * perform inheritance correctly.
6495 */
6496/* ARGSUSED */
int
copyfile(__unused proc_t p, struct copyfile_args *uap, __unused int32_t *retval)
{
	vnode_t tvp, fvp, tdvp, sdvp;
	struct nameidata fromnd, tond;
	int error;
	vfs_context_t ctx = vfs_context_current();

	/* Check that the flags are valid. */

	if (uap->flags & ~CPF_MASK) {
		return(EINVAL);
	}

	/* look up the source */
	NDINIT(&fromnd, LOOKUP, OP_COPYFILE, AUDITVNPATH1,
	    UIO_USERSPACE, uap->from, ctx);
	if ((error = namei(&fromnd)))
		return (error);
	fvp = fromnd.ni_vp;

	/*
	 * Look up the target for creation; SAVESTART keeps the start
	 * directory (released as sdvp below).
	 */
	NDINIT(&tond, CREATE, OP_LINK,
	    LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART | AUDITVNPATH2 | CN_NBMOUNTLOOK,
	    UIO_USERSPACE, uap->to, ctx);
	if ((error = namei(&tond))) {
		goto out1;
	}
	tdvp = tond.ni_dvp;
	tvp = tond.ni_vp;

	/* an existing target may only be replaced with CPF_OVERWRITE */
	if (tvp != NULL) {
		if (!(uap->flags & CPF_OVERWRITE)) {
			error = EEXIST;
			goto out;
		}
	}
	/* neither source nor target may be a directory */
	if (fvp->v_type == VDIR || (tvp && tvp->v_type == VDIR)) {
		error = EISDIR;
		goto out;
	}

	if ((error = vnode_authorize(tdvp, NULL, KAUTH_VNODE_ADD_FILE, ctx)) != 0)
		goto out;

	/* copying a file into itself as its own parent is invalid */
	if (fvp == tdvp)
		error = EINVAL;
	/*
	 * If source is the same as the destination (that is the
	 * same inode number) then there is nothing to do.
	 * (fixed to have POSIX semantics - CSM 3/2/98)
	 *
	 * -1 is an internal "nothing to do" sentinel, converted to
	 * success at the bottom of the function.
	 */
	if (fvp == tvp)
		error = -1;
	if (!error)
		error = VNOP_COPYFILE(fvp, tdvp, tvp, &tond.ni_cnd, uap->mode, uap->flags, ctx);
out:
	sdvp = tond.ni_startdir;
	/*
	 * nameidone has to happen before we vnode_put(tdvp)
	 * since it may need to release the fs_nodelock on the tdvp
	 */
	nameidone(&tond);

	if (tvp)
		vnode_put(tvp);
	vnode_put(tdvp);
	vnode_put(sdvp);
out1:
	vnode_put(fvp);

	nameidone(&fromnd);

	/* same-file copy is a successful no-op */
	if (error == -1)
		return (0);
	return (error);
}
6572
6573
6574/*
6575 * Rename files. Source and destination must either both be directories,
6576 * or both not be directories. If target is a directory, it must be empty.
6577 */
6578/* ARGSUSED */
6579static int
6580renameat_internal(vfs_context_t ctx, int fromfd, user_addr_t from,
6581 int tofd, user_addr_t to, int segflg, vfs_rename_flags_t flags)
6582{
6583 vnode_t tvp, tdvp;
6584 vnode_t fvp, fdvp;
6585 struct nameidata *fromnd, *tond;
6586 int error;
6587 int do_retry;
6588 int retry_count;
6589 int mntrename;
6590 int need_event;
6591 const char *oname = NULL;
6592 char *from_name = NULL, *to_name = NULL;
6593 int from_len=0, to_len=0;
6594 int holding_mntlock;
6595 mount_t locked_mp = NULL;
6596 vnode_t oparent = NULLVP;
6597#if CONFIG_FSE
6598 fse_info from_finfo, to_finfo;
6599#endif
6600 int from_truncated=0, to_truncated;
6601 int batched = 0;
6602 struct vnode_attr *fvap, *tvap;
6603 int continuing = 0;
6604 /* carving out a chunk for structs that are too big to be on stack. */
6605 struct {
6606 struct nameidata from_node, to_node;
6607 struct vnode_attr fv_attr, tv_attr;
6608 } * __rename_data;
6609 MALLOC(__rename_data, void *, sizeof(*__rename_data), M_TEMP, M_WAITOK);
6610 fromnd = &__rename_data->from_node;
6611 tond = &__rename_data->to_node;
6612
6613 holding_mntlock = 0;
6614 do_retry = 0;
6615 retry_count = 0;
6616retry:
6617 fvp = tvp = NULL;
6618 fdvp = tdvp = NULL;
6619 fvap = tvap = NULL;
6620 mntrename = FALSE;
6621
6622 NDINIT(fromnd, DELETE, OP_UNLINK, WANTPARENT | AUDITVNPATH1,
6623 segflg, from, ctx);
6624 fromnd->ni_flag = NAMEI_COMPOUNDRENAME;
6625
6626 NDINIT(tond, RENAME, OP_RENAME, WANTPARENT | AUDITVNPATH2 | CN_NBMOUNTLOOK,
6627 segflg, to, ctx);
6628 tond->ni_flag = NAMEI_COMPOUNDRENAME;
6629
6630continue_lookup:
6631 if ((fromnd->ni_flag & NAMEI_CONTLOOKUP) != 0 || !continuing) {
6632 if ( (error = nameiat(fromnd, fromfd)) )
6633 goto out1;
6634 fdvp = fromnd->ni_dvp;
6635 fvp = fromnd->ni_vp;
6636
6637 if (fvp && fvp->v_type == VDIR)
6638 tond->ni_cnd.cn_flags |= WILLBEDIR;
6639 }
6640
6641 if ((tond->ni_flag & NAMEI_CONTLOOKUP) != 0 || !continuing) {
6642 if ( (error = nameiat(tond, tofd)) ) {
6643 /*
6644 * Translate error code for rename("dir1", "dir2/.").
6645 */
6646 if (error == EISDIR && fvp->v_type == VDIR)
6647 error = EINVAL;
6648 goto out1;
6649 }
6650 tdvp = tond->ni_dvp;
6651 tvp = tond->ni_vp;
6652 }
6653
6654 batched = vnode_compound_rename_available(fdvp);
6655 if (!fvp) {
6656 /*
6657 * Claim: this check will never reject a valid rename.
6658 * For success, either fvp must be on the same mount as tdvp, or fvp must sit atop a vnode on the same mount as tdvp.
6659 * Suppose fdvp and tdvp are not on the same mount.
6660 * If fvp is on the same mount as tdvp, then fvp is not on the same mount as fdvp, so fvp is the root of its filesystem. If fvp is the root,
6661 * then you can't move it to within another dir on the same mountpoint.
6662 * If fvp sits atop a vnode on the same mount as fdvp, then that vnode must be part of the same mount as fdvp, which is a contradiction.
6663 *
6664 * If this check passes, then we are safe to pass these vnodes to the same FS.
6665 */
6666 if (fdvp->v_mount != tdvp->v_mount) {
6667 error = EXDEV;
6668 goto out1;
6669 }
6670 goto skipped_lookup;
6671 }
6672
6673 if (!batched) {
6674 error = vn_authorize_rename(fdvp, fvp, &fromnd->ni_cnd, tdvp, tvp, &tond->ni_cnd, ctx, NULL);
6675 if (error) {
6676 if (error == ENOENT) {
6677 assert(retry_count < MAX_AUTHORIZE_ENOENT_RETRIES);
6678 if (retry_count < MAX_AUTHORIZE_ENOENT_RETRIES) {
6679 /*
6680 * We encountered a race where after doing the namei, tvp stops
6681 * being valid. If so, simply re-drive the rename call from the
6682 * top.
6683 */
6684 do_retry = 1;
6685 retry_count += 1;
6686 }
6687 }
6688 goto out1;
6689 }
6690 }
6691
6692 /*
6693 * If the source and destination are the same (i.e. they're
6694 * links to the same vnode) and the target file system is
6695 * case sensitive, then there is nothing to do.
6696 *
6697 * XXX Come back to this.
6698 */
6699 if (fvp == tvp) {
6700 int pathconf_val;
6701
6702 /*
6703 * Note: if _PC_CASE_SENSITIVE selector isn't supported,
6704 * then assume that this file system is case sensitive.
6705 */
6706 if (VNOP_PATHCONF(fvp, _PC_CASE_SENSITIVE, &pathconf_val, ctx) != 0 ||
6707 pathconf_val != 0) {
6708 goto out1;
6709 }
6710 }
6711
6712 /*
6713 * Allow the renaming of mount points.
6714 * - target must not exist
6715 * - target must reside in the same directory as source
6716 * - union mounts cannot be renamed
6717 * - "/" cannot be renamed
6718 *
6719 * XXX Handle this in VFS after a continued lookup (if we missed
6720 * in the cache to start off)
6721 */
6722 if ((fvp->v_flag & VROOT) &&
6723 (fvp->v_type == VDIR) &&
6724 (tvp == NULL) &&
6725 (fvp->v_mountedhere == NULL) &&
6726 (fdvp == tdvp) &&
6727 ((fvp->v_mount->mnt_flag & (MNT_UNION | MNT_ROOTFS)) == 0) &&
6728 (fvp->v_mount->mnt_vnodecovered != NULLVP)) {
6729 vnode_t coveredvp;
6730
6731 /* switch fvp to the covered vnode */
6732 coveredvp = fvp->v_mount->mnt_vnodecovered;
6733 if ( (vnode_getwithref(coveredvp)) ) {
6734 error = ENOENT;
6735 goto out1;
6736 }
6737 vnode_put(fvp);
6738
6739 fvp = coveredvp;
6740 mntrename = TRUE;
6741 }
6742 /*
6743 * Check for cross-device rename.
6744 */
6745 if ((fvp->v_mount != tdvp->v_mount) ||
6746 (tvp && (fvp->v_mount != tvp->v_mount))) {
6747 error = EXDEV;
6748 goto out1;
6749 }
6750
6751 /*
6752 * If source is the same as the destination (that is the
6753 * same inode number) then there is nothing to do...
6754 * EXCEPT if the underlying file system supports case
6755 * insensitivity and is case preserving. In this case
6756 * the file system needs to handle the special case of
6757 * getting the same vnode as target (fvp) and source (tvp).
6758 *
6759 * Only file systems that support pathconf selectors _PC_CASE_SENSITIVE
6760 * and _PC_CASE_PRESERVING can have this exception, and they need to
6761 * handle the special case of getting the same vnode as target and
6762 * source. NOTE: Then the target is unlocked going into vnop_rename,
6763 * so not to cause locking problems. There is a single reference on tvp.
6764 *
6765 * NOTE - that fvp == tvp also occurs if they are hard linked and
6766 * that correct behaviour then is just to return success without doing
6767 * anything.
6768 *
6769 * XXX filesystem should take care of this itself, perhaps...
6770 */
6771 if (fvp == tvp && fdvp == tdvp) {
6772 if (fromnd->ni_cnd.cn_namelen == tond->ni_cnd.cn_namelen &&
6773 !bcmp(fromnd->ni_cnd.cn_nameptr, tond->ni_cnd.cn_nameptr,
6774 fromnd->ni_cnd.cn_namelen)) {
6775 goto out1;
6776 }
6777 }
6778
6779 if (holding_mntlock && fvp->v_mount != locked_mp) {
6780 /*
6781 * we're holding a reference and lock
6782 * on locked_mp, but it no longer matches
6783 * what we want to do... so drop our hold
6784 */
6785 mount_unlock_renames(locked_mp);
6786 mount_drop(locked_mp, 0);
6787 holding_mntlock = 0;
6788 }
6789 if (tdvp != fdvp && fvp->v_type == VDIR) {
6790 /*
6791 * serialize renames that re-shape
6792 * the tree... if holding_mntlock is
6793 * set, then we're ready to go...
6794 * otherwise we
6795 * first need to drop the iocounts
6796 * we picked up, second take the
6797 * lock to serialize the access,
6798 * then finally start the lookup
6799 * process over with the lock held
6800 */
6801 if (!holding_mntlock) {
6802 /*
6803 * need to grab a reference on
6804 * the mount point before we
6805 * drop all the iocounts... once
6806 * the iocounts are gone, the mount
6807 * could follow
6808 */
6809 locked_mp = fvp->v_mount;
6810 mount_ref(locked_mp, 0);
6811
6812 /*
6813 * nameidone has to happen before we vnode_put(tvp)
6814 * since it may need to release the fs_nodelock on the tvp
6815 */
6816 nameidone(tond);
6817
6818 if (tvp)
6819 vnode_put(tvp);
6820 vnode_put(tdvp);
6821
6822 /*
6823 * nameidone has to happen before we vnode_put(fdvp)
6824 * since it may need to release the fs_nodelock on the fvp
6825 */
6826 nameidone(fromnd);
6827
6828 vnode_put(fvp);
6829 vnode_put(fdvp);
6830
6831 mount_lock_renames(locked_mp);
6832 holding_mntlock = 1;
6833
6834 goto retry;
6835 }
6836 } else {
6837 /*
6838 * when we dropped the iocounts to take
6839 * the lock, we allowed the identity of
6840 * the various vnodes to change... if they did,
6841 * we may no longer be dealing with a rename
6842 * that reshapes the tree... once we're holding
6843 * the iocounts, the vnodes can't change type
6844 * so we're free to drop the lock at this point
6845 * and continue on
6846 */
6847 if (holding_mntlock) {
6848 mount_unlock_renames(locked_mp);
6849 mount_drop(locked_mp, 0);
6850 holding_mntlock = 0;
6851 }
6852 }
6853
6854 // save these off so we can later verify that fvp is the same
6855 oname = fvp->v_name;
6856 oparent = fvp->v_parent;
6857
6858skipped_lookup:
6859#if CONFIG_FSE
6860 need_event = need_fsevent(FSE_RENAME, fdvp);
6861 if (need_event) {
6862 if (fvp) {
6863 get_fse_info(fvp, &from_finfo, ctx);
6864 } else {
6865 error = vfs_get_notify_attributes(&__rename_data->fv_attr);
6866 if (error) {
6867 goto out1;
6868 }
6869
6870 fvap = &__rename_data->fv_attr;
6871 }
6872
6873 if (tvp) {
6874 get_fse_info(tvp, &to_finfo, ctx);
6875 } else if (batched) {
6876 error = vfs_get_notify_attributes(&__rename_data->tv_attr);
6877 if (error) {
6878 goto out1;
6879 }
6880
6881 tvap = &__rename_data->tv_attr;
6882 }
6883 }
6884#else
6885 need_event = 0;
6886#endif /* CONFIG_FSE */
6887
6888 if (need_event || kauth_authorize_fileop_has_listeners()) {
6889 if (from_name == NULL) {
6890 GET_PATH(from_name);
6891 if (from_name == NULL) {
6892 error = ENOMEM;
6893 goto out1;
6894 }
6895 }
6896
6897 from_len = safe_getpath(fdvp, fromnd->ni_cnd.cn_nameptr, from_name, MAXPATHLEN, &from_truncated);
6898
6899 if (to_name == NULL) {
6900 GET_PATH(to_name);
6901 if (to_name == NULL) {
6902 error = ENOMEM;
6903 goto out1;
6904 }
6905 }
6906
6907 to_len = safe_getpath(tdvp, tond->ni_cnd.cn_nameptr, to_name, MAXPATHLEN, &to_truncated);
6908 }
6909#if CONFIG_SECLUDED_RENAME
6910 if (flags & VFS_SECLUDE_RENAME) {
6911 fromnd->ni_cnd.cn_flags |= CN_SECLUDE_RENAME;
6912 }
6913#else
6914 #pragma unused(flags)
6915#endif
6916 error = vn_rename(fdvp, &fvp, &fromnd->ni_cnd, fvap,
6917 tdvp, &tvp, &tond->ni_cnd, tvap,
6918 0, ctx);
6919
6920 if (holding_mntlock) {
6921 /*
6922 * we can drop our serialization
6923 * lock now
6924 */
6925 mount_unlock_renames(locked_mp);
6926 mount_drop(locked_mp, 0);
6927 holding_mntlock = 0;
6928 }
6929 if (error) {
6930 if (error == EKEEPLOOKING) {
6931 if ((fromnd->ni_flag & NAMEI_CONTLOOKUP) == 0) {
6932 if ((tond->ni_flag & NAMEI_CONTLOOKUP) == 0) {
6933 panic("EKEEPLOOKING without NAMEI_CONTLOOKUP on either ndp?");
6934 }
6935 }
6936
6937 fromnd->ni_vp = fvp;
6938 tond->ni_vp = tvp;
6939
6940 goto continue_lookup;
6941 }
6942
6943 /*
6944 * We may encounter a race in the VNOP where the destination didn't
6945 * exist when we did the namei, but it does by the time we go and
6946 * try to create the entry. In this case, we should re-drive this rename
6947 * call from the top again. Currently, only HFS bubbles out ERECYCLE,
6948 * but other filesystems susceptible to this race could return it, too.
6949 */
6950 if (error == ERECYCLE) {
6951 do_retry = 1;
6952 }
6953
6954 /*
6955 * For compound VNOPs, the authorization callback may return
6956 * ENOENT in case of racing hardlink lookups hitting the name
6957 * cache, redrive the lookup.
6958 */
6959 if (batched && error == ENOENT) {
6960 assert(retry_count < MAX_AUTHORIZE_ENOENT_RETRIES);
6961 if (retry_count < MAX_AUTHORIZE_ENOENT_RETRIES) {
6962 do_retry = 1;
6963 retry_count += 1;
6964 }
6965 }
6966
6967 goto out1;
6968 }
6969
6970 /* call out to allow 3rd party notification of rename.
6971 * Ignore result of kauth_authorize_fileop call.
6972 */
6973 kauth_authorize_fileop(vfs_context_ucred(ctx),
6974 KAUTH_FILEOP_RENAME,
6975 (uintptr_t)from_name, (uintptr_t)to_name);
6976
6977#if CONFIG_FSE
6978 if (from_name != NULL && to_name != NULL) {
6979 if (from_truncated || to_truncated) {
6980 // set it here since only the from_finfo gets reported up to user space
6981 from_finfo.mode |= FSE_TRUNCATED_PATH;
6982 }
6983
6984 if (tvap && tvp) {
6985 vnode_get_fse_info_from_vap(tvp, &to_finfo, tvap);
6986 }
6987 if (fvap) {
6988 vnode_get_fse_info_from_vap(fvp, &from_finfo, fvap);
6989 }
6990
6991 if (tvp) {
6992 add_fsevent(FSE_RENAME, ctx,
6993 FSE_ARG_STRING, from_len, from_name,
6994 FSE_ARG_FINFO, &from_finfo,
6995 FSE_ARG_STRING, to_len, to_name,
6996 FSE_ARG_FINFO, &to_finfo,
6997 FSE_ARG_DONE);
6998 } else {
6999 add_fsevent(FSE_RENAME, ctx,
7000 FSE_ARG_STRING, from_len, from_name,
7001 FSE_ARG_FINFO, &from_finfo,
7002 FSE_ARG_STRING, to_len, to_name,
7003 FSE_ARG_DONE);
7004 }
7005 }
7006#endif /* CONFIG_FSE */
7007
7008 /*
7009 * update filesystem's mount point data
7010 */
7011 if (mntrename) {
7012 char *cp, *pathend, *mpname;
7013 char * tobuf;
7014 struct mount *mp;
7015 int maxlen;
7016 size_t len = 0;
7017
7018 mp = fvp->v_mountedhere;
7019
7020 if (vfs_busy(mp, LK_NOWAIT)) {
7021 error = EBUSY;
7022 goto out1;
7023 }
7024 MALLOC_ZONE(tobuf, char *, MAXPATHLEN, M_NAMEI, M_WAITOK);
7025
7026 if (UIO_SEG_IS_USER_SPACE(segflg))
7027 error = copyinstr(to, tobuf, MAXPATHLEN, &len);
7028 else
7029 error = copystr((void *)to, tobuf, MAXPATHLEN, &len);
7030 if (!error) {
7031 /* find current mount point prefix */
7032 pathend = &mp->mnt_vfsstat.f_mntonname[0];
7033 for (cp = pathend; *cp != '\0'; ++cp) {
7034 if (*cp == '/')
7035 pathend = cp + 1;
7036 }
7037 /* find last component of target name */
7038 for (mpname = cp = tobuf; *cp != '\0'; ++cp) {
7039 if (*cp == '/')
7040 mpname = cp + 1;
7041 }
7042 /* append name to prefix */
7043 maxlen = MAXPATHLEN - (pathend - mp->mnt_vfsstat.f_mntonname);
7044 bzero(pathend, maxlen);
7045 strlcpy(pathend, mpname, maxlen);
7046 }
7047 FREE_ZONE(tobuf, MAXPATHLEN, M_NAMEI);
7048
7049 vfs_unbusy(mp);
7050 }
7051 /*
7052 * fix up name & parent pointers. note that we first
7053 * check that fvp has the same name/parent pointers it
7054 * had before the rename call... this is a 'weak' check
7055 * at best...
7056 *
7057 * XXX oparent and oname may not be set in the compound vnop case
7058 */
7059 if (batched || (oname == fvp->v_name && oparent == fvp->v_parent)) {
7060 int update_flags;
7061
7062 update_flags = VNODE_UPDATE_NAME;
7063
7064 if (fdvp != tdvp)
7065 update_flags |= VNODE_UPDATE_PARENT;
7066
7067 vnode_update_identity(fvp, tdvp, tond->ni_cnd.cn_nameptr, tond->ni_cnd.cn_namelen, tond->ni_cnd.cn_hash, update_flags);
7068 }
7069out1:
7070 if (to_name != NULL) {
7071 RELEASE_PATH(to_name);
7072 to_name = NULL;
7073 }
7074 if (from_name != NULL) {
7075 RELEASE_PATH(from_name);
7076 from_name = NULL;
7077 }
7078 if (holding_mntlock) {
7079 mount_unlock_renames(locked_mp);
7080 mount_drop(locked_mp, 0);
7081 holding_mntlock = 0;
7082 }
7083 if (tdvp) {
7084 /*
7085 * nameidone has to happen before we vnode_put(tdvp)
7086 * since it may need to release the fs_nodelock on the tdvp
7087 */
7088 nameidone(tond);
7089
7090 if (tvp)
7091 vnode_put(tvp);
7092 vnode_put(tdvp);
7093 }
7094 if (fdvp) {
7095 /*
7096 * nameidone has to happen before we vnode_put(fdvp)
7097 * since it may need to release the fs_nodelock on the fdvp
7098 */
7099 nameidone(fromnd);
7100
7101 if (fvp)
7102 vnode_put(fvp);
7103 vnode_put(fdvp);
7104 }
7105
7106 /*
7107 * If things changed after we did the namei, then we will re-drive
7108 * this rename call from the top.
7109 */
7110 if (do_retry) {
7111 do_retry = 0;
7112 goto retry;
7113 }
7114
7115 FREE(__rename_data, M_TEMP);
7116 return (error);
7117}
7118
7119int
7120rename(__unused proc_t p, struct rename_args *uap, __unused int32_t *retval)
7121{
7122 return (renameat_internal(vfs_context_current(), AT_FDCWD, uap->from,
7123 AT_FDCWD, uap->to, UIO_USERSPACE, 0));
7124}
7125
7126#if CONFIG_SECLUDED_RENAME
7127int rename_ext(__unused proc_t p, struct rename_ext_args *uap, __unused int32_t *retval)
7128{
7129 return renameat_internal(
7130 vfs_context_current(),
7131 AT_FDCWD, uap->from,
7132 AT_FDCWD, uap->to,
7133 UIO_USERSPACE, uap->flags);
7134}
7135#endif
7136
7137int
7138renameat(__unused proc_t p, struct renameat_args *uap, __unused int32_t *retval)
7139{
7140 return (renameat_internal(vfs_context_current(), uap->fromfd, uap->from,
7141 uap->tofd, uap->to, UIO_USERSPACE, 0));
7142}
7143
/*
 * Make a directory file.
 *
 * The pathname is resolved relative to the directory file descriptor
 * fd (or the CWD when fd == AT_FDCWD); vap carries the attributes
 * (mode, optional ACL) for the new directory.  Uses the filesystem's
 * compound-mkdir VNOP when available (lookup + create in one call).
 *
 * Returns:	0			Success
 *		EEXIST
 *	namei:???
 *	vnode_authorize:???
 *	vn_create:???
 */
/* ARGSUSED */
static int
mkdir1at(vfs_context_t ctx, user_addr_t path, struct vnode_attr *vap, int fd,
    enum uio_seg segflg)
{
	vnode_t	vp, dvp;
	int error;
	int update_flags = 0;
	int batched;
	struct nameidata nd;

	AUDIT_ARG(mode, vap->va_mode);
	/* Request a compound mkdir; the FS may resolve and create in one VNOP. */
	NDINIT(&nd, CREATE, OP_MKDIR, LOCKPARENT | AUDITVNPATH1, segflg,
	    path, ctx);
	nd.ni_cnd.cn_flags |= WILLBEDIR;
	nd.ni_flag = NAMEI_COMPOUNDMKDIR;

continue_lookup:
	error = nameiat(&nd, fd);
	if (error)
		return (error);
	dvp = nd.ni_dvp;
	vp = nd.ni_vp;

	/* A vnode for the leaf means the target already exists. */
	if (vp != NULL) {
		error = EEXIST;
		goto out;
	}

	batched = vnode_compound_mkdir_available(dvp);

	VATTR_SET(vap, va_type, VDIR);

	/*
	 * XXX
	 * Don't authorize in VFS for compound VNOP.... mkdir -p today assumes that it will
	 * only get EXISTS or EISDIR for existing path components, and not that it could see
	 * EACCESS/EPERM--so if we authorize for mkdir on "/" for "mkdir -p /tmp/foo/bar/baz"
	 * it will fail in a spurious manner.  Need to figure out if this is valid behavior.
	 */
	if ((error = vn_authorize_mkdir(dvp, &nd.ni_cnd, vap, ctx, NULL)) != 0) {
		if (error == EACCES || error == EPERM) {
			int error2;

			nameidone(&nd);
			vnode_put(dvp);
			dvp = NULLVP;

			/*
			 * Try a lookup without "NAMEI_COMPOUNDVNOP" to make sure we return EEXIST
			 * rather than EACCESS if the target exists.
			 */
			NDINIT(&nd, LOOKUP, OP_MKDIR, AUDITVNPATH1, segflg,
			    path, ctx);
			error2 = nameiat(&nd, fd);
			if (error2) {
				goto out;
			} else {
				vp = nd.ni_vp;
				error = EEXIST;
				goto out;
			}
		}

		goto out;
	}

	/*
	 * make the directory
	 */
	if ((error = vn_create(dvp, &vp, &nd, vap, 0, 0, NULL, ctx)) != 0) {
		if (error == EKEEPLOOKING) {
			/* Compound VNOP asked us to redrive the lookup. */
			nd.ni_vp = vp;
			goto continue_lookup;
		}

		goto out;
	}

	// Make sure the name & parent pointers are hooked up
	if (vp->v_name == NULL)
		update_flags |= VNODE_UPDATE_NAME;
	if (vp->v_parent == NULLVP)
		update_flags |= VNODE_UPDATE_PARENT;

	if (update_flags)
		vnode_update_identity(vp, dvp, nd.ni_cnd.cn_nameptr, nd.ni_cnd.cn_namelen, nd.ni_cnd.cn_hash, update_flags);

#if CONFIG_FSE
	add_fsevent(FSE_CREATE_DIR, ctx, FSE_ARG_VNODE, vp, FSE_ARG_DONE);
#endif

out:
	/*
	 * nameidone has to happen before we vnode_put(dvp)
	 * since it may need to release the fs_nodelock on the dvp
	 */
	nameidone(&nd);

	if (vp)
		vnode_put(vp);
	if (dvp)
		vnode_put(dvp);

	return (error);
}
7259
7260/*
7261 * mkdir_extended: Create a directory; with extended security (ACL).
7262 *
7263 * Parameters: p Process requesting to create the directory
7264 * uap User argument descriptor (see below)
7265 * retval (ignored)
7266 *
7267 * Indirect: uap->path Path of directory to create
7268 * uap->mode Access permissions to set
7269 * uap->xsecurity ACL to set
7270 *
7271 * Returns: 0 Success
7272 * !0 Not success
7273 *
7274 */
7275int
7276mkdir_extended(proc_t p, struct mkdir_extended_args *uap, __unused int32_t *retval)
7277{
7278 int ciferror;
7279 kauth_filesec_t xsecdst;
7280 struct vnode_attr va;
7281
7282 AUDIT_ARG(owner, uap->uid, uap->gid);
7283
7284 xsecdst = NULL;
7285 if ((uap->xsecurity != USER_ADDR_NULL) &&
7286 ((ciferror = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0))
7287 return ciferror;
7288
7289 VATTR_INIT(&va);
7290 VATTR_SET(&va, va_mode, (uap->mode & ACCESSPERMS) & ~p->p_fd->fd_cmask);
7291 if (xsecdst != NULL)
7292 VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
7293
7294 ciferror = mkdir1at(vfs_context_current(), uap->path, &va, AT_FDCWD,
7295 UIO_USERSPACE);
7296 if (xsecdst != NULL)
7297 kauth_filesec_free(xsecdst);
7298 return ciferror;
7299}
7300
7301int
7302mkdir(proc_t p, struct mkdir_args *uap, __unused int32_t *retval)
7303{
7304 struct vnode_attr va;
7305
7306 VATTR_INIT(&va);
7307 VATTR_SET(&va, va_mode, (uap->mode & ACCESSPERMS) & ~p->p_fd->fd_cmask);
7308
7309 return (mkdir1at(vfs_context_current(), uap->path, &va, AT_FDCWD,
7310 UIO_USERSPACE));
7311}
7312
7313int
7314mkdirat(proc_t p, struct mkdirat_args *uap, __unused int32_t *retval)
7315{
7316 struct vnode_attr va;
7317
7318 VATTR_INIT(&va);
7319 VATTR_SET(&va, va_mode, (uap->mode & ACCESSPERMS) & ~p->p_fd->fd_cmask);
7320
7321 return(mkdir1at(vfs_context_current(), uap->path, &va, uap->fd,
7322 UIO_USERSPACE));
7323}
7324
/*
 * Remove the directory named by dirpath, resolved relative to the
 * directory file descriptor fd (or the CWD when fd == AT_FDCWD).
 *
 * Uses the filesystem's compound-rmdir VNOP when available, emits
 * fsevents and kauth fileop notifications, and (on CONFIG_APPLEDOUBLE)
 * transparently removes orphaned "._*" AppleDouble files that would
 * otherwise make the directory appear non-empty.
 */
static int
rmdirat_internal(vfs_context_t ctx, int fd, user_addr_t dirpath,
    enum uio_seg segflg)
{
	vnode_t vp, dvp;
	int error;
	struct nameidata nd;
	char *path = NULL;
	int len=0;
	int has_listeners = 0;
	int need_event = 0;
	int truncated = 0;
#if CONFIG_FSE
	struct vnode_attr va;
#endif /* CONFIG_FSE */
	struct vnode_attr *vap = NULL;
	int restart_count = 0;		/* bounds ENOENT-driven redrives */
	int batched;

	int restart_flag;

	/*
	 * This loop exists to restart rmdir in the unlikely case that two
	 * processes are simultaneously trying to remove the same directory
	 * containing orphaned appleDouble files.
	 */
	do {
		NDINIT(&nd, DELETE, OP_RMDIR, LOCKPARENT | AUDITVNPATH1,
		    segflg, dirpath, ctx);
		nd.ni_flag = NAMEI_COMPOUNDRMDIR;
continue_lookup:
		restart_flag = 0;
		vap = NULL;

		error = nameiat(&nd, fd);
		if (error)
			return (error);

		dvp = nd.ni_dvp;
		vp = nd.ni_vp;

		if (vp) {
			batched = vnode_compound_rmdir_available(vp);

			if (vp->v_flag & VROOT) {
				/*
				 * The root of a mounted filesystem cannot be deleted.
				 */
				error = EBUSY;
				goto out;
			}

			/*
			 * Removed a check here; we used to abort if vp's vid
			 * was not the same as what we'd seen the last time around.
			 * I do not think that check was valid, because if we retry
			 * and all dirents are gone, the directory could legitimately
			 * be recycled but still be present in a situation where we would
			 * have had permission to delete.  Therefore, we won't make
			 * an effort to preserve that check now that we may not have a
			 * vp here.
			 */

			if (!batched) {
				error = vn_authorize_rmdir(dvp, vp, &nd.ni_cnd, ctx, NULL);
				if (error) {
					if (error == ENOENT) {
						/* Raced with another remover; retry a bounded number of times. */
						assert(restart_count < MAX_AUTHORIZE_ENOENT_RETRIES);
						if (restart_count < MAX_AUTHORIZE_ENOENT_RETRIES) {
							restart_flag = 1;
							restart_count += 1;
						}
					}
					goto out;
				}
			}
		} else {
			/* No leaf vnode: the FS must be doing a compound rmdir. */
			batched = 1;

			if (!vnode_compound_rmdir_available(dvp)) {
				panic("No error, but no compound rmdir?");
			}
		}

#if CONFIG_FSE
		fse_info  finfo;

		need_event = need_fsevent(FSE_DELETE, dvp);
		if (need_event) {
			if (!batched) {
				get_fse_info(vp, &finfo, ctx);
			} else {
				/* Compound case: the FS fills the attrs during the VNOP. */
				error = vfs_get_notify_attributes(&va);
				if (error) {
					goto out;
				}

				vap = &va;
			}
		}
#endif
		has_listeners = kauth_authorize_fileop_has_listeners();
		if (need_event || has_listeners) {
			if (path == NULL) {
				GET_PATH(path);
				if (path == NULL) {
					error = ENOMEM;
					goto out;
				}
			}

			len = safe_getpath(dvp, nd.ni_cnd.cn_nameptr, path, MAXPATHLEN, &truncated);
#if CONFIG_FSE
			if (truncated) {
				finfo.mode |= FSE_TRUNCATED_PATH;
			}
#endif
		}

		error = vn_rmdir(dvp, &vp, &nd, vap, ctx);
		nd.ni_vp = vp;
		if (vp == NULLVP) {
			/* Couldn't find a vnode */
			goto out;
		}

		if (error == EKEEPLOOKING) {
			/* Compound VNOP asked us to redrive the lookup. */
			goto continue_lookup;
		} else if (batched && error == ENOENT) {
			assert(restart_count < MAX_AUTHORIZE_ENOENT_RETRIES);
			if (restart_count < MAX_AUTHORIZE_ENOENT_RETRIES) {
				/*
				 * For compound VNOPs, the authorization callback
				 * may return ENOENT in case of racing hard link lookups
				 * redrive the lookup.
				 */
				restart_flag = 1;
				restart_count += 1;
				goto out;
			}
		}
#if CONFIG_APPLEDOUBLE
		/*
		 * Special case to remove orphaned AppleDouble
		 * files. I don't like putting this in the kernel,
		 * but carbon does not like putting this in carbon either,
		 * so here we are.
		 */
		if (error == ENOTEMPTY) {
			error = rmdir_remove_orphaned_appleDouble(vp, ctx, &restart_flag);
			if (error == EBUSY) {
				goto out;
			}


			/*
			 * Assuming everything went well, we will try the RMDIR again
			 */
			if (!error)
				error = vn_rmdir(dvp, &vp, &nd, vap, ctx);
		}
#endif /* CONFIG_APPLEDOUBLE */
		/*
		 * Call out to allow 3rd party notification of delete.
		 * Ignore result of kauth_authorize_fileop call.
		 */
		if (!error) {
			if (has_listeners) {
				kauth_authorize_fileop(vfs_context_ucred(ctx),
				    KAUTH_FILEOP_DELETE,
				    (uintptr_t)vp,
				    (uintptr_t)path);
			}

			if (vp->v_flag & VISHARDLINK) {
				// see the comment in unlink1() about why we update
				// the parent of a hard link when it is removed
				vnode_update_identity(vp, NULL, NULL, 0, 0, VNODE_UPDATE_PARENT);
			}

#if CONFIG_FSE
			if (need_event) {
				if (vap) {
					vnode_get_fse_info_from_vap(vp, &finfo, vap);
				}
				add_fsevent(FSE_DELETE, ctx,
				    FSE_ARG_STRING, len, path,
				    FSE_ARG_FINFO, &finfo,
				    FSE_ARG_DONE);
			}
#endif
		}

out:
		if (path != NULL) {
			RELEASE_PATH(path);
			path = NULL;
		}
		/*
		 * nameidone has to happen before we vnode_put(dvp)
		 * since it may need to release the fs_nodelock on the dvp
		 */
		nameidone(&nd);
		vnode_put(dvp);

		if (vp)
			vnode_put(vp);

		if (restart_flag == 0) {
			/*
			 * vp is used purely as a wait-channel address here
			 * (paired with the tsleep below) to serialize
			 * concurrent retrying removers of the same directory;
			 * it is not dereferenced after the vnode_put above.
			 */
			wakeup_one((caddr_t)vp);
			return (error);
		}
		tsleep(vp, PVFS, "rm AD", 1);

	} while (restart_flag != 0);

	return (error);

}
7544
7545/*
7546 * Remove a directory file.
7547 */
7548/* ARGSUSED */
7549int
7550rmdir(__unused proc_t p, struct rmdir_args *uap, __unused int32_t *retval)
7551{
7552 return (rmdirat_internal(vfs_context_current(), AT_FDCWD,
7553 CAST_USER_ADDR_T(uap->path), UIO_USERSPACE));
7554}
7555
/*
 * Get direntry length padded to 8 byte alignment.
 *
 * struct direntry embeds a MAXPATHLEN-byte d_name array; this trims the
 * record to what is actually needed for a name of 'namlen' bytes plus
 * its NUL (i.e. sizeof(struct direntry) - (MAXPATHLEN - 1) + namlen)
 * and rounds the result up to the next multiple of 8.
 */
#define DIRENT64_LEN(namlen) \
	((sizeof(struct direntry) + (namlen) - (MAXPATHLEN-1) + 7) & ~7)
7559
/*
 * Read directory entries from vp in the extended (64-bit, struct
 * direntry) format.  If the filesystem supports VNODE_READDIR_EXTENDED
 * natively, pass the request straight through; otherwise read classic
 * struct dirent records into a kernel buffer and repack them into
 * struct direntry records in the caller's uio.
 */
errno_t
vnode_readdir64(struct vnode *vp, struct uio *uio, int flags, int *eofflag,
    int *numdirent, vfs_context_t ctxp)
{
	/* Check if fs natively supports VNODE_READDIR_EXTENDED */
	if ((vp->v_mount->mnt_vtable->vfc_vfsflags & VFC_VFSREADDIR_EXTENDED) &&
	    ((vp->v_mount->mnt_kern_flag & MNTK_DENY_READDIREXT) == 0)) {
		return VNOP_READDIR(vp, uio, flags, eofflag, numdirent, ctxp);
	} else {
		size_t bufsize;
		void * bufptr;
		uio_t auio;
		struct direntry *entry64;
		struct dirent *dep;
		int bytesread;
		int error;

		/*
		 * Our kernel buffer needs to be smaller since re-packing
		 * will expand each dirent.  The worse case (when the name
		 * length is 3) corresponds to a struct direntry size of 32
		 * bytes (8-byte aligned) and a struct dirent size of 12 bytes
		 * (4-byte aligned).  So having a buffer that is 3/8 the size
		 * will prevent us from reading more than we can pack.
		 *
		 * Since this buffer is wired memory, we will limit the
		 * buffer size to a maximum of 32K. We would really like to
		 * use 32K in the MIN(), but we use magic number 87371 to
		 * prevent uio_resid() * 3 / 8 from overflowing.
		 */
		bufsize = 3 * MIN((user_size_t)uio_resid(uio), 87371u) / 8;
		MALLOC(bufptr, void *, bufsize, M_TEMP, M_WAITOK);
		if (bufptr == NULL) {
			return ENOMEM;
		}

		auio = uio_create(1, 0, UIO_SYSSPACE, UIO_READ);
		uio_addiov(auio, (uintptr_t)bufptr, bufsize);
		/* Start the kernel-side read at the caller's directory offset. */
		auio->uio_offset = uio->uio_offset;

		error = VNOP_READDIR(vp, auio, 0, eofflag, numdirent, ctxp);

		dep = (struct dirent *)bufptr;
		bytesread = bufsize - uio_resid(auio);

		MALLOC(entry64, struct direntry *, sizeof(struct direntry),
		    M_TEMP, M_WAITOK);
		/*
		 * Convert all the entries and copy them out to user's buffer.
		 *
		 * NOTE(review): this loop trusts the filesystem-supplied
		 * d_reclen/d_namlen to be sane (nonzero, in-bounds); a
		 * d_reclen of 0 would spin forever — verify FS guarantees.
		 */
		while (error == 0 && (char *)dep < ((char *)bufptr + bytesread)) {
			size_t	enbufsize = DIRENT64_LEN(dep->d_namlen);

			bzero(entry64, enbufsize);
			/* Convert a dirent to a dirent64. */
			entry64->d_ino = dep->d_ino;
			entry64->d_seekoff = 0;
			entry64->d_reclen = enbufsize;
			entry64->d_namlen = dep->d_namlen;
			entry64->d_type = dep->d_type;
			bcopy(dep->d_name, entry64->d_name, dep->d_namlen + 1);

			/* Move to next entry. */
			dep = (struct dirent *)((char *)dep + dep->d_reclen);

			/* Copy entry64 to user's buffer. */
			error = uiomove((caddr_t)entry64, entry64->d_reclen, uio);
		}

		/* Update the real offset using the offset we got from VNOP_READDIR. */
		if (error == 0) {
			uio->uio_offset = auio->uio_offset;
		}
		uio_free(auio);
		FREE(bufptr, M_TEMP);
		FREE(entry64, M_TEMP);
		return (error);
	}
}
7639
7640#define GETDIRENTRIES_MAXBUFSIZE (128 * 1024 * 1024U)
7641
7642/*
7643 * Read a block of directory entries in a file system independent format.
7644 */
/*
 * Read a block of directory entries in a file system independent format.
 *
 * Common implementation for getdirentries() and getdirentries64():
 * reads from the directory open on fd into the user buffer
 * bufp/bufsize.  On success *bytesread holds the number of bytes
 * produced and *offset (if non-NULL) the directory offset prior to the
 * read.  VNODE_READDIR_EXTENDED in flags selects 64-bit records via
 * vnode_readdir64().
 */
static int
getdirentries_common(int fd, user_addr_t bufp, user_size_t bufsize, ssize_t *bytesread,
    off_t *offset, int flags)
{
	vnode_t vp;
	struct vfs_context context = *vfs_context_current();	/* local copy */
	struct fileproc *fp;
	uio_t auio;
	int spacetype = proc_is64bit(vfs_context_proc(&context)) ? UIO_USERSPACE64 : UIO_USERSPACE32;
	off_t loff;
	int error, eofflag, numdirent;
	char uio_buf[ UIO_SIZEOF(1) ];

	error = fp_getfvp(vfs_context_proc(&context), fd, &fp, &vp);
	if (error) {
		return (error);
	}
	if ((fp->f_fglob->fg_flag & FREAD) == 0) {
		AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
		error = EBADF;
		goto out;
	}

	/* Clamp oversized requests to a sane maximum. */
	if (bufsize > GETDIRENTRIES_MAXBUFSIZE)
		bufsize = GETDIRENTRIES_MAXBUFSIZE;

#if CONFIG_MACF
	error = mac_file_check_change_offset(vfs_context_ucred(&context), fp->f_fglob);
	if (error)
		goto out;
#endif
	if ( (error = vnode_getwithref(vp)) ) {
		goto out;
	}
	AUDIT_ARG(vnpath, vp, ARG_VNODE1);

unionread:
	if (vp->v_type != VDIR) {
		(void)vnode_put(vp);
		error = EINVAL;
		goto out;
	}

#if CONFIG_MACF
	error = mac_vnode_check_readdir(&context, vp);
	if (error != 0) {
		(void)vnode_put(vp);
		goto out;
	}
#endif /* MAC */

	/* Remember the pre-read offset; it is what we report via *offset. */
	loff = fp->f_fglob->fg_offset;
	auio = uio_createwithbuffer(1, loff, spacetype, UIO_READ, &uio_buf[0], sizeof(uio_buf));
	uio_addiov(auio, bufp, bufsize);

	if (flags & VNODE_READDIR_EXTENDED) {
		error = vnode_readdir64(vp, auio, flags, &eofflag, &numdirent, &context);
		fp->f_fglob->fg_offset = uio_offset(auio);
	} else {
		error = VNOP_READDIR(vp, auio, 0, &eofflag, &numdirent, &context);
		fp->f_fglob->fg_offset = uio_offset(auio);
	}
	if (error) {
		(void)vnode_put(vp);
		goto out;
	}

	/*
	 * An empty read can mean we've exhausted the upper layer of a
	 * union mount: descend to the covered directory and read there.
	 */
	if ((user_ssize_t)bufsize == uio_resid(auio)){
		if (union_dircheckp) {
			error = union_dircheckp(&vp, fp, &context);
			if (error == -1)
				goto unionread;
			if (error)
				goto out;
		}

		if ((vp->v_mount->mnt_flag & MNT_UNION)) {
			struct vnode *tvp = vp;
			if (lookup_traverse_union(tvp, &vp, &context) == 0) {
				/* Retarget the open file at the lower-layer directory. */
				vnode_ref(vp);
				fp->f_fglob->fg_data = (caddr_t) vp;
				fp->f_fglob->fg_offset = 0;
				vnode_rele(tvp);
				vnode_put(tvp);
				goto unionread;
			}
			vp = tvp;
		}
	}

	vnode_put(vp);
	if (offset) {
		*offset = loff;
	}

	*bytesread = bufsize - uio_resid(auio);
out:
	file_drop(fd);
	return (error);
}
7745
7746
7747int
7748getdirentries(__unused struct proc *p, struct getdirentries_args *uap, int32_t *retval)
7749{
7750 off_t offset;
7751 ssize_t bytesread;
7752 int error;
7753
7754 AUDIT_ARG(fd, uap->fd);
7755 error = getdirentries_common(uap->fd, uap->buf, uap->count, &bytesread, &offset, 0);
7756
7757 if (error == 0) {
7758 if (proc_is64bit(p)) {
7759 user64_long_t base = (user64_long_t)offset;
7760 error = copyout((caddr_t)&base, uap->basep, sizeof(user64_long_t));
7761 } else {
7762 user32_long_t base = (user32_long_t)offset;
7763 error = copyout((caddr_t)&base, uap->basep, sizeof(user32_long_t));
7764 }
7765 *retval = bytesread;
7766 }
7767 return (error);
7768}
7769
7770int
7771getdirentries64(__unused struct proc *p, struct getdirentries64_args *uap, user_ssize_t *retval)
7772{
7773 off_t offset;
7774 ssize_t bytesread;
7775 int error;
7776
7777 AUDIT_ARG(fd, uap->fd);
7778 error = getdirentries_common(uap->fd, uap->buf, uap->bufsize, &bytesread, &offset, VNODE_READDIR_EXTENDED);
7779
7780 if (error == 0) {
7781 *retval = bytesread;
7782 error = copyout((caddr_t)&offset, uap->position, sizeof(off_t));
7783 }
7784 return (error);
7785}
7786
7787
7788/*
7789 * Set the mode mask for creation of filesystem nodes.
7790 * XXX implement xsecurity
7791 */
7792#define UMASK_NOXSECURITY (void *)1 /* leave existing xsecurity alone */
7793static int
7794umask1(proc_t p, int newmask, __unused kauth_filesec_t fsec, int32_t *retval)
7795{
7796 struct filedesc *fdp;
7797
7798 AUDIT_ARG(mask, newmask);
7799 proc_fdlock(p);
7800 fdp = p->p_fd;
7801 *retval = fdp->fd_cmask;
7802 fdp->fd_cmask = newmask & ALLPERMS;
7803 proc_fdunlock(p);
7804 return (0);
7805}
7806
7807/*
7808 * umask_extended: Set the mode mask for creation of filesystem nodes; with extended security (ACL).
7809 *
7810 * Parameters: p Process requesting to set the umask
7811 * uap User argument descriptor (see below)
7812 * retval umask of the process (parameter p)
7813 *
7814 * Indirect: uap->newmask umask to set
7815 * uap->xsecurity ACL to set
7816 *
7817 * Returns: 0 Success
7818 * !0 Not success
7819 *
7820 */
7821int
7822umask_extended(proc_t p, struct umask_extended_args *uap, int32_t *retval)
7823{
7824 int ciferror;
7825 kauth_filesec_t xsecdst;
7826
7827 xsecdst = KAUTH_FILESEC_NONE;
7828 if (uap->xsecurity != USER_ADDR_NULL) {
7829 if ((ciferror = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0)
7830 return ciferror;
7831 } else {
7832 xsecdst = KAUTH_FILESEC_NONE;
7833 }
7834
7835 ciferror = umask1(p, uap->newmask, xsecdst, retval);
7836
7837 if (xsecdst != KAUTH_FILESEC_NONE)
7838 kauth_filesec_free(xsecdst);
7839 return ciferror;
7840}
7841
7842int
7843umask(proc_t p, struct umask_args *uap, int32_t *retval)
7844{
7845 return(umask1(p, uap->newmask, UMASK_NOXSECURITY, retval));
7846}
7847
/*
 * Void all references to file by ripping underlying filesystem
 * away from vnode.
 *
 * Only character- and block-special files may be revoked; the caller
 * must own the node or be superuser.
 */
/* ARGSUSED */
int
revoke(proc_t p, struct revoke_args *uap, __unused int32_t *retval)
{
	vnode_t vp;
	struct vnode_attr va;
	vfs_context_t ctx = vfs_context_current();
	int error;
	struct nameidata nd;

	NDINIT(&nd, LOOKUP, OP_REVOKE, FOLLOW | AUDITVNPATH1, UIO_USERSPACE,
	    uap->path, ctx);
	error = namei(&nd);
	if (error)
		return (error);
	vp = nd.ni_vp;

	nameidone(&nd);

	/* Only device special files are supported. */
	if (!(vnode_ischr(vp) || vnode_isblk(vp))) {
		error = ENOTSUP;
		goto out;
	}

	/* A block device with a filesystem mounted on it is busy. */
	if (vnode_isblk(vp) && vnode_ismountedon(vp)) {
		error = EBUSY;
		goto out;
	}

#if CONFIG_MACF
	error = mac_vnode_check_revoke(ctx, vp);
	if (error)
		goto out;
#endif

	/* Permission check: owner or superuser. */
	VATTR_INIT(&va);
	VATTR_WANTED(&va, va_uid);
	if ((error = vnode_getattr(vp, &va, ctx)))
		goto out;
	if (kauth_cred_getuid(vfs_context_ucred(ctx)) != va.va_uid &&
	    (error = suser(vfs_context_ucred(ctx), &p->p_acflag)))
		goto out;
	/* Only revoke when somebody actually has the node open or aliased. */
	if (vp->v_usecount > 0 || (vnode_isaliased(vp)))
		VNOP_REVOKE(vp, REVOKEALL, ctx);
out:
	vnode_put(vp);
	return (error);
}
7900
7901
7902/*
 * HFS/HFS Plus SPECIFIC SYSTEM CALLS
7904 * The following system calls are designed to support features
7905 * which are specific to the HFS & HFS Plus volume formats
7906 */
7907
7908
/*
 * Obtain attribute information on objects in a directory while enumerating
 * the directory.
 *
 * The requested attribute list is copied in from uap->alist, results are
 * written to uap->buffer, and the (in/out) entry count is exchanged via
 * uap->count.  Union mounts are traversed transparently: when the upper
 * layer is exhausted we descend to the covered directory and continue.
 */
/* ARGSUSED */
int
getdirentriesattr (proc_t p, struct getdirentriesattr_args *uap, int32_t *retval)
{
	vnode_t vp;
	struct fileproc *fp;
	uio_t auio = NULL;
	int spacetype = proc_is64bit(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
	uint32_t count, savecount;
	uint32_t newstate;
	int error, eofflag;
	uint32_t loff;
	struct attrlist attributelist;
	vfs_context_t ctx = vfs_context_current();
	int fd = uap->fd;
	char uio_buf[ UIO_SIZEOF(1) ];
	kauth_action_t action;

	AUDIT_ARG(fd, fd);

	/* Get the attributes into kernel space */
	if ((error = copyin(uap->alist, (caddr_t)&attributelist, sizeof(attributelist)))) {
		return(error);
	}
	if ((error = copyin(uap->count, (caddr_t)&count, sizeof(count)))) {
		return(error);
	}
	/* Remember the caller's requested count so it can be reset
	 * if we restart the enumeration in a lower union layer. */
	savecount = count;
	if ( (error = fp_getfvp(p, fd, &fp, &vp)) ) {
		return (error);
	}
	if ((fp->f_fglob->fg_flag & FREAD) == 0) {
		AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
		error = EBADF;
		goto out;
	}


#if CONFIG_MACF
	error = mac_file_check_change_offset(vfs_context_ucred(ctx),
	    fp->f_fglob);
	if (error)
		goto out;
#endif


	if ( (error = vnode_getwithref(vp)) )
		goto out;

	AUDIT_ARG(vnpath, vp, ARG_VNODE1);

unionread:
	if (vp->v_type != VDIR) {
		(void)vnode_put(vp);
		error = EINVAL;
		goto out;
	}

#if CONFIG_MACF
	error = mac_vnode_check_readdir(ctx, vp);
	if (error != 0) {
		(void)vnode_put(vp);
		goto out;
	}
#endif /* MAC */

	/* set up the uio structure which will contain the users return buffer */
	loff = fp->f_fglob->fg_offset;
	auio = uio_createwithbuffer(1, loff, spacetype, UIO_READ, &uio_buf[0], sizeof(uio_buf));
	uio_addiov(auio, uap->buffer, uap->buffersize);

	/*
	 * If the only item requested is file names, we can let that past with
	 * just LIST_DIRECTORY.  If they want any other attributes, that means
	 * they need SEARCH as well.
	 */
	action = KAUTH_VNODE_LIST_DIRECTORY;
	if ((attributelist.commonattr & ~ATTR_CMN_NAME) ||
	    attributelist.fileattr || attributelist.dirattr)
		action |= KAUTH_VNODE_SEARCH;

	if ((error = vnode_authorize(vp, NULL, action, ctx)) == 0) {

		/* Believe it or not, uap->options only has 32-bits of valid
		 * info, so truncate before extending again */

		error = VNOP_READDIRATTR(vp, &attributelist, auio, count,
		    (u_long)(uint32_t)uap->options, &newstate, &eofflag, &count, ctx);
	}

	if (error) {
		(void) vnode_put(vp);
		goto out;
	}

	/*
	 * If we've got the last entry of a directory in a union mount
	 * then reset the eofflag and pretend there's still more to come.
	 * The next call will again set eofflag and the buffer will be empty,
	 * so traverse to the underlying directory and do the directory
	 * read there.
	 */
	if (eofflag && vp->v_mount->mnt_flag & MNT_UNION) {
		if (uio_resid(auio) < (user_ssize_t) uap->buffersize) { // Got some entries
			eofflag = 0;
		} else { // Empty buffer
			struct vnode *tvp = vp;
			if (lookup_traverse_union(tvp, &vp, ctx) == 0) {
				vnode_ref_ext(vp, fp->f_fglob->fg_flag & O_EVTONLY, 0);
				fp->f_fglob->fg_data = (caddr_t) vp;
				fp->f_fglob->fg_offset = 0; // reset index for new dir
				count = savecount;
				vnode_rele_internal(tvp, fp->f_fglob->fg_flag & O_EVTONLY, 0, 0);
				vnode_put(tvp);
				goto unionread;
			}
			vp = tvp;
		}
	}

	(void)vnode_put(vp);

	if (error)
		goto out;
	fp->f_fglob->fg_offset = uio_offset(auio); /* should be multiple of dirent, not variable */

	/* Copy the out-parameters back to user space. */
	if ((error = copyout((caddr_t) &count, uap->count, sizeof(count))))
		goto out;
	if ((error = copyout((caddr_t) &newstate, uap->newstate, sizeof(newstate))))
		goto out;
	if ((error = copyout((caddr_t) &loff, uap->basep, sizeof(loff))))
		goto out;

	*retval = eofflag;  /* similar to getdirentries */
	error = 0;
out:
	file_drop(fd);
	return (error); /* return error earlier, a retval of 0 or 1 now */

} /* end of getdirentriesattr system call */
8053
/*
* Exchange data between two files
*
* Atomically swaps the data forks of the two regular files named by
* uap->path1 and uap->path2 (which must live on the same volume), via
* the filesystem's VNOP_EXCHANGE.  On success the vnodes' cached
* name/parent identities are swapped as well so the name cache stays
* consistent, and fsevents / kauth fileop listeners are notified.
*/

/* ARGSUSED */
int
exchangedata (__unused proc_t p, struct exchangedata_args *uap, __unused int32_t *retval)
{

	struct nameidata fnd, snd;
	vfs_context_t ctx = vfs_context_current();
	vnode_t fvp;
	vnode_t svp;
	int error;
	u_int32_t nameiflags;
	char *fpath = NULL;
	char *spath = NULL;
	int flen=0, slen=0;
	int from_truncated=0, to_truncated=0;
#if CONFIG_FSE
	fse_info f_finfo, s_finfo;
#endif

	nameiflags = 0;
	if ((uap->options & FSOPT_NOFOLLOW) == 0) nameiflags |= FOLLOW;

	NDINIT(&fnd, LOOKUP, OP_EXCHANGEDATA, nameiflags | AUDITVNPATH1,
	    UIO_USERSPACE, uap->path1, ctx);

	error = namei(&fnd);
	if (error)
		goto out2;

	nameidone(&fnd);
	fvp = fnd.ni_vp;

	NDINIT(&snd, LOOKUP, OP_EXCHANGEDATA, CN_NBMOUNTLOOK | nameiflags | AUDITVNPATH2,
	    UIO_USERSPACE, uap->path2, ctx);

	error = namei(&snd);
	if (error) {
		vnode_put(fvp);
		goto out2;
	}
	nameidone(&snd);
	svp = snd.ni_vp;

	/*
	 * if the files are the same, return an inval error
	 */
	if (svp == fvp) {
		error = EINVAL;
		goto out;
	}

	/*
	 * if the files are on different volumes, return an error
	 */
	if (svp->v_mount != fvp->v_mount) {
		error = EXDEV;
		goto out;
	}

	/* If they're not files, return an error */
	if ( (vnode_isreg(fvp) == 0) || (vnode_isreg(svp) == 0)) {
		error = EINVAL;
		goto out;
	}

#if CONFIG_MACF
	error = mac_vnode_check_exchangedata(ctx,
	    fvp, svp);
	if (error)
		goto out;
#endif
	/* Caller needs read+write access to both files. */
	if (((error = vnode_authorize(fvp, NULL, KAUTH_VNODE_READ_DATA | KAUTH_VNODE_WRITE_DATA, ctx)) != 0) ||
	    ((error = vnode_authorize(svp, NULL, KAUTH_VNODE_READ_DATA | KAUTH_VNODE_WRITE_DATA, ctx)) != 0))
		goto out;

	/* Gather paths and finfo only if someone is listening. */
	if (
#if CONFIG_FSE
	need_fsevent(FSE_EXCHANGE, fvp) ||
#endif
	kauth_authorize_fileop_has_listeners()) {
		GET_PATH(fpath);
		GET_PATH(spath);
		if (fpath == NULL || spath == NULL) {
			error = ENOMEM;
			goto out;
		}

		flen = safe_getpath(fvp, NULL, fpath, MAXPATHLEN, &from_truncated);
		slen = safe_getpath(svp, NULL, spath, MAXPATHLEN, &to_truncated);

#if CONFIG_FSE
		get_fse_info(fvp, &f_finfo, ctx);
		get_fse_info(svp, &s_finfo, ctx);
		if (from_truncated || to_truncated) {
			// set it here since only the f_finfo gets reported up to user space
			f_finfo.mode |= FSE_TRUNCATED_PATH;
		}
#endif
	}
	/* Ok, make the call */
	error = VNOP_EXCHANGE(fvp, svp, 0, ctx);

	if (error == 0) {
		const char *tmpname;

		if (fpath != NULL && spath != NULL) {
			/* call out to allow 3rd party notification of exchangedata.
			 * Ignore result of kauth_authorize_fileop call.
			 */
			kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_EXCHANGE,
			    (uintptr_t)fpath, (uintptr_t)spath);
		}
		/*
		 * The objects' contents were swapped, so swap the cached
		 * names and parents too to keep the name cache coherent.
		 */
		name_cache_lock();

		tmpname = fvp->v_name;
		fvp->v_name = svp->v_name;
		svp->v_name = tmpname;

		if (fvp->v_parent != svp->v_parent) {
			vnode_t tmp;

			tmp = fvp->v_parent;
			fvp->v_parent = svp->v_parent;
			svp->v_parent = tmp;
		}
		name_cache_unlock();

#if CONFIG_FSE
		if (fpath != NULL && spath != NULL) {
			add_fsevent(FSE_EXCHANGE, ctx,
			    FSE_ARG_STRING, flen, fpath,
			    FSE_ARG_FINFO, &f_finfo,
			    FSE_ARG_STRING, slen, spath,
			    FSE_ARG_FINFO, &s_finfo,
			    FSE_ARG_DONE);
		}
#endif
	}

out:
	if (fpath != NULL)
		RELEASE_PATH(fpath);
	if (spath != NULL)
		RELEASE_PATH(spath);
	vnode_put(svp);
	vnode_put(fvp);
out2:
	return (error);
}
8207
8208/*
8209 * Return (in MB) the amount of freespace on the given vnode's volume.
8210 */
8211uint32_t freespace_mb(vnode_t vp);
8212
8213uint32_t
8214freespace_mb(vnode_t vp)
8215{
8216 vfs_update_vfsstat(vp->v_mount, vfs_context_current(), VFS_USER_EVENT);
8217 return (((uint64_t)vp->v_mount->mnt_vfsstat.f_bavail *
8218 vp->v_mount->mnt_vfsstat.f_bsize) >> 20);
8219}
8220
8221#if CONFIG_SEARCHFS
8222
8223/* ARGSUSED */
8224
8225int
8226searchfs(proc_t p, struct searchfs_args *uap, __unused int32_t *retval)
8227{
8228 vnode_t vp, tvp;
8229 int i, error=0;
8230 int fserror = 0;
8231 struct nameidata nd;
8232 struct user64_fssearchblock searchblock;
8233 struct searchstate *state;
8234 struct attrlist *returnattrs;
8235 struct timeval timelimit;
8236 void *searchparams1,*searchparams2;
8237 uio_t auio = NULL;
8238 int spacetype = proc_is64bit(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
8239 uint32_t nummatches;
8240 int mallocsize;
8241 uint32_t nameiflags;
8242 vfs_context_t ctx = vfs_context_current();
8243 char uio_buf[ UIO_SIZEOF(1) ];
8244
8245 /* Start by copying in fsearchblock parameter list */
8246 if (IS_64BIT_PROCESS(p)) {
8247 error = copyin(uap->searchblock, (caddr_t) &searchblock, sizeof(searchblock));
8248 timelimit.tv_sec = searchblock.timelimit.tv_sec;
8249 timelimit.tv_usec = searchblock.timelimit.tv_usec;
8250 }
8251 else {
8252 struct user32_fssearchblock tmp_searchblock;
8253
8254 error = copyin(uap->searchblock, (caddr_t) &tmp_searchblock, sizeof(tmp_searchblock));
8255 // munge into 64-bit version
8256 searchblock.returnattrs = CAST_USER_ADDR_T(tmp_searchblock.returnattrs);
8257 searchblock.returnbuffer = CAST_USER_ADDR_T(tmp_searchblock.returnbuffer);
8258 searchblock.returnbuffersize = tmp_searchblock.returnbuffersize;
8259 searchblock.maxmatches = tmp_searchblock.maxmatches;
8260 /*
8261 * These casts are safe. We will promote the tv_sec into a 64 bit long if necessary
8262 * from a 32 bit long, and tv_usec is already a signed 32 bit int.
8263 */
8264 timelimit.tv_sec = (__darwin_time_t) tmp_searchblock.timelimit.tv_sec;
8265 timelimit.tv_usec = (__darwin_useconds_t) tmp_searchblock.timelimit.tv_usec;
8266 searchblock.searchparams1 = CAST_USER_ADDR_T(tmp_searchblock.searchparams1);
8267 searchblock.sizeofsearchparams1 = tmp_searchblock.sizeofsearchparams1;
8268 searchblock.searchparams2 = CAST_USER_ADDR_T(tmp_searchblock.searchparams2);
8269 searchblock.sizeofsearchparams2 = tmp_searchblock.sizeofsearchparams2;
8270 searchblock.searchattrs = tmp_searchblock.searchattrs;
8271 }
8272 if (error)
8273 return(error);
8274
8275 /* Do a sanity check on sizeofsearchparams1 and sizeofsearchparams2.
8276 */
8277 if (searchblock.sizeofsearchparams1 > SEARCHFS_MAX_SEARCHPARMS ||
8278 searchblock.sizeofsearchparams2 > SEARCHFS_MAX_SEARCHPARMS)
8279 return(EINVAL);
8280
8281 /* Now malloc a big bunch of space to hold the search parameters, the attrlists and the search state. */
8282 /* It all has to do into local memory and it's not that big so we might as well put it all together. */
8283 /* Searchparams1 shall be first so we might as well use that to hold the base address of the allocated*/
8284 /* block. */
8285 /* */
8286 /* NOTE: we allocate an extra 8 bytes to account for the difference in size of the searchstate */
8287 /* due to the changes in rdar://problem/12438273. That way if a 3rd party file system */
8288 /* assumes the size is still 556 bytes it will continue to work */
8289
8290 mallocsize = searchblock.sizeofsearchparams1 + searchblock.sizeofsearchparams2 +
8291 sizeof(struct attrlist) + sizeof(struct searchstate) + (2*sizeof(uint32_t));
8292
8293 MALLOC(searchparams1, void *, mallocsize, M_TEMP, M_WAITOK);
8294
8295 /* Now set up the various pointers to the correct place in our newly allocated memory */
8296
8297 searchparams2 = (void *) (((caddr_t) searchparams1) + searchblock.sizeofsearchparams1);
8298 returnattrs = (struct attrlist *) (((caddr_t) searchparams2) + searchblock.sizeofsearchparams2);
8299 state = (struct searchstate *) (((caddr_t) returnattrs) + sizeof (struct attrlist));
8300
8301 /* Now copy in the stuff given our local variables. */
8302
8303 if ((error = copyin(searchblock.searchparams1, searchparams1, searchblock.sizeofsearchparams1)))
8304 goto freeandexit;
8305
8306 if ((error = copyin(searchblock.searchparams2, searchparams2, searchblock.sizeofsearchparams2)))
8307 goto freeandexit;
8308
8309 if ((error = copyin(searchblock.returnattrs, (caddr_t) returnattrs, sizeof(struct attrlist))))
8310 goto freeandexit;
8311
8312 if ((error = copyin(uap->state, (caddr_t) state, sizeof(struct searchstate))))
8313 goto freeandexit;
8314
8315 /*
8316 * When searching a union mount, need to set the
8317 * start flag at the first call on each layer to
8318 * reset state for the new volume.
8319 */
8320 if (uap->options & SRCHFS_START)
8321 state->ss_union_layer = 0;
8322 else
8323 uap->options |= state->ss_union_flags;
8324 state->ss_union_flags = 0;
8325
8326 /*
8327 * Because searchparams1 and searchparams2 may contain an ATTR_CMN_NAME search parameter,
8328 * which is passed in with an attrreference_t, we need to inspect the buffer manually here.
8329 * The KPI does not provide us the ability to pass in the length of the buffers searchparams1
8330 * and searchparams2. To obviate the need for all searchfs-supporting filesystems to
8331 * validate the user-supplied data offset of the attrreference_t, we'll do it here.
8332 */
8333
8334 if (searchblock.searchattrs.commonattr & ATTR_CMN_NAME) {
8335 attrreference_t* string_ref;
8336 u_int32_t* start_length;
8337 user64_size_t param_length;
8338
8339 /* validate searchparams1 */
8340 param_length = searchblock.sizeofsearchparams1;
8341 /* skip the word that specifies length of the buffer */
8342 start_length= (u_int32_t*) searchparams1;
8343 start_length= start_length+1;
8344 string_ref= (attrreference_t*) start_length;
8345
8346 /* ensure no negative offsets or too big offsets */
8347 if (string_ref->attr_dataoffset < 0 ) {
8348 error = EINVAL;
8349 goto freeandexit;
8350 }
8351 if (string_ref->attr_length > MAXPATHLEN) {
8352 error = EINVAL;
8353 goto freeandexit;
8354 }
8355
8356 /* Check for pointer overflow in the string ref */
8357 if (((char*) string_ref + string_ref->attr_dataoffset) < (char*) string_ref) {
8358 error = EINVAL;
8359 goto freeandexit;
8360 }
8361
8362 if (((char*) string_ref + string_ref->attr_dataoffset) > ((char*)searchparams1 + param_length)) {
8363 error = EINVAL;
8364 goto freeandexit;
8365 }
8366 if (((char*)string_ref + string_ref->attr_dataoffset + string_ref->attr_length) > ((char*)searchparams1 + param_length)) {
8367 error = EINVAL;
8368 goto freeandexit;
8369 }
8370 }
8371
8372 /* set up the uio structure which will contain the users return buffer */
8373 auio = uio_createwithbuffer(1, 0, spacetype, UIO_READ, &uio_buf[0], sizeof(uio_buf));
8374 uio_addiov(auio, searchblock.returnbuffer, searchblock.returnbuffersize);
8375
8376 nameiflags = 0;
8377 if ((uap->options & FSOPT_NOFOLLOW) == 0) nameiflags |= FOLLOW;
8378 NDINIT(&nd, LOOKUP, OP_SEARCHFS, nameiflags | AUDITVNPATH1,
8379 UIO_USERSPACE, uap->path, ctx);
8380
8381 error = namei(&nd);
8382 if (error)
8383 goto freeandexit;
8384 vp = nd.ni_vp;
8385 nameidone(&nd);
8386
8387 /*
8388 * Switch to the root vnode for the volume
8389 */
8390 error = VFS_ROOT(vnode_mount(vp), &tvp, ctx);
8391 vnode_put(vp);
8392 if (error)
8393 goto freeandexit;
8394 vp = tvp;
8395
8396 /*
8397 * If it's a union mount, the path lookup takes
8398 * us to the top layer. But we may need to descend
8399 * to a lower layer. For non-union mounts the layer
8400 * is always zero.
8401 */
8402 for (i = 0; i < (int) state->ss_union_layer; i++) {
8403 if ((vp->v_mount->mnt_flag & MNT_UNION) == 0)
8404 break;
8405 tvp = vp;
8406 vp = vp->v_mount->mnt_vnodecovered;
8407 if (vp == NULL) {
8408 vnode_put(tvp);
8409 error = ENOENT;
8410 goto freeandexit;
8411 }
8412 vnode_getwithref(vp);
8413 vnode_put(tvp);
8414 }
8415
8416#if CONFIG_MACF
8417 error = mac_vnode_check_searchfs(ctx, vp, &searchblock.searchattrs);
8418 if (error) {
8419 vnode_put(vp);
8420 goto freeandexit;
8421 }
8422#endif
8423
8424
8425 /*
8426 * If searchblock.maxmatches == 0, then skip the search. This has happened
8427 * before and sometimes the underlying code doesnt deal with it well.
8428 */
8429 if (searchblock.maxmatches == 0) {
8430 nummatches = 0;
8431 goto saveandexit;
8432 }
8433
8434 /*
8435 * Allright, we have everything we need, so lets make that call.
8436 *
8437 * We keep special track of the return value from the file system:
8438 * EAGAIN is an acceptable error condition that shouldn't keep us
8439 * from copying out any results...
8440 */
8441
8442 fserror = VNOP_SEARCHFS(vp,
8443 searchparams1,
8444 searchparams2,
8445 &searchblock.searchattrs,
8446 (u_long)searchblock.maxmatches,
8447 &timelimit,
8448 returnattrs,
8449 &nummatches,
8450 (u_long)uap->scriptcode,
8451 (u_long)uap->options,
8452 auio,
8453 (struct searchstate *) &state->ss_fsstate,
8454 ctx);
8455
8456 /*
8457 * If it's a union mount we need to be called again
8458 * to search the mounted-on filesystem.
8459 */
8460 if ((vp->v_mount->mnt_flag & MNT_UNION) && fserror == 0) {
8461 state->ss_union_flags = SRCHFS_START;
8462 state->ss_union_layer++; // search next layer down
8463 fserror = EAGAIN;
8464 }
8465
8466saveandexit:
8467
8468 vnode_put(vp);
8469
8470 /* Now copy out the stuff that needs copying out. That means the number of matches, the
8471 search state. Everything was already put into he return buffer by the vop call. */
8472
8473 if ((error = copyout((caddr_t) state, uap->state, sizeof(struct searchstate))) != 0)
8474 goto freeandexit;
8475
8476 if ((error = suulong(uap->nummatches, (uint64_t)nummatches)) != 0)
8477 goto freeandexit;
8478
8479 error = fserror;
8480
8481freeandexit:
8482
8483 FREE(searchparams1,M_TEMP);
8484
8485 return(error);
8486
8487
8488} /* end of searchfs system call */
8489
8490#else /* CONFIG_SEARCHFS */
8491
int
searchfs(__unused proc_t p, __unused struct searchfs_args *uap, __unused int32_t *retval)
{
	/* searchfs() is not available when the kernel is built without CONFIG_SEARCHFS. */
	return (ENOTSUP);
}
8497
8498#endif /* CONFIG_SEARCHFS */
8499
8500
/* Lock machinery backing the namespace-handler (nspace) subsystem. */
lck_grp_attr_t * nspace_group_attr;
lck_attr_t * nspace_lock_attr;
lck_grp_t * nspace_mutex_group;

/* Protects nspace_items[] and the associated sleep/wakeup rendezvous. */
lck_mtx_t nspace_handler_lock;
/* Serializes entry into wait_for_namespace_event() per handler type. */
lck_mtx_t nspace_handler_exclusion_lock;

/* Gates snapshot events; values of 0 and ~0 are treated as "no snapshot handler". */
time_t snapshot_timestamp=0;
/* When non-zero, allow snapshot events on virtual (disk-image) devices. */
int nspace_allow_virtual_devs=0;

void nspace_handler_init(void);

/* One pending namespace event awaiting service by a user-space handler. */
typedef struct nspace_item_info {
	struct vnode *vp;	/* vnode the event refers to; also the sleep channel for waiters */
	void *arg;		/* optional payload (e.g. a uio) handed through to the handler */
	uint64_t op;		/* namespace operation code */
	uint32_t vid;		/* vnode vid captured at enqueue time, for later revalidation */
	uint32_t flags;		/* NSPACE_ITEM_* state bits below */
	uint32_t token;		/* id handed out to the user-space handler */
	uint32_t refcount;	/* number of threads waiting on this slot */
} nspace_item_info;

#define MAX_NSPACE_ITEMS 128
nspace_item_info nspace_items[MAX_NSPACE_ITEMS];
uint32_t nspace_item_idx=0; // also used as the sleep/wakeup rendezvous address
uint32_t nspace_token_id=0;
uint32_t nspace_handler_timeout = 15; // seconds

/* nspace_item_info.flags state bits */
#define NSPACE_ITEM_NEW 0x0001
#define NSPACE_ITEM_PROCESSING 0x0002
#define NSPACE_ITEM_DEAD 0x0004
#define NSPACE_ITEM_CANCELLED 0x0008
#define NSPACE_ITEM_DONE 0x0010
#define NSPACE_ITEM_RESET_TIMER 0x0020

/* nspace_item_info.flags event-type bits (which handler services the item) */
#define NSPACE_ITEM_NSPACE_EVENT 0x0040
#define NSPACE_ITEM_SNAPSHOT_EVENT 0x0080

#define NSPACE_ITEM_ALL_EVENT_TYPES (NSPACE_ITEM_NSPACE_EVENT | NSPACE_ITEM_SNAPSHOT_EVENT)

//#pragma optimization_level 0

/* The kinds of user-space namespace handlers that can register. */
typedef enum {
	NSPACE_HANDLER_NSPACE = 0,
	NSPACE_HANDLER_SNAPSHOT = 1,

	NSPACE_HANDLER_COUNT,
} nspace_type_t;

/* Registration record for one handler type. */
typedef struct {
	uint64_t handler_tid;		/* thread id of the registered handler thread */
	struct proc *handler_proc;	/* process acting as the handler (NULL if none) */
	int handler_busy;		/* non-zero while a handler is inside wait_for_namespace_event() */
} nspace_handler_t;

nspace_handler_t nspace_handlers[NSPACE_HANDLER_COUNT];

/* namespace fsctl functions */
static int nspace_flags_matches_handler(uint32_t event_flags, nspace_type_t nspace_type);
static int nspace_item_flags_for_type(nspace_type_t nspace_type);
static int nspace_open_flags_for_type(nspace_type_t nspace_type);
static nspace_type_t nspace_type_for_op(uint64_t op);
static int nspace_is_special_process(struct proc *proc);
static int vn_open_with_vp(vnode_t vp, int fmode, vfs_context_t ctx);
static int wait_for_namespace_event(namespace_handler_data *nhd, nspace_type_t nspace_type);
static int validate_namespace_args (int is64bit, int size);
static int process_namespace_fsctl(nspace_type_t nspace_type, int is64bit, u_int size, caddr_t data);
8569
8570static inline int nspace_flags_matches_handler(uint32_t event_flags, nspace_type_t nspace_type)
8571{
8572 switch(nspace_type) {
8573 case NSPACE_HANDLER_NSPACE:
8574 return (event_flags & NSPACE_ITEM_ALL_EVENT_TYPES) == NSPACE_ITEM_NSPACE_EVENT;
8575 case NSPACE_HANDLER_SNAPSHOT:
8576 return (event_flags & NSPACE_ITEM_ALL_EVENT_TYPES) == NSPACE_ITEM_SNAPSHOT_EVENT;
8577 default:
8578 printf("nspace_flags_matches_handler: invalid type %u\n", (int)nspace_type);
8579 return 0;
8580 }
8581}
8582
8583static inline int nspace_item_flags_for_type(nspace_type_t nspace_type)
8584{
8585 switch(nspace_type) {
8586 case NSPACE_HANDLER_NSPACE:
8587 return NSPACE_ITEM_NSPACE_EVENT;
8588 case NSPACE_HANDLER_SNAPSHOT:
8589 return NSPACE_ITEM_SNAPSHOT_EVENT;
8590 default:
8591 printf("nspace_item_flags_for_type: invalid type %u\n", (int)nspace_type);
8592 return 0;
8593 }
8594}
8595
8596static inline int nspace_open_flags_for_type(nspace_type_t nspace_type)
8597{
8598 switch(nspace_type) {
8599 case NSPACE_HANDLER_NSPACE:
8600 return FREAD | FWRITE | O_EVTONLY;
8601 case NSPACE_HANDLER_SNAPSHOT:
8602 return FREAD | O_EVTONLY;
8603 default:
8604 printf("nspace_open_flags_for_type: invalid type %u\n", (int)nspace_type);
8605 return 0;
8606 }
8607}
8608
8609static inline nspace_type_t nspace_type_for_op(uint64_t op)
8610{
8611 switch(op & NAMESPACE_HANDLER_EVENT_TYPE_MASK) {
8612 case NAMESPACE_HANDLER_NSPACE_EVENT:
8613 return NSPACE_HANDLER_NSPACE;
8614 case NAMESPACE_HANDLER_SNAPSHOT_EVENT:
8615 return NSPACE_HANDLER_SNAPSHOT;
8616 default:
8617 printf("nspace_type_for_op: invalid op mask %llx\n", op & NAMESPACE_HANDLER_EVENT_TYPE_MASK);
8618 return NSPACE_HANDLER_NSPACE;
8619 }
8620}
8621
8622static inline int nspace_is_special_process(struct proc *proc)
8623{
8624 int i;
8625 for (i = 0; i < NSPACE_HANDLER_COUNT; i++) {
8626 if (proc == nspace_handlers[i].handler_proc)
8627 return 1;
8628 }
8629 return 0;
8630}
8631
8632void
8633nspace_handler_init(void)
8634{
8635 nspace_lock_attr = lck_attr_alloc_init();
8636 nspace_group_attr = lck_grp_attr_alloc_init();
8637 nspace_mutex_group = lck_grp_alloc_init("nspace-mutex", nspace_group_attr);
8638 lck_mtx_init(&nspace_handler_lock, nspace_mutex_group, nspace_lock_attr);
8639 lck_mtx_init(&nspace_handler_exclusion_lock, nspace_mutex_group, nspace_lock_attr);
8640 memset(&nspace_items[0], 0, sizeof(nspace_items));
8641}
8642
/*
 * Called when a process exits: if it was a registered namespace handler,
 * deregister it and unblock every thread waiting on an event of that
 * handler's type, marking those events done.
 */
void
nspace_proc_exit(struct proc *p)
{
	int i, event_mask = 0;

	/* Deregister 'p' from every handler slot it occupies. */
	for (i = 0; i < NSPACE_HANDLER_COUNT; i++) {
		if (p == nspace_handlers[i].handler_proc) {
			event_mask |= nspace_item_flags_for_type(i);
			nspace_handlers[i].handler_tid = 0;
			nspace_handlers[i].handler_proc = NULL;
		}
	}

	/* Not a handler for any event type: nothing to clean up. */
	if (event_mask == 0) {
		return;
	}

	if (event_mask & NSPACE_ITEM_SNAPSHOT_EVENT) {
		// if this process was the snapshot handler, zero snapshot_timeout
		snapshot_timestamp = 0;
	}

	//
	// unblock anyone that's waiting for the handler that died
	//
	lck_mtx_lock(&nspace_handler_lock);
	for(i=0; i < MAX_NSPACE_ITEMS; i++) {
		if (nspace_items[i].flags & (NSPACE_ITEM_NEW | NSPACE_ITEM_PROCESSING)) {

			if ( nspace_items[i].flags & event_mask ) {

				/* Drop the snapshot-needed hint before detaching the vnode. */
				if (nspace_items[i].vp && (nspace_items[i].vp->v_flag & VNEEDSSNAPSHOT)) {
					vnode_lock_spin(nspace_items[i].vp);
					nspace_items[i].vp->v_flag &= ~VNEEDSSNAPSHOT;
					vnode_unlock(nspace_items[i].vp);
				}
				nspace_items[i].vp = NULL;
				nspace_items[i].vid = 0;
				nspace_items[i].flags = NSPACE_ITEM_DONE;
				nspace_items[i].token = 0;

				/* Wake the thread sleeping on this slot in resolve_nspace_item_ext(). */
				wakeup((caddr_t)&(nspace_items[i].vp));
			}
		}
	}

	/* Also wake any handler thread blocked waiting for new items. */
	wakeup((caddr_t)&nspace_item_idx);
	lck_mtx_unlock(&nspace_handler_lock);
}
8692
8693
/*
 * Deliver a namespace event of type 'op' for 'vp' to the registered
 * user-space handler and wait for it to be serviced. Convenience wrapper
 * around resolve_nspace_item_ext() with no argument payload.
 */
int
resolve_nspace_item(struct vnode *vp, uint64_t op)
{
	return resolve_nspace_item_ext(vp, op, NULL);
}
8699
/*
 * Queue a namespace event of type 'op' for 'vp' (with optional payload
 * 'arg') into nspace_items[] and block until a user-space handler marks it
 * done, cancels it, or the per-wait timeout (nspace_handler_timeout
 * seconds) expires. Returns 0 on success or benign no-op, EDEADLK when
 * called from a handler process itself, ETIMEDOUT on timeout, or another
 * errno value from msleep().
 */
int
resolve_nspace_item_ext(struct vnode *vp, uint64_t op, void *arg)
{
	int i, error, keep_waiting;
	struct timespec ts;
	nspace_type_t nspace_type = nspace_type_for_op(op);

	// only allow namespace events on regular files, directories and symlinks.
	if (vp->v_type != VREG && vp->v_type != VDIR && vp->v_type != VLNK) {
		return 0;
	}

	//
	// if this is a snapshot event and the vnode is on a
	// disk image just pretend nothing happened since any
	// change to the disk image will cause the disk image
	// itself to get backed up and this avoids multi-way
	// deadlocks between the snapshot handler and the ever
	// popular diskimages-helper process. the variable
	// nspace_allow_virtual_devs allows this behavior to
	// be overridden (for use by the Mobile TimeMachine
	// testing infrastructure which uses disk images)
	//
	if ( (op & NAMESPACE_HANDLER_SNAPSHOT_EVENT)
	    && (vp->v_mount != NULL)
	    && (vp->v_mount->mnt_kern_flag & MNTK_VIRTUALDEV)
	    && !nspace_allow_virtual_devs) {

		return 0;
	}

	// if (thread_tid(current_thread()) == namespace_handler_tid) {
	/* No handler registered for this event type: nothing to wait for. */
	if (nspace_handlers[nspace_type].handler_proc == NULL) {
		return 0;
	}

	/* A handler process generating events would deadlock against itself. */
	if (nspace_is_special_process(current_proc())) {
		return EDEADLK;
	}

	lck_mtx_lock(&nspace_handler_lock);

retry:
	/* Look for an existing item for this (vp, op) pair. */
	for(i=0; i < MAX_NSPACE_ITEMS; i++) {
		if (vp == nspace_items[i].vp && op == nspace_items[i].op) {
			break;
		}
	}

	/* Not found: look for a free slot. Found: join the existing waiters. */
	if (i >= MAX_NSPACE_ITEMS) {
		for(i=0; i < MAX_NSPACE_ITEMS; i++) {
			if (nspace_items[i].flags == 0) {
				break;
			}
		}
	} else {
		nspace_items[i].refcount++;
	}

	/* Table full: sleep until a slot frees up, then retry. */
	if (i >= MAX_NSPACE_ITEMS) {
		ts.tv_sec = nspace_handler_timeout;
		ts.tv_nsec = 0;

		error = msleep((caddr_t)&nspace_token_id, &nspace_handler_lock, PVFS|PCATCH, "nspace-no-space", &ts);
		if (error == 0) {
			// an entry got free'd up, go see if we can get a slot
			goto retry;
		} else {
			lck_mtx_unlock(&nspace_handler_lock);
			return error;
		}
	}

	//
	// if it didn't already exist, add it. if it did exist
	// we'll get woken up when someone does a wakeup() on
	// the slot in the nspace_items table.
	//
	if (vp != nspace_items[i].vp) {
		nspace_items[i].vp = vp;
		nspace_items[i].arg = (arg == NSPACE_REARM_NO_ARG) ? NULL : arg; // arg is {NULL, true, uio *} - only pass uio thru to the user
		nspace_items[i].op = op;
		nspace_items[i].vid = vnode_vid(vp);
		nspace_items[i].flags = NSPACE_ITEM_NEW;
		nspace_items[i].flags |= nspace_item_flags_for_type(nspace_type);
		if (nspace_items[i].flags & NSPACE_ITEM_SNAPSHOT_EVENT) {
			if (arg) {
				/* Tag the vnode so cleanup paths know a snapshot was pending. */
				vnode_lock_spin(vp);
				vp->v_flag |= VNEEDSSNAPSHOT;
				vnode_unlock(vp);
			}
		}

		nspace_items[i].token = 0;
		nspace_items[i].refcount = 1;

		/* Notify the handler thread blocked in wait_for_namespace_event(). */
		wakeup((caddr_t)&nspace_item_idx);
	}

	//
	// Now go to sleep until the handler does a wakeup on this
	// slot in the nspace_items table (or we timeout).
	//
	keep_waiting = 1;
	while(keep_waiting) {
		ts.tv_sec = nspace_handler_timeout;
		ts.tv_nsec = 0;
		error = msleep((caddr_t)&(nspace_items[i].vp), &nspace_handler_lock, PVFS|PCATCH, "namespace-done", &ts);

		if (nspace_items[i].flags & NSPACE_ITEM_DONE) {
			error = 0;
		} else if (nspace_items[i].flags & NSPACE_ITEM_CANCELLED) {
			/* On cancellation the token field carries the errno to return. */
			error = nspace_items[i].token;
		} else if (error == EWOULDBLOCK || error == ETIMEDOUT) {
			if (nspace_items[i].flags & NSPACE_ITEM_RESET_TIMER) {
				/* Handler asked for more time: rearm the timeout and keep waiting. */
				nspace_items[i].flags &= ~NSPACE_ITEM_RESET_TIMER;
				continue;
			} else {
				error = ETIMEDOUT;
			}
		} else if (error == 0) {
			// hmmm, why did we get woken up?
			printf("woken up for token %d but it's not done, cancelled or timedout and error == 0.\n",
			       nspace_items[i].token);
		}

		/* Last waiter out recycles the slot. */
		if (--nspace_items[i].refcount == 0) {
			nspace_items[i].vp = NULL; // clear this so that no one will match on it again
			nspace_items[i].arg = NULL;
			nspace_items[i].token = 0; // clear this so that the handler will not find it anymore
			nspace_items[i].flags = 0; // this clears it for re-use
		}
		/* Wake any thread stalled in the table-full case above. */
		wakeup(&nspace_token_id);
		keep_waiting = 0;
	}

	lck_mtx_unlock(&nspace_handler_lock);

	return error;
}
8840
8841
8842int
8843get_nspace_item_status(struct vnode *vp, int32_t *status)
8844{
8845 int i;
8846
8847 lck_mtx_lock(&nspace_handler_lock);
8848 for(i=0; i < MAX_NSPACE_ITEMS; i++) {
8849 if (nspace_items[i].vp == vp) {
8850 break;
8851 }
8852 }
8853
8854 if (i >= MAX_NSPACE_ITEMS) {
8855 lck_mtx_unlock(&nspace_handler_lock);
8856 return ENOENT;
8857 }
8858
8859 *status = nspace_items[i].flags;
8860 lck_mtx_unlock(&nspace_handler_lock);
8861 return 0;
8862}
8863
8864
#if 0
/*
 * Build a volfs-style "/.vol/<fsid>/<fileid>" path for 'vp' into 'path'.
 * On entry *len is the buffer capacity; on return it is the formatted
 * length including the NUL terminator. Returns 0 on success, -1 when the
 * vnode attributes could not be fetched. (Currently compiled out.)
 */
static int
build_volfs_path(struct vnode *vp, char *path, int *len)
{
	struct vnode_attr va;
	int ret;

	VATTR_INIT(&va);
	VATTR_WANTED(&va, va_fsid);
	VATTR_WANTED(&va, va_fileid);

	if (vnode_getattr(vp, &va, vfs_context_kernel()) != 0) {
		/* Attribute fetch failed: substitute a clearly-invalid path. */
		*len = snprintf(path, *len, "/non/existent/path/because/vnode_getattr/failed") + 1;
		ret = -1;
	} else {
		*len = snprintf(path, *len, "/.vol/%d/%lld", (dev_t)va.va_fsid, va.va_fileid) + 1;
		ret = 0;
	}

	return ret;
}
#endif
8887
8888//
8889// Note: this function does NOT check permissions on all of the
8890// parent directories leading to this vnode. It should only be
8891// called on behalf of a root process. Otherwise a process may
8892// get access to a file because the file itself is readable even
8893// though its parent directories would prevent access.
8894//
8895static int
8896vn_open_with_vp(vnode_t vp, int fmode, vfs_context_t ctx)
8897{
8898 int error, action;
8899
8900 if ((error = suser(kauth_cred_get(), &(current_proc()->p_acflag)))) {
8901 return error;
8902 }
8903
8904#if CONFIG_MACF
8905 error = mac_vnode_check_open(ctx, vp, fmode);
8906 if (error)
8907 return error;
8908#endif
8909
8910 /* compute action to be authorized */
8911 action = 0;
8912 if (fmode & FREAD) {
8913 action |= KAUTH_VNODE_READ_DATA;
8914 }
8915 if (fmode & (FWRITE | O_TRUNC)) {
8916 /*
8917 * If we are writing, appending, and not truncating,
8918 * indicate that we are appending so that if the
8919 * UF_APPEND or SF_APPEND bits are set, we do not deny
8920 * the open.
8921 */
8922 if ((fmode & O_APPEND) && !(fmode & O_TRUNC)) {
8923 action |= KAUTH_VNODE_APPEND_DATA;
8924 } else {
8925 action |= KAUTH_VNODE_WRITE_DATA;
8926 }
8927 }
8928
8929 if ((error = vnode_authorize(vp, NULL, action, ctx)) != 0)
8930 return error;
8931
8932
8933 //
8934 // if the vnode is tagged VOPENEVT and the current process
8935 // has the P_CHECKOPENEVT flag set, then we or in the O_EVTONLY
8936 // flag to the open mode so that this open won't count against
8937 // the vnode when carbon delete() does a vnode_isinuse() to see
8938 // if a file is currently in use. this allows spotlight
8939 // importers to not interfere with carbon apps that depend on
8940 // the no-delete-if-busy semantics of carbon delete().
8941 //
8942 if ((vp->v_flag & VOPENEVT) && (current_proc()->p_flag & P_CHECKOPENEVT)) {
8943 fmode |= O_EVTONLY;
8944 }
8945
8946 if ( (error = VNOP_OPEN(vp, fmode, ctx)) ) {
8947 return error;
8948 }
8949 if ( (error = vnode_ref_ext(vp, fmode, 0)) ) {
8950 VNOP_CLOSE(vp, fmode, ctx);
8951 return error;
8952 }
8953
8954 /* Call out to allow 3rd party notification of open.
8955 * Ignore result of kauth_authorize_fileop call.
8956 */
8957#if CONFIG_MACF
8958 mac_vnode_notify_open(ctx, vp, fmode);
8959#endif
8960 kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_OPEN,
8961 (uintptr_t)vp, 0);
8962
8963
8964 return 0;
8965}
8966
/*
 * Run the user-space handler side of the namespace protocol for
 * 'nspace_type': wait for a pending item in nspace_items[], open its vnode,
 * wire it into the calling process's file table, and copy the
 * token/flags/fd (plus optional info and object-id fields) out to the
 * userland structure described by 'nhd'. Only one handler of each type may
 * be inside this function at a time (EBUSY otherwise). Returns 0 when an
 * item was handed off, or an errno value.
 */
static int
wait_for_namespace_event(namespace_handler_data *nhd, nspace_type_t nspace_type)
{
	int i, error=0, unblock=0;
	task_t curtask;

	/* Enforce one in-flight handler per type. */
	lck_mtx_lock(&nspace_handler_exclusion_lock);
	if (nspace_handlers[nspace_type].handler_busy) {
		lck_mtx_unlock(&nspace_handler_exclusion_lock);
		return EBUSY;
	}
	nspace_handlers[nspace_type].handler_busy = 1;
	lck_mtx_unlock(&nspace_handler_exclusion_lock);

	/*
	 * Any process that gets here will be one of the namespace handlers.
	 * As such, they should be prevented from acquiring DMG vnodes during vnode reclamation
	 * as we can cause deadlocks to occur, because the namespace handler may prevent
	 * VNOP_INACTIVE from proceeding. Mark the current task as a P_DEPENDENCY_CAPABLE
	 * process.
	 */
	curtask = current_task();
	bsd_set_dependency_capable (curtask);

	lck_mtx_lock(&nspace_handler_lock);
	/* First caller of this type registers as the handler process. */
	if (nspace_handlers[nspace_type].handler_proc == NULL) {
		nspace_handlers[nspace_type].handler_tid = thread_tid(current_thread());
		nspace_handlers[nspace_type].handler_proc = current_proc();
	}

	while (error == 0) {

		/* Look for a NEW item of the event type this handler services. */
		for(i=0; i < MAX_NSPACE_ITEMS; i++) {
			if (nspace_items[i].flags & NSPACE_ITEM_NEW) {
				if (!nspace_flags_matches_handler(nspace_items[i].flags, nspace_type)) {
					continue;
				}
				break;
			}
		}

		if (i < MAX_NSPACE_ITEMS) {
			/* Claim the item and assign the token we hand to userland. */
			nspace_items[i].flags &= ~NSPACE_ITEM_NEW;
			nspace_items[i].flags |= NSPACE_ITEM_PROCESSING;
			nspace_items[i].token = ++nspace_token_id;

			if (nspace_items[i].vp) {
				struct fileproc *fp;
				int32_t indx, fmode;
				struct proc *p = current_proc();
				vfs_context_t ctx = vfs_context_current();
				struct vnode_attr va;


				/*
				 * Use vnode pointer to acquire a file descriptor for
				 * hand-off to userland
				 */
				fmode = nspace_open_flags_for_type(nspace_type);
				/* Revalidate the vnode against the vid captured at enqueue time. */
				error = vnode_getwithvid(nspace_items[i].vp, nspace_items[i].vid);
				if (error) {
					unblock = 1;
					break;
				}
				error = vn_open_with_vp(nspace_items[i].vp, fmode, ctx);
				if (error) {
					unblock = 1;
					vnode_put(nspace_items[i].vp);
					break;
				}

				if ((error = falloc(p, &fp, &indx, ctx))) {
					vn_close(nspace_items[i].vp, fmode, ctx);
					vnode_put(nspace_items[i].vp);
					unblock = 1;
					break;
				}

				fp->f_fglob->fg_flag = fmode;
				fp->f_fglob->fg_ops = &vnops;
				fp->f_fglob->fg_data = (caddr_t)nspace_items[i].vp;

				/* Publish the descriptor in the process's fd table. */
				proc_fdlock(p);
				procfdtbl_releasefd(p, indx, NULL);
				fp_drop(p, indx, fp, 1);
				proc_fdunlock(p);

				/*
				 * All variants of the namespace handler struct support these three fields:
				 * token, flags, and the FD pointer
				 *
				 * NOTE(review): each copyout below overwrites 'error', so a
				 * failure of an earlier copyout can be masked by a later
				 * success; only the final value of 'error' is acted on below.
				 */
				error = copyout(&nspace_items[i].token, nhd->token, sizeof(uint32_t));
				error = copyout(&nspace_items[i].op, nhd->flags, sizeof(uint64_t));
				error = copyout(&indx, nhd->fdptr, sizeof(uint32_t));

				/*
				 * Handle optional fields:
				 * extended version support an info ptr (offset, length), and the
				 *
				 * namedata version supports a unique per-link object ID
				 *
				 */
				if (nhd->infoptr) {
					uio_t uio = (uio_t)nspace_items[i].arg;
					uint64_t u_offset, u_length;

					if (uio) {
						u_offset = uio_offset(uio);
						u_length = uio_resid(uio);
					} else {
						u_offset = 0;
						u_length = 0;
					}
					error = copyout(&u_offset, nhd->infoptr, sizeof(uint64_t));
					error = copyout(&u_length, nhd->infoptr+sizeof(uint64_t), sizeof(uint64_t));
				}

				if (nhd->objid) {
					VATTR_INIT(&va);
					VATTR_WANTED(&va, va_linkid);
					error = vnode_getattr(nspace_items[i].vp, &va, ctx);
					if (error == 0 ) {
						uint64_t linkid = 0;
						if (VATTR_IS_SUPPORTED (&va, va_linkid)) {
							linkid = (uint64_t)va.va_linkid;
						}
						error = copyout (&linkid, nhd->objid, sizeof(uint64_t));
					}
				}

				if (error) {
					/* Copyout failed: undo the open and the fd allocation. */
					vn_close(nspace_items[i].vp, fmode, ctx);
					fp_free(p, indx, fp);
					unblock = 1;
				}

				vnode_put(nspace_items[i].vp);

				break;
			} else {
				printf("wait_for_nspace_event: failed (nspace_items[%d] == %p error %d, name %s)\n",
						i, nspace_items[i].vp, error, nspace_items[i].vp->v_name);
			}

		} else {
			/* Nothing pending: sleep until resolve_nspace_item_ext() queues an item. */
			error = msleep((caddr_t)&nspace_item_idx, &nspace_handler_lock, PVFS|PCATCH, "namespace-items", 0);
			if ((nspace_type == NSPACE_HANDLER_SNAPSHOT) && (snapshot_timestamp == 0 || snapshot_timestamp == ~0)) {
				/* Snapshot handling was torn down while we slept. */
				error = EINVAL;
				break;
			}

		}
	}

	if (unblock) {
		/* Hand-off failed: mark the claimed item done so its waiter is released. */
		if (nspace_items[i].vp && (nspace_items[i].vp->v_flag & VNEEDSSNAPSHOT)) {
			vnode_lock_spin(nspace_items[i].vp);
			nspace_items[i].vp->v_flag &= ~VNEEDSSNAPSHOT;
			vnode_unlock(nspace_items[i].vp);
		}
		nspace_items[i].vp = NULL;
		nspace_items[i].vid = 0;
		nspace_items[i].flags = NSPACE_ITEM_DONE;
		nspace_items[i].token = 0;

		wakeup((caddr_t)&(nspace_items[i].vp));
	}

	if (nspace_type == NSPACE_HANDLER_SNAPSHOT) {
		// just go through every snapshot event and unblock it immediately.
		if (error && (snapshot_timestamp == 0 || snapshot_timestamp == ~0)) {
			for(i=0; i < MAX_NSPACE_ITEMS; i++) {
				if (nspace_items[i].flags & NSPACE_ITEM_NEW) {
					if (nspace_flags_matches_handler(nspace_items[i].flags, nspace_type)) {
						nspace_items[i].vp = NULL;
						nspace_items[i].vid = 0;
						nspace_items[i].flags = NSPACE_ITEM_DONE;
						nspace_items[i].token = 0;

						wakeup((caddr_t)&(nspace_items[i].vp));
					}
				}
			}
		}
	}

	lck_mtx_unlock(&nspace_handler_lock);

	/* Allow another handler of this type in. */
	lck_mtx_lock(&nspace_handler_exclusion_lock);
	nspace_handlers[nspace_type].handler_busy = 0;
	lck_mtx_unlock(&nspace_handler_exclusion_lock);

	return error;
}
9161
9162static inline int validate_namespace_args (int is64bit, int size) {
9163
9164 if (is64bit) {
9165 /* Must be one of these */
9166 if (size == sizeof(user64_namespace_handler_info)) {
9167 goto sizeok;
9168 }
9169 if (size == sizeof(user64_namespace_handler_info_ext)) {
9170 goto sizeok;
9171 }
9172 if (size == sizeof(user64_namespace_handler_data)) {
9173 goto sizeok;
9174 }
9175 return EINVAL;
9176 }
9177 else {
9178 /* 32 bit -- must be one of these */
9179 if (size == sizeof(user32_namespace_handler_info)) {
9180 goto sizeok;
9181 }
9182 if (size == sizeof(user32_namespace_handler_info_ext)) {
9183 goto sizeok;
9184 }
9185 if (size == sizeof(user32_namespace_handler_data)) {
9186 goto sizeok;
9187 }
9188 return EINVAL;
9189 }
9190
9191sizeok:
9192
9193 return 0;
9194
9195}
9196
/*
 * fsctl backend for registering as a namespace handler: validate and unpack
 * the userland namespace_handler structure (32- or 64-bit, in any of its
 * three size variants) into a kernel-side namespace_handler_data, then
 * block in wait_for_namespace_event() servicing events. Caller must be
 * superuser. Returns an errno value, or the result of the event wait.
 */
static int process_namespace_fsctl(nspace_type_t nspace_type, int is64bit, u_int size, caddr_t data)
{
	int error = 0;
	namespace_handler_data nhd;

	/* Zero so the optional fields default to "not supplied". */
	bzero (&nhd, sizeof(namespace_handler_data));

	/* Snapshot handlers are refused while no snapshot timestamp is active. */
	if (nspace_type == NSPACE_HANDLER_SNAPSHOT &&
	    (snapshot_timestamp == 0 || snapshot_timestamp == ~0)) {
		return EINVAL;
	}

	if ((error = suser(kauth_cred_get(), &(current_proc()->p_acflag)))) {
		return error;
	}

	error = validate_namespace_args (is64bit, size);
	if (error) {
		return error;
	}

	/* Copy in the userland pointers into our kernel-only struct */

	if (is64bit) {
		/* 64 bit userland structures */
		nhd.token = (user_addr_t)((user64_namespace_handler_info *)data)->token;
		nhd.flags = (user_addr_t)((user64_namespace_handler_info *)data)->flags;
		nhd.fdptr = (user_addr_t)((user64_namespace_handler_info *)data)->fdptr;

		/* If the size is greater than the standard info struct, add in extra fields */
		if (size > (sizeof(user64_namespace_handler_info))) {
			if (size >= (sizeof(user64_namespace_handler_info_ext))) {
				nhd.infoptr = (user_addr_t)((user64_namespace_handler_info_ext *)data)->infoptr;
			}
			if (size == (sizeof(user64_namespace_handler_data))) {
				nhd.objid = (user_addr_t)((user64_namespace_handler_data*)data)->objid;
			}
			/* Otherwise the fields were pre-zeroed when we did the bzero above. */
		}
	}
	else {
		/* 32 bit userland structures */
		nhd.token = CAST_USER_ADDR_T(((user32_namespace_handler_info *)data)->token);
		nhd.flags = CAST_USER_ADDR_T(((user32_namespace_handler_info *)data)->flags);
		nhd.fdptr = CAST_USER_ADDR_T(((user32_namespace_handler_info *)data)->fdptr);

		if (size > (sizeof(user32_namespace_handler_info))) {
			if (size >= (sizeof(user32_namespace_handler_info_ext))) {
				nhd.infoptr = CAST_USER_ADDR_T(((user32_namespace_handler_info_ext *)data)->infoptr);
			}
			if (size == (sizeof(user32_namespace_handler_data))) {
				nhd.objid = (user_addr_t)((user32_namespace_handler_data*)data)->objid;
			}
			/* Otherwise the fields were pre-zeroed when we did the bzero above. */
		}
	}

	return wait_for_namespace_event(&nhd, nspace_type);
}
9256
9257/*
9258 * Make a filesystem-specific control call:
9259 */
9260/* ARGSUSED */
9261static int
9262fsctl_internal(proc_t p, vnode_t *arg_vp, u_long cmd, user_addr_t udata, u_long options, vfs_context_t ctx)
9263{
9264 int error=0;
9265 boolean_t is64bit;
9266 u_int size;
9267#define STK_PARAMS 128
9268 char stkbuf[STK_PARAMS];
9269 caddr_t data, memp;
9270 vnode_t vp = *arg_vp;
9271
9272 size = IOCPARM_LEN(cmd);
9273 if (size > IOCPARM_MAX) return (EINVAL);
9274
9275 is64bit = proc_is64bit(p);
9276
9277 memp = NULL;
9278
9279
9280 /*
9281 * ensure the buffer is large enough for underlying calls
9282 */
9283#ifndef HFSIOC_GETPATH
9284 typedef char pn_t[MAXPATHLEN];
9285#define HFSIOC_GETPATH _IOWR('h', 13, pn_t)
9286#endif
9287
9288#ifndef HFS_GETPATH
9289#define HFS_GETPATH IOCBASECMD(HFSIOC_GETPATH)
9290#endif
9291 if (IOCBASECMD(cmd) == HFS_GETPATH) {
9292 /* Round up to MAXPATHLEN regardless of user input */
9293 size = MAXPATHLEN;
9294 }
9295
9296 if (size > sizeof (stkbuf)) {
9297 if ((memp = (caddr_t)kalloc(size)) == 0) return ENOMEM;
9298 data = memp;
9299 } else {
9300 data = &stkbuf[0];
9301 };
9302
9303 if (cmd & IOC_IN) {
9304 if (size) {
9305 error = copyin(udata, data, size);
9306 if (error) {
9307 if (memp) {
9308 kfree (memp, size);
9309 }
9310 return error;
9311 }
9312 } else {
9313 if (is64bit) {
9314 *(user_addr_t *)data = udata;
9315 }
9316 else {
9317 *(uint32_t *)data = (uint32_t)udata;
9318 }
9319 };
9320 } else if ((cmd & IOC_OUT) && size) {
9321 /*
9322 * Zero the buffer so the user always
9323 * gets back something deterministic.
9324 */
9325 bzero(data, size);
9326 } else if (cmd & IOC_VOID) {
9327 if (is64bit) {
9328 *(user_addr_t *)data = udata;
9329 }
9330 else {
9331 *(uint32_t *)data = (uint32_t)udata;
9332 }
9333 }
9334
9335 /* Check to see if it's a generic command */
9336 switch (IOCBASECMD(cmd)) {
9337
9338 case FSCTL_SYNC_VOLUME: {
9339 mount_t mp = vp->v_mount;
9340 int arg = *(uint32_t*)data;
9341
9342 /* record vid of vp so we can drop it below. */
9343 uint32_t vvid = vp->v_id;
9344
9345 /*
9346 * Then grab mount_iterref so that we can release the vnode.
9347 * Without this, a thread may call vnode_iterate_prepare then
9348 * get into a deadlock because we've never released the root vp
9349 */
9350 error = mount_iterref (mp, 0);
9351 if (error) {
9352 break;
9353 }
9354 vnode_put(vp);
9355
9356 /* issue the sync for this volume */
9357 (void)sync_callback(mp, (arg & FSCTL_SYNC_WAIT) ? &arg : NULL);
9358
9359 /*
9360 * Then release the mount_iterref once we're done syncing; it's not
9361 * needed for the VNOP_IOCTL below
9362 */
9363 mount_iterdrop(mp);
9364
9365 if (arg & FSCTL_SYNC_FULLSYNC) {
9366 /* re-obtain vnode iocount on the root vp, if possible */
9367 error = vnode_getwithvid (vp, vvid);
9368 if (error == 0) {
9369 error = VNOP_IOCTL(vp, F_FULLFSYNC, (caddr_t)NULL, 0, ctx);
9370 vnode_put (vp);
9371 }
9372 }
9373 /* mark the argument VP as having been released */
9374 *arg_vp = NULL;
9375 }
9376 break;
9377
9378 case FSCTL_SET_PACKAGE_EXTS: {
9379 user_addr_t ext_strings;
9380 uint32_t num_entries;
9381 uint32_t max_width;
9382
9383 if ( (is64bit && size != sizeof(user64_package_ext_info))
9384 || (is64bit == 0 && size != sizeof(user32_package_ext_info))) {
9385
9386 // either you're 64-bit and passed a 64-bit struct or
9387 // you're 32-bit and passed a 32-bit struct. otherwise
9388 // it's not ok.
9389 error = EINVAL;
9390 break;
9391 }
9392
9393 if (is64bit) {
9394 ext_strings = ((user64_package_ext_info *)data)->strings;
9395 num_entries = ((user64_package_ext_info *)data)->num_entries;
9396 max_width = ((user64_package_ext_info *)data)->max_width;
9397 } else {
9398 ext_strings = CAST_USER_ADDR_T(((user32_package_ext_info *)data)->strings);
9399 num_entries = ((user32_package_ext_info *)data)->num_entries;
9400 max_width = ((user32_package_ext_info *)data)->max_width;
9401 }
9402 error = set_package_extensions_table(ext_strings, num_entries, max_width);
9403 }
9404 break;
9405
9406 /* namespace handlers */
9407 case FSCTL_NAMESPACE_HANDLER_GET: {
9408 error = process_namespace_fsctl(NSPACE_HANDLER_NSPACE, is64bit, size, data);
9409 }
9410 break;
9411
9412 /* Snapshot handlers */
9413 case FSCTL_OLD_SNAPSHOT_HANDLER_GET: {
9414 error = process_namespace_fsctl(NSPACE_HANDLER_SNAPSHOT, is64bit, size, data);
9415 }
9416 break;
9417
9418 case FSCTL_SNAPSHOT_HANDLER_GET_EXT: {
9419 error = process_namespace_fsctl(NSPACE_HANDLER_SNAPSHOT, is64bit, size, data);
9420 }
9421 break;
9422
9423 case FSCTL_NAMESPACE_HANDLER_UPDATE: {
9424 uint32_t token, val;
9425 int i;
9426
9427 if ((error = suser(kauth_cred_get(), &(p->p_acflag)))) {
9428 break;
9429 }
9430
9431 if (!nspace_is_special_process(p)) {
9432 error = EINVAL;
9433 break;
9434 }
9435
9436 token = ((uint32_t *)data)[0];
9437 val = ((uint32_t *)data)[1];
9438
9439 lck_mtx_lock(&nspace_handler_lock);
9440
9441 for(i=0; i < MAX_NSPACE_ITEMS; i++) {
9442 if (nspace_items[i].token == token) {
9443 break; /* exit for loop, not case stmt */
9444 }
9445 }
9446
9447 if (i >= MAX_NSPACE_ITEMS) {
9448 error = ENOENT;
9449 } else {
9450 //
9451 // if this bit is set, when resolve_nspace_item() times out
9452 // it will loop and go back to sleep.
9453 //
9454 nspace_items[i].flags |= NSPACE_ITEM_RESET_TIMER;
9455 }
9456
9457 lck_mtx_unlock(&nspace_handler_lock);
9458
9459 if (error) {
9460 printf("nspace-handler-update: did not find token %u\n", token);
9461 }
9462 }
9463 break;
9464
9465 case FSCTL_NAMESPACE_HANDLER_UNBLOCK: {
9466 uint32_t token, val;
9467 int i;
9468
9469 if ((error = suser(kauth_cred_get(), &(p->p_acflag)))) {
9470 break;
9471 }
9472
9473 if (!nspace_is_special_process(p)) {
9474 error = EINVAL;
9475 break;
9476 }
9477
9478 token = ((uint32_t *)data)[0];
9479 val = ((uint32_t *)data)[1];
9480
9481 lck_mtx_lock(&nspace_handler_lock);
9482
9483 for(i=0; i < MAX_NSPACE_ITEMS; i++) {
9484 if (nspace_items[i].token == token) {
9485 break; /* exit for loop, not case statement */
9486 }
9487 }
9488
9489 if (i >= MAX_NSPACE_ITEMS) {
9490 printf("nspace-handler-unblock: did not find token %u\n", token);
9491 error = ENOENT;
9492 } else {
9493 if (val == 0 && nspace_items[i].vp) {
9494 vnode_lock_spin(nspace_items[i].vp);
9495 nspace_items[i].vp->v_flag &= ~VNEEDSSNAPSHOT;
9496 vnode_unlock(nspace_items[i].vp);
9497 }
9498
9499 nspace_items[i].vp = NULL;
9500 nspace_items[i].arg = NULL;
9501 nspace_items[i].op = 0;
9502 nspace_items[i].vid = 0;
9503 nspace_items[i].flags = NSPACE_ITEM_DONE;
9504 nspace_items[i].token = 0;
9505
9506 wakeup((caddr_t)&(nspace_items[i].vp));
9507 }
9508
9509 lck_mtx_unlock(&nspace_handler_lock);
9510 }
9511 break;
9512
9513 case FSCTL_NAMESPACE_HANDLER_CANCEL: {
9514 uint32_t token, val;
9515 int i;
9516
9517 if ((error = suser(kauth_cred_get(), &(p->p_acflag)))) {
9518 break;
9519 }
9520
9521 if (!nspace_is_special_process(p)) {
9522 error = EINVAL;
9523 break;
9524 }
9525
9526 token = ((uint32_t *)data)[0];
9527 val = ((uint32_t *)data)[1];
9528
9529 lck_mtx_lock(&nspace_handler_lock);
9530
9531 for(i=0; i < MAX_NSPACE_ITEMS; i++) {
9532 if (nspace_items[i].token == token) {
9533 break; /* exit for loop, not case stmt */
9534 }
9535 }
9536
9537 if (i >= MAX_NSPACE_ITEMS) {
9538 printf("nspace-handler-cancel: did not find token %u\n", token);
9539 error = ENOENT;
9540 } else {
9541 if (nspace_items[i].vp) {
9542 vnode_lock_spin(nspace_items[i].vp);
9543 nspace_items[i].vp->v_flag &= ~VNEEDSSNAPSHOT;
9544 vnode_unlock(nspace_items[i].vp);
9545 }
9546
9547 nspace_items[i].vp = NULL;
9548 nspace_items[i].arg = NULL;
9549 nspace_items[i].vid = 0;
9550 nspace_items[i].token = val;
9551 nspace_items[i].flags &= ~NSPACE_ITEM_PROCESSING;
9552 nspace_items[i].flags |= NSPACE_ITEM_CANCELLED;
9553
9554 wakeup((caddr_t)&(nspace_items[i].vp));
9555 }
9556
9557 lck_mtx_unlock(&nspace_handler_lock);
9558 }
9559 break;
9560
9561 case FSCTL_NAMESPACE_HANDLER_SET_SNAPSHOT_TIME: {
9562 if ((error = suser(kauth_cred_get(), &(current_proc()->p_acflag)))) {
9563 break;
9564 }
9565
9566 // we explicitly do not do the namespace_handler_proc check here
9567
9568 lck_mtx_lock(&nspace_handler_lock);
9569 snapshot_timestamp = ((uint32_t *)data)[0];
9570 wakeup(&nspace_item_idx);
9571 lck_mtx_unlock(&nspace_handler_lock);
9572 printf("nspace-handler-set-snapshot-time: %d\n", (int)snapshot_timestamp);
9573
9574 }
9575 break;
9576
9577 case FSCTL_NAMESPACE_ALLOW_DMG_SNAPSHOT_EVENTS:
9578 {
9579 if ((error = suser(kauth_cred_get(), &(current_proc()->p_acflag)))) {
9580 break;
9581 }
9582
9583 lck_mtx_lock(&nspace_handler_lock);
9584 nspace_allow_virtual_devs = ((uint32_t *)data)[0];
9585 lck_mtx_unlock(&nspace_handler_lock);
9586 printf("nspace-snapshot-handler will%s allow events on disk-images\n",
9587 nspace_allow_virtual_devs ? "" : " NOT");
9588 error = 0;
9589
9590 }
9591 break;
9592
9593 case FSCTL_SET_FSTYPENAME_OVERRIDE:
9594 {
9595 if ((error = suser(kauth_cred_get(), &(current_proc()->p_acflag)))) {
9596 break;
9597 }
9598 if (vp->v_mount) {
9599 mount_lock(vp->v_mount);
9600 if (data[0] != 0) {
9601 strlcpy(&vp->v_mount->fstypename_override[0], data, MFSTYPENAMELEN);
9602 vp->v_mount->mnt_kern_flag |= MNTK_TYPENAME_OVERRIDE;
9603 if (vfs_isrdonly(vp->v_mount) && strcmp(vp->v_mount->fstypename_override, "mtmfs") == 0) {
9604 vp->v_mount->mnt_kern_flag |= MNTK_EXTENDED_SECURITY;
9605 vp->v_mount->mnt_kern_flag &= ~MNTK_AUTH_OPAQUE;
9606 }
9607 } else {
9608 if (strcmp(vp->v_mount->fstypename_override, "mtmfs") == 0) {
9609 vp->v_mount->mnt_kern_flag &= ~MNTK_EXTENDED_SECURITY;
9610 }
9611 vp->v_mount->mnt_kern_flag &= ~MNTK_TYPENAME_OVERRIDE;
9612 vp->v_mount->fstypename_override[0] = '\0';
9613 }
9614 mount_unlock(vp->v_mount);
9615 }
9616 }
9617 break;
9618
9619 default: {
9620 /* Invoke the filesystem-specific code */
9621 error = VNOP_IOCTL(vp, IOCBASECMD(cmd), data, options, ctx);
9622 }
9623
9624 } /* end switch stmt */
9625
9626 /*
9627 * if no errors, copy any data to user. Size was
9628 * already set and checked above.
9629 */
9630 if (error == 0 && (cmd & IOC_OUT) && size)
9631 error = copyout(data, udata, size);
9632
9633 if (memp) {
9634 kfree(memp, size);
9635 }
9636
9637 return error;
9638}
9639
/*
 * fsctl(2): path-based filesystem control call.
 * Looks up the path (optionally without following a trailing symlink),
 * runs the MAC policy check, then delegates to fsctl_internal().
 */
/* ARGSUSED */
int
fsctl (proc_t p, struct fsctl_args *uap, __unused int32_t *retval)
{
	int error;
	struct nameidata nd;
	u_long nameiflags;
	vnode_t vp = NULL;
	vfs_context_t ctx = vfs_context_current();

	AUDIT_ARG(cmd, uap->cmd);
	AUDIT_ARG(value32, uap->options);
	/* Get the vnode for the file we are getting info on:  */
	nameiflags = 0;
	if ((uap->options & FSOPT_NOFOLLOW) == 0) nameiflags |= FOLLOW;
	NDINIT(&nd, LOOKUP, OP_FSCTL, nameiflags | AUDITVNPATH1,
	       UIO_USERSPACE, uap->path, ctx);
	if ((error = namei(&nd))) goto done;
	vp = nd.ni_vp;
	nameidone(&nd);

#if CONFIG_MACF
	error = mac_mount_check_fsctl(ctx, vnode_mount(vp), uap->cmd);
	if (error) {
		goto done;
	}
#endif

	/* May release the iocount and NULL out vp (FSCTL_SYNC_VOLUME). */
	error = fsctl_internal(p, &vp, uap->cmd, (user_addr_t)uap->data, uap->options, ctx);

done:
	if (vp)
		vnode_put(vp);
	return error;
}
/*
 * ffsctl(2): fd-based filesystem control call.
 * Resolves the descriptor to a vnode, takes an iocount, runs the MAC
 * policy check, then delegates to fsctl_internal().
 */
/* ARGSUSED */
int
ffsctl (proc_t p, struct ffsctl_args *uap, __unused int32_t *retval)
{
	int error;
	vnode_t vp = NULL;
	vfs_context_t ctx = vfs_context_current();
	int fd = -1;

	AUDIT_ARG(fd, uap->fd);
	AUDIT_ARG(cmd, uap->cmd);
	AUDIT_ARG(value32, uap->options);

	/* Get the vnode for the file we are getting info on:  */
	if ((error = file_vnode(uap->fd, &vp)))
		return error;
	fd = uap->fd;
	if ((error = vnode_getwithref(vp))) {
		file_drop(fd);
		return error;
	}

#if CONFIG_MACF
	if ((error = mac_mount_check_fsctl(ctx, vnode_mount(vp), uap->cmd))) {
		file_drop(fd);
		vnode_put(vp);
		return error;
	}
#endif

	error = fsctl_internal(p, &vp, uap->cmd, (user_addr_t)uap->data, uap->options, ctx);

	file_drop(fd);

	/*validate vp; fsctl_internal() can drop iocount and reset vp to NULL*/
	if (vp) {
		vnode_put(vp);
	}

	return error;
}
9716/* end of fsctl system call */
9717
9718/*
9719 * Retrieve the data of an extended attribute.
9720 */
9721int
9722getxattr(proc_t p, struct getxattr_args *uap, user_ssize_t *retval)
9723{
9724 vnode_t vp;
9725 struct nameidata nd;
9726 char attrname[XATTR_MAXNAMELEN+1];
9727 vfs_context_t ctx = vfs_context_current();
9728 uio_t auio = NULL;
9729 int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
9730 size_t attrsize = 0;
9731 size_t namelen;
9732 u_int32_t nameiflags;
9733 int error;
9734 char uio_buf[ UIO_SIZEOF(1) ];
9735
9736 if (uap->options & (XATTR_NOSECURITY | XATTR_NODEFAULT))
9737 return (EINVAL);
9738
9739 nameiflags = (uap->options & XATTR_NOFOLLOW) ? 0 : FOLLOW;
9740 NDINIT(&nd, LOOKUP, OP_GETXATTR, nameiflags, spacetype, uap->path, ctx);
9741 if ((error = namei(&nd))) {
9742 return (error);
9743 }
9744 vp = nd.ni_vp;
9745 nameidone(&nd);
9746
9747 if ((error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen) != 0)) {
9748 goto out;
9749 }
9750 if (xattr_protected(attrname)) {
9751 if (!vfs_context_issuser(ctx) || strcmp(attrname, "com.apple.system.Security") != 0) {
9752 error = EPERM;
9753 goto out;
9754 }
9755 }
9756 /*
9757 * the specific check for 0xffffffff is a hack to preserve
9758 * binaray compatibilty in K64 with applications that discovered
9759 * that passing in a buf pointer and a size of -1 resulted in
9760 * just the size of the indicated extended attribute being returned.
9761 * this isn't part of the documented behavior, but because of the
9762 * original implemtation's check for "uap->size > 0", this behavior
9763 * was allowed. In K32 that check turned into a signed comparison
9764 * even though uap->size is unsigned... in K64, we blow by that
9765 * check because uap->size is unsigned and doesn't get sign smeared
9766 * in the munger for a 32 bit user app. we also need to add a
9767 * check to limit the maximum size of the buffer being passed in...
9768 * unfortunately, the underlying fileystems seem to just malloc
9769 * the requested size even if the actual extended attribute is tiny.
9770 * because that malloc is for kernel wired memory, we have to put a
9771 * sane limit on it.
9772 *
9773 * U32 running on K64 will yield 0x00000000ffffffff for uap->size
9774 * U64 running on K64 will yield -1 (64 bits wide)
9775 * U32/U64 running on K32 will yield -1 (32 bits wide)
9776 */
9777 if (uap->size == 0xffffffff || uap->size == (size_t)-1)
9778 goto no_uio;
9779
9780 if (uap->value) {
9781 if (uap->size > (size_t)XATTR_MAXSIZE)
9782 uap->size = XATTR_MAXSIZE;
9783
9784 auio = uio_createwithbuffer(1, uap->position, spacetype, UIO_READ,
9785 &uio_buf[0], sizeof(uio_buf));
9786 uio_addiov(auio, uap->value, uap->size);
9787 }
9788no_uio:
9789 error = vn_getxattr(vp, attrname, auio, &attrsize, uap->options, ctx);
9790out:
9791 vnode_put(vp);
9792
9793 if (auio) {
9794 *retval = uap->size - uio_resid(auio);
9795 } else {
9796 *retval = (user_ssize_t)attrsize;
9797 }
9798
9799 return (error);
9800}
9801
9802/*
9803 * Retrieve the data of an extended attribute.
9804 */
9805int
9806fgetxattr(proc_t p, struct fgetxattr_args *uap, user_ssize_t *retval)
9807{
9808 vnode_t vp;
9809 char attrname[XATTR_MAXNAMELEN+1];
9810 uio_t auio = NULL;
9811 int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
9812 size_t attrsize = 0;
9813 size_t namelen;
9814 int error;
9815 char uio_buf[ UIO_SIZEOF(1) ];
9816
9817 if (uap->options & (XATTR_NOFOLLOW | XATTR_NOSECURITY | XATTR_NODEFAULT))
9818 return (EINVAL);
9819
9820 if ( (error = file_vnode(uap->fd, &vp)) ) {
9821 return (error);
9822 }
9823 if ( (error = vnode_getwithref(vp)) ) {
9824 file_drop(uap->fd);
9825 return(error);
9826 }
9827 if ((error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen) != 0)) {
9828 goto out;
9829 }
9830 if (xattr_protected(attrname)) {
9831 error = EPERM;
9832 goto out;
9833 }
9834 if (uap->value && uap->size > 0) {
9835 auio = uio_createwithbuffer(1, uap->position, spacetype, UIO_READ,
9836 &uio_buf[0], sizeof(uio_buf));
9837 uio_addiov(auio, uap->value, uap->size);
9838 }
9839
9840 error = vn_getxattr(vp, attrname, auio, &attrsize, uap->options, vfs_context_current());
9841out:
9842 (void)vnode_put(vp);
9843 file_drop(uap->fd);
9844
9845 if (auio) {
9846 *retval = uap->size - uio_resid(auio);
9847 } else {
9848 *retval = (user_ssize_t)attrsize;
9849 }
9850 return (error);
9851}
9852
9853/*
9854 * Set the data of an extended attribute.
9855 */
9856int
9857setxattr(proc_t p, struct setxattr_args *uap, int *retval)
9858{
9859 vnode_t vp;
9860 struct nameidata nd;
9861 char attrname[XATTR_MAXNAMELEN+1];
9862 vfs_context_t ctx = vfs_context_current();
9863 uio_t auio = NULL;
9864 int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
9865 size_t namelen;
9866 u_int32_t nameiflags;
9867 int error;
9868 char uio_buf[ UIO_SIZEOF(1) ];
9869
9870 if (uap->options & (XATTR_NOSECURITY | XATTR_NODEFAULT))
9871 return (EINVAL);
9872
9873 if ((error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen) != 0)) {
9874 if (error == EPERM) {
9875 /* if the string won't fit in attrname, copyinstr emits EPERM */
9876 return (ENAMETOOLONG);
9877 }
9878 /* Otherwise return the default error from copyinstr to detect ERANGE, etc */
9879 return error;
9880 }
9881 if (xattr_protected(attrname))
9882 return(EPERM);
9883 if (uap->size != 0 && uap->value == 0) {
9884 return (EINVAL);
9885 }
9886
9887 nameiflags = (uap->options & XATTR_NOFOLLOW) ? 0 : FOLLOW;
9888 NDINIT(&nd, LOOKUP, OP_SETXATTR, nameiflags, spacetype, uap->path, ctx);
9889 if ((error = namei(&nd))) {
9890 return (error);
9891 }
9892 vp = nd.ni_vp;
9893 nameidone(&nd);
9894
9895 auio = uio_createwithbuffer(1, uap->position, spacetype, UIO_WRITE,
9896 &uio_buf[0], sizeof(uio_buf));
9897 uio_addiov(auio, uap->value, uap->size);
9898
9899 error = vn_setxattr(vp, attrname, auio, uap->options, ctx);
9900#if CONFIG_FSE
9901 if (error == 0) {
9902 add_fsevent(FSE_XATTR_MODIFIED, ctx,
9903 FSE_ARG_VNODE, vp,
9904 FSE_ARG_DONE);
9905 }
9906#endif
9907 vnode_put(vp);
9908 *retval = 0;
9909 return (error);
9910}
9911
9912/*
9913 * Set the data of an extended attribute.
9914 */
9915int
9916fsetxattr(proc_t p, struct fsetxattr_args *uap, int *retval)
9917{
9918 vnode_t vp;
9919 char attrname[XATTR_MAXNAMELEN+1];
9920 uio_t auio = NULL;
9921 int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
9922 size_t namelen;
9923 int error;
9924 char uio_buf[ UIO_SIZEOF(1) ];
9925#if CONFIG_FSE
9926 vfs_context_t ctx = vfs_context_current();
9927#endif
9928
9929 if (uap->options & (XATTR_NOFOLLOW | XATTR_NOSECURITY | XATTR_NODEFAULT))
9930 return (EINVAL);
9931
9932 if ((error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen) != 0)) {
9933 if (error == EPERM) {
9934 /* if the string won't fit in attrname, copyinstr emits EPERM */
9935 return (ENAMETOOLONG);
9936 }
9937 /* Otherwise return the default error from copyinstr to detect ERANGE, etc */
9938 return error;
9939 }
9940 if (xattr_protected(attrname))
9941 return(EPERM);
9942 if (uap->size != 0 && uap->value == 0) {
9943 return (EINVAL);
9944 }
9945 if ( (error = file_vnode(uap->fd, &vp)) ) {
9946 return (error);
9947 }
9948 if ( (error = vnode_getwithref(vp)) ) {
9949 file_drop(uap->fd);
9950 return(error);
9951 }
9952 auio = uio_createwithbuffer(1, uap->position, spacetype, UIO_WRITE,
9953 &uio_buf[0], sizeof(uio_buf));
9954 uio_addiov(auio, uap->value, uap->size);
9955
9956 error = vn_setxattr(vp, attrname, auio, uap->options, vfs_context_current());
9957#if CONFIG_FSE
9958 if (error == 0) {
9959 add_fsevent(FSE_XATTR_MODIFIED, ctx,
9960 FSE_ARG_VNODE, vp,
9961 FSE_ARG_DONE);
9962 }
9963#endif
9964 vnode_put(vp);
9965 file_drop(uap->fd);
9966 *retval = 0;
9967 return (error);
9968}
9969
9970/*
9971 * Remove an extended attribute.
9972 * XXX Code duplication here.
9973 */
9974int
9975removexattr(proc_t p, struct removexattr_args *uap, int *retval)
9976{
9977 vnode_t vp;
9978 struct nameidata nd;
9979 char attrname[XATTR_MAXNAMELEN+1];
9980 int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
9981 vfs_context_t ctx = vfs_context_current();
9982 size_t namelen;
9983 u_int32_t nameiflags;
9984 int error;
9985
9986 if (uap->options & (XATTR_NOSECURITY | XATTR_NODEFAULT))
9987 return (EINVAL);
9988
9989 error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen);
9990 if (error != 0) {
9991 return (error);
9992 }
9993 if (xattr_protected(attrname))
9994 return(EPERM);
9995 nameiflags = (uap->options & XATTR_NOFOLLOW) ? 0 : FOLLOW;
9996 NDINIT(&nd, LOOKUP, OP_REMOVEXATTR, nameiflags, spacetype, uap->path, ctx);
9997 if ((error = namei(&nd))) {
9998 return (error);
9999 }
10000 vp = nd.ni_vp;
10001 nameidone(&nd);
10002
10003 error = vn_removexattr(vp, attrname, uap->options, ctx);
10004#if CONFIG_FSE
10005 if (error == 0) {
10006 add_fsevent(FSE_XATTR_REMOVED, ctx,
10007 FSE_ARG_VNODE, vp,
10008 FSE_ARG_DONE);
10009 }
10010#endif
10011 vnode_put(vp);
10012 *retval = 0;
10013 return (error);
10014}
10015
10016/*
10017 * Remove an extended attribute.
10018 * XXX Code duplication here.
10019 */
10020int
10021fremovexattr(__unused proc_t p, struct fremovexattr_args *uap, int *retval)
10022{
10023 vnode_t vp;
10024 char attrname[XATTR_MAXNAMELEN+1];
10025 size_t namelen;
10026 int error;
10027#if CONFIG_FSE
10028 vfs_context_t ctx = vfs_context_current();
10029#endif
10030
10031 if (uap->options & (XATTR_NOFOLLOW | XATTR_NOSECURITY | XATTR_NODEFAULT))
10032 return (EINVAL);
10033
10034 error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen);
10035 if (error != 0) {
10036 return (error);
10037 }
10038 if (xattr_protected(attrname))
10039 return(EPERM);
10040 if ( (error = file_vnode(uap->fd, &vp)) ) {
10041 return (error);
10042 }
10043 if ( (error = vnode_getwithref(vp)) ) {
10044 file_drop(uap->fd);
10045 return(error);
10046 }
10047
10048 error = vn_removexattr(vp, attrname, uap->options, vfs_context_current());
10049#if CONFIG_FSE
10050 if (error == 0) {
10051 add_fsevent(FSE_XATTR_REMOVED, ctx,
10052 FSE_ARG_VNODE, vp,
10053 FSE_ARG_DONE);
10054 }
10055#endif
10056 vnode_put(vp);
10057 file_drop(uap->fd);
10058 *retval = 0;
10059 return (error);
10060}
10061
10062/*
10063 * Retrieve the list of extended attribute names.
10064 * XXX Code duplication here.
10065 */
10066int
10067listxattr(proc_t p, struct listxattr_args *uap, user_ssize_t *retval)
10068{
10069 vnode_t vp;
10070 struct nameidata nd;
10071 vfs_context_t ctx = vfs_context_current();
10072 uio_t auio = NULL;
10073 int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
10074 size_t attrsize = 0;
10075 u_int32_t nameiflags;
10076 int error;
10077 char uio_buf[ UIO_SIZEOF(1) ];
10078
10079 if (uap->options & (XATTR_NOSECURITY | XATTR_NODEFAULT))
10080 return (EINVAL);
10081
10082 nameiflags = (uap->options & XATTR_NOFOLLOW) ? 0 : FOLLOW;
10083 NDINIT(&nd, LOOKUP, OP_LISTXATTR, nameiflags, spacetype, uap->path, ctx);
10084 if ((error = namei(&nd))) {
10085 return (error);
10086 }
10087 vp = nd.ni_vp;
10088 nameidone(&nd);
10089 if (uap->namebuf != 0 && uap->bufsize > 0) {
10090 auio = uio_createwithbuffer(1, 0, spacetype, UIO_READ,
10091 &uio_buf[0], sizeof(uio_buf));
10092 uio_addiov(auio, uap->namebuf, uap->bufsize);
10093 }
10094
10095 error = vn_listxattr(vp, auio, &attrsize, uap->options, ctx);
10096
10097 vnode_put(vp);
10098 if (auio) {
10099 *retval = (user_ssize_t)uap->bufsize - uio_resid(auio);
10100 } else {
10101 *retval = (user_ssize_t)attrsize;
10102 }
10103 return (error);
10104}
10105
10106/*
10107 * Retrieve the list of extended attribute names.
10108 * XXX Code duplication here.
10109 */
10110int
10111flistxattr(proc_t p, struct flistxattr_args *uap, user_ssize_t *retval)
10112{
10113 vnode_t vp;
10114 uio_t auio = NULL;
10115 int spacetype = proc_is64bit(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
10116 size_t attrsize = 0;
10117 int error;
10118 char uio_buf[ UIO_SIZEOF(1) ];
10119
10120 if (uap->options & (XATTR_NOFOLLOW | XATTR_NOSECURITY | XATTR_NODEFAULT))
10121 return (EINVAL);
10122
10123 if ( (error = file_vnode(uap->fd, &vp)) ) {
10124 return (error);
10125 }
10126 if ( (error = vnode_getwithref(vp)) ) {
10127 file_drop(uap->fd);
10128 return(error);
10129 }
10130 if (uap->namebuf != 0 && uap->bufsize > 0) {
10131 auio = uio_createwithbuffer(1, 0, spacetype,
10132 UIO_READ, &uio_buf[0], sizeof(uio_buf));
10133 uio_addiov(auio, uap->namebuf, uap->bufsize);
10134 }
10135
10136 error = vn_listxattr(vp, auio, &attrsize, uap->options, vfs_context_current());
10137
10138 vnode_put(vp);
10139 file_drop(uap->fd);
10140 if (auio) {
10141 *retval = (user_ssize_t)uap->bufsize - uio_resid(auio);
10142 } else {
10143 *retval = (user_ssize_t)attrsize;
10144 }
10145 return (error);
10146}
10147
/*
 * Resolve (volfs_id, objid) to an absolute path in 'buf'.
 * On success *pathlen holds the path length (including the NUL, per
 * build_path). Falls through union mounts when the fileid is not found
 * in the top layer. bufsize is capped at PAGE_SIZE.
 */
static int fsgetpath_internal(
	vfs_context_t ctx, int volfs_id, uint64_t objid,
	vm_size_t bufsize, caddr_t buf, int *pathlen)
{
	int error;
	struct mount *mp = NULL;
	vnode_t vp;
	int length;
	int bpflags;

	if (bufsize > PAGE_SIZE) {
		return (EINVAL);
	}

	if (buf == NULL) {
		return (ENOMEM);
	}

	/* mount_lookupby_volfsid(…, 1) returns the mount busied. */
	if ((mp = mount_lookupby_volfsid(volfs_id, 1)) == NULL) {
		/* NOTE(review): assignment is redundant with the immediate return */
		error = ENOTSUP;  /* unexpected failure */
		return ENOTSUP;
	}

unionget:
	/* Fileid 2 is the volume root by convention. */
	if (objid == 2) {
		error = VFS_ROOT(mp, &vp, ctx);
	} else {
		error = VFS_VGET(mp, (ino64_t)objid, &vp, ctx);
	}

	if (error == ENOENT && (mp->mnt_flag & MNT_UNION)) {
		/*
		 * If the fileid isn't found and we're in a union
		 * mount volume, then see if the fileid is in the
		 * mounted-on volume.
		 */
		struct mount *tmp = mp;
		mp = vnode_mount(tmp->mnt_vnodecovered);
		vfs_unbusy(tmp);
		if (vfs_busy(mp, LK_NOWAIT) == 0)
			goto unionget;
	} else {
		vfs_unbusy(mp);
	}

	if (error) {
		return error;
	}

#if CONFIG_MACF
	error = mac_vnode_check_fsgetpath(ctx, vp);
	if (error) {
		vnode_put(vp);
		return error;
	}
#endif

	/* Obtain the absolute path to this vnode. */
	bpflags = vfs_context_suser(ctx) ? BUILDPATH_CHECKACCESS : 0;
	bpflags |= BUILDPATH_CHECK_MOVED;
	error = build_path(vp, buf, bufsize, &length, bpflags, ctx);
	vnode_put(vp);

	if (error) {
		goto out;
	}

	AUDIT_ARG(text, buf);

	if (kdebug_enable) {
		long dbg_parms[NUMPARMS];
		int  dbg_namelen;

		dbg_namelen = (int)sizeof(dbg_parms);

		/* Trace the tail of the path if it exceeds the kdebug payload. */
		if (length < dbg_namelen) {
			memcpy((char *)dbg_parms, buf, length);
			memset((char *)dbg_parms + length, 0, dbg_namelen - length);

			dbg_namelen = length;
		} else {
			memcpy((char *)dbg_parms, buf + (length - dbg_namelen), dbg_namelen);
		}

		/*
		 * NOTE(review): vp's iocount was dropped above; it is presumably
		 * passed here only as an opaque tracing tag, never dereferenced —
		 * TODO confirm kdebug_lookup_gen_events treats it as such.
		 */
		kdebug_lookup_gen_events(dbg_parms, dbg_namelen, (void *)vp, TRUE);
	}

	*pathlen = (user_ssize_t)length; /* may be superseded by error */

out:
	return (error);
}
10240
10241/*
10242 * Obtain the full pathname of a file system object by id.
10243 *
10244 * This is a private SPI used by the File Manager.
10245 */
10246__private_extern__
10247int
10248fsgetpath(__unused proc_t p, struct fsgetpath_args *uap, user_ssize_t *retval)
10249{
10250 vfs_context_t ctx = vfs_context_current();
10251 fsid_t fsid;
10252 char *realpath;
10253 int length;
10254 int error;
10255
10256 if ((error = copyin(uap->fsid, (caddr_t)&fsid, sizeof(fsid)))) {
10257 return (error);
10258 }
10259 AUDIT_ARG(value32, fsid.val[0]);
10260 AUDIT_ARG(value64, uap->objid);
10261 /* Restrict output buffer size for now. */
10262
10263 if (uap->bufsize > PAGE_SIZE) {
10264 return (EINVAL);
10265 }
10266 MALLOC(realpath, char *, uap->bufsize, M_TEMP, M_WAITOK);
10267 if (realpath == NULL) {
10268 return (ENOMEM);
10269 }
10270
10271 error = fsgetpath_internal(
10272 ctx, fsid.val[0], uap->objid,
10273 uap->bufsize, realpath, &length);
10274
10275 if (error) {
10276 goto out;
10277 }
10278
10279 error = copyout((caddr_t)realpath, uap->buf, length);
10280
10281 *retval = (user_ssize_t)length; /* may be superseded by error */
10282out:
10283 if (realpath) {
10284 FREE(realpath, M_TEMP);
10285 }
10286 return (error);
10287}
10288
10289/*
10290 * Common routine to handle various flavors of statfs data heading out
10291 * to user space.
10292 *
10293 * Returns: 0 Success
10294 * EFAULT
10295 */
static int
munge_statfs(struct mount *mp, struct vfsstatfs *sfsp,
    user_addr_t bufp, int *sizep, boolean_t is_64_bit,
    boolean_t partial_copy)
{
	int		error;
	int		my_size, copy_size;

	if (is_64_bit) {
		struct user64_statfs sfs;
		my_size = copy_size = sizeof(sfs);
		/* Zero first so struct padding never leaks kernel memory to user space. */
		bzero(&sfs, my_size);
		sfs.f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
		sfs.f_type = mp->mnt_vtable->vfc_typenum;
		/* Legacy layout: the fs subtype is carried in f_reserved1. */
		sfs.f_reserved1 = (short)sfsp->f_fssubtype;
		sfs.f_bsize = (user64_long_t)sfsp->f_bsize;
		sfs.f_iosize = (user64_long_t)sfsp->f_iosize;
		sfs.f_blocks = (user64_long_t)sfsp->f_blocks;
		sfs.f_bfree = (user64_long_t)sfsp->f_bfree;
		sfs.f_bavail = (user64_long_t)sfsp->f_bavail;
		sfs.f_files = (user64_long_t)sfsp->f_files;
		sfs.f_ffree = (user64_long_t)sfsp->f_ffree;
		sfs.f_fsid = sfsp->f_fsid;
		sfs.f_owner = sfsp->f_owner;
		/* A mount may override the advertised filesystem type name. */
		if (mp->mnt_kern_flag & MNTK_TYPENAME_OVERRIDE) {
			strlcpy(&sfs.f_fstypename[0], &mp->fstypename_override[0], MFSNAMELEN);
		} else {
			strlcpy(&sfs.f_fstypename[0], &sfsp->f_fstypename[0], MFSNAMELEN);
		}
		strlcpy(&sfs.f_mntonname[0], &sfsp->f_mntonname[0], MNAMELEN);
		strlcpy(&sfs.f_mntfromname[0], &sfsp->f_mntfromname[0], MNAMELEN);

		/*
		 * partial_copy drops the trailing reserved fields so only the
		 * meaningful prefix of the structure is copied out.
		 */
		if (partial_copy) {
			copy_size -= (sizeof(sfs.f_reserved3) + sizeof(sfs.f_reserved4));
		}
		error = copyout((caddr_t)&sfs, bufp, copy_size);
	}
	else {
		struct user32_statfs sfs;

		my_size = copy_size = sizeof(sfs);
		bzero(&sfs, my_size);

		sfs.f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
		sfs.f_type = mp->mnt_vtable->vfc_typenum;
		sfs.f_reserved1 = (short)sfsp->f_fssubtype;

		/*
		 * It's possible for there to be more than 2^^31 blocks in the filesystem, so we
		 * have to fudge the numbers here in that case.  We inflate the blocksize in order
		 * to reflect the filesystem size as best we can.
		 */
		if ((sfsp->f_blocks > INT_MAX)
			/* Hack for 4061702 . I think the real fix is for Carbon to
			 * look for some volume capability and not depend on hidden
			 * semantics agreed between a FS and carbon.
			 * f_blocks, f_bfree, and f_bavail set to -1 is the trigger
			 * for Carbon to set bNoVolumeSizes volume attribute.
			 * Without this the webdavfs files cannot be copied onto
			 * disk as they look huge. This change should not affect
			 * XSAN as they should not setting these to -1..
			 */
			 && (sfsp->f_blocks != 0xffffffffffffffffULL)
			 && (sfsp->f_bfree != 0xffffffffffffffffULL)
			 && (sfsp->f_bavail != 0xffffffffffffffffULL)) {
			int		shift;

			/*
			 * Work out how far we have to shift the block count down to make it fit.
			 * Note that it's possible to have to shift so far that the resulting
			 * blocksize would be unreportably large.  At that point, we will clip
			 * any values that don't fit.
			 *
			 * For safety's sake, we also ensure that f_iosize is never reported as
			 * being smaller than f_bsize.
			 */
			/*
			 * NOTE(review): the second condition guards the inflated
			 * blocksize against exceeding INT_MAX, but the left shift
			 * is performed in f_bsize's own width — presumably wide
			 * enough for all real block sizes; confirm f_bsize cannot
			 * overflow at shift+1 for exotic filesystems.
			 */
			for (shift = 0; shift < 32; shift++) {
				if ((sfsp->f_blocks >> shift) <= INT_MAX)
					break;
				if ((sfsp->f_bsize << (shift + 1)) > INT_MAX)
					break;
			}
#define __SHIFT_OR_CLIP(x, s)	((((x) >> (s)) > INT_MAX) ? INT_MAX : ((x) >> (s)))
			sfs.f_blocks = (user32_long_t)__SHIFT_OR_CLIP(sfsp->f_blocks, shift);
			sfs.f_bfree = (user32_long_t)__SHIFT_OR_CLIP(sfsp->f_bfree, shift);
			sfs.f_bavail = (user32_long_t)__SHIFT_OR_CLIP(sfsp->f_bavail, shift);
#undef __SHIFT_OR_CLIP
			sfs.f_bsize = (user32_long_t)(sfsp->f_bsize << shift);
			sfs.f_iosize = lmax(sfsp->f_iosize, sfsp->f_bsize);
		} else {
			/* filesystem is small enough to be reported honestly */
			sfs.f_bsize = (user32_long_t)sfsp->f_bsize;
			sfs.f_iosize = (user32_long_t)sfsp->f_iosize;
			sfs.f_blocks = (user32_long_t)sfsp->f_blocks;
			sfs.f_bfree = (user32_long_t)sfsp->f_bfree;
			sfs.f_bavail = (user32_long_t)sfsp->f_bavail;
		}
		sfs.f_files = (user32_long_t)sfsp->f_files;
		sfs.f_ffree = (user32_long_t)sfsp->f_ffree;
		sfs.f_fsid = sfsp->f_fsid;
		sfs.f_owner = sfsp->f_owner;
		/* A mount may override the advertised filesystem type name. */
		if (mp->mnt_kern_flag & MNTK_TYPENAME_OVERRIDE) {
			strlcpy(&sfs.f_fstypename[0], &mp->fstypename_override[0], MFSNAMELEN);
		} else {
			strlcpy(&sfs.f_fstypename[0], &sfsp->f_fstypename[0], MFSNAMELEN);
		}
		strlcpy(&sfs.f_mntonname[0], &sfsp->f_mntonname[0], MNAMELEN);
		strlcpy(&sfs.f_mntfromname[0], &sfsp->f_mntfromname[0], MNAMELEN);

		/* See the 64-bit branch: trim the trailing reserved fields. */
		if (partial_copy) {
			copy_size -= (sizeof(sfs.f_reserved3) + sizeof(sfs.f_reserved4));
		}
		error = copyout((caddr_t)&sfs, bufp, copy_size);
	}

	/* Report the full (untrimmed) structure size to the caller. */
	if (sizep != NULL) {
		*sizep = my_size;
	}
	return(error);
}
10416
10417/*
10418 * copy stat structure into user_stat structure.
10419 */
10420void munge_user64_stat(struct stat *sbp, struct user64_stat *usbp)
10421{
10422 bzero(usbp, sizeof(*usbp));
10423
10424 usbp->st_dev = sbp->st_dev;
10425 usbp->st_ino = sbp->st_ino;
10426 usbp->st_mode = sbp->st_mode;
10427 usbp->st_nlink = sbp->st_nlink;
10428 usbp->st_uid = sbp->st_uid;
10429 usbp->st_gid = sbp->st_gid;
10430 usbp->st_rdev = sbp->st_rdev;
10431#ifndef _POSIX_C_SOURCE
10432 usbp->st_atimespec.tv_sec = sbp->st_atimespec.tv_sec;
10433 usbp->st_atimespec.tv_nsec = sbp->st_atimespec.tv_nsec;
10434 usbp->st_mtimespec.tv_sec = sbp->st_mtimespec.tv_sec;
10435 usbp->st_mtimespec.tv_nsec = sbp->st_mtimespec.tv_nsec;
10436 usbp->st_ctimespec.tv_sec = sbp->st_ctimespec.tv_sec;
10437 usbp->st_ctimespec.tv_nsec = sbp->st_ctimespec.tv_nsec;
10438#else
10439 usbp->st_atime = sbp->st_atime;
10440 usbp->st_atimensec = sbp->st_atimensec;
10441 usbp->st_mtime = sbp->st_mtime;
10442 usbp->st_mtimensec = sbp->st_mtimensec;
10443 usbp->st_ctime = sbp->st_ctime;
10444 usbp->st_ctimensec = sbp->st_ctimensec;
10445#endif
10446 usbp->st_size = sbp->st_size;
10447 usbp->st_blocks = sbp->st_blocks;
10448 usbp->st_blksize = sbp->st_blksize;
10449 usbp->st_flags = sbp->st_flags;
10450 usbp->st_gen = sbp->st_gen;
10451 usbp->st_lspare = sbp->st_lspare;
10452 usbp->st_qspare[0] = sbp->st_qspare[0];
10453 usbp->st_qspare[1] = sbp->st_qspare[1];
10454}
10455
10456void munge_user32_stat(struct stat *sbp, struct user32_stat *usbp)
10457{
10458 bzero(usbp, sizeof(*usbp));
10459
10460 usbp->st_dev = sbp->st_dev;
10461 usbp->st_ino = sbp->st_ino;
10462 usbp->st_mode = sbp->st_mode;
10463 usbp->st_nlink = sbp->st_nlink;
10464 usbp->st_uid = sbp->st_uid;
10465 usbp->st_gid = sbp->st_gid;
10466 usbp->st_rdev = sbp->st_rdev;
10467#ifndef _POSIX_C_SOURCE
10468 usbp->st_atimespec.tv_sec = sbp->st_atimespec.tv_sec;
10469 usbp->st_atimespec.tv_nsec = sbp->st_atimespec.tv_nsec;
10470 usbp->st_mtimespec.tv_sec = sbp->st_mtimespec.tv_sec;
10471 usbp->st_mtimespec.tv_nsec = sbp->st_mtimespec.tv_nsec;
10472 usbp->st_ctimespec.tv_sec = sbp->st_ctimespec.tv_sec;
10473 usbp->st_ctimespec.tv_nsec = sbp->st_ctimespec.tv_nsec;
10474#else
10475 usbp->st_atime = sbp->st_atime;
10476 usbp->st_atimensec = sbp->st_atimensec;
10477 usbp->st_mtime = sbp->st_mtime;
10478 usbp->st_mtimensec = sbp->st_mtimensec;
10479 usbp->st_ctime = sbp->st_ctime;
10480 usbp->st_ctimensec = sbp->st_ctimensec;
10481#endif
10482 usbp->st_size = sbp->st_size;
10483 usbp->st_blocks = sbp->st_blocks;
10484 usbp->st_blksize = sbp->st_blksize;
10485 usbp->st_flags = sbp->st_flags;
10486 usbp->st_gen = sbp->st_gen;
10487 usbp->st_lspare = sbp->st_lspare;
10488 usbp->st_qspare[0] = sbp->st_qspare[0];
10489 usbp->st_qspare[1] = sbp->st_qspare[1];
10490}
10491
10492/*
10493 * copy stat64 structure into user_stat64 structure.
10494 */
10495void munge_user64_stat64(struct stat64 *sbp, struct user64_stat64 *usbp)
10496{
10497 bzero(usbp, sizeof(*usbp));
10498
10499 usbp->st_dev = sbp->st_dev;
10500 usbp->st_ino = sbp->st_ino;
10501 usbp->st_mode = sbp->st_mode;
10502 usbp->st_nlink = sbp->st_nlink;
10503 usbp->st_uid = sbp->st_uid;
10504 usbp->st_gid = sbp->st_gid;
10505 usbp->st_rdev = sbp->st_rdev;
10506#ifndef _POSIX_C_SOURCE
10507 usbp->st_atimespec.tv_sec = sbp->st_atimespec.tv_sec;
10508 usbp->st_atimespec.tv_nsec = sbp->st_atimespec.tv_nsec;
10509 usbp->st_mtimespec.tv_sec = sbp->st_mtimespec.tv_sec;
10510 usbp->st_mtimespec.tv_nsec = sbp->st_mtimespec.tv_nsec;
10511 usbp->st_ctimespec.tv_sec = sbp->st_ctimespec.tv_sec;
10512 usbp->st_ctimespec.tv_nsec = sbp->st_ctimespec.tv_nsec;
10513 usbp->st_birthtimespec.tv_sec = sbp->st_birthtimespec.tv_sec;
10514 usbp->st_birthtimespec.tv_nsec = sbp->st_birthtimespec.tv_nsec;
10515#else
10516 usbp->st_atime = sbp->st_atime;
10517 usbp->st_atimensec = sbp->st_atimensec;
10518 usbp->st_mtime = sbp->st_mtime;
10519 usbp->st_mtimensec = sbp->st_mtimensec;
10520 usbp->st_ctime = sbp->st_ctime;
10521 usbp->st_ctimensec = sbp->st_ctimensec;
10522 usbp->st_birthtime = sbp->st_birthtime;
10523 usbp->st_birthtimensec = sbp->st_birthtimensec;
10524#endif
10525 usbp->st_size = sbp->st_size;
10526 usbp->st_blocks = sbp->st_blocks;
10527 usbp->st_blksize = sbp->st_blksize;
10528 usbp->st_flags = sbp->st_flags;
10529 usbp->st_gen = sbp->st_gen;
10530 usbp->st_lspare = sbp->st_lspare;
10531 usbp->st_qspare[0] = sbp->st_qspare[0];
10532 usbp->st_qspare[1] = sbp->st_qspare[1];
10533}
10534
10535void munge_user32_stat64(struct stat64 *sbp, struct user32_stat64 *usbp)
10536{
10537 bzero(usbp, sizeof(*usbp));
10538
10539 usbp->st_dev = sbp->st_dev;
10540 usbp->st_ino = sbp->st_ino;
10541 usbp->st_mode = sbp->st_mode;
10542 usbp->st_nlink = sbp->st_nlink;
10543 usbp->st_uid = sbp->st_uid;
10544 usbp->st_gid = sbp->st_gid;
10545 usbp->st_rdev = sbp->st_rdev;
10546#ifndef _POSIX_C_SOURCE
10547 usbp->st_atimespec.tv_sec = sbp->st_atimespec.tv_sec;
10548 usbp->st_atimespec.tv_nsec = sbp->st_atimespec.tv_nsec;
10549 usbp->st_mtimespec.tv_sec = sbp->st_mtimespec.tv_sec;
10550 usbp->st_mtimespec.tv_nsec = sbp->st_mtimespec.tv_nsec;
10551 usbp->st_ctimespec.tv_sec = sbp->st_ctimespec.tv_sec;
10552 usbp->st_ctimespec.tv_nsec = sbp->st_ctimespec.tv_nsec;
10553 usbp->st_birthtimespec.tv_sec = sbp->st_birthtimespec.tv_sec;
10554 usbp->st_birthtimespec.tv_nsec = sbp->st_birthtimespec.tv_nsec;
10555#else
10556 usbp->st_atime = sbp->st_atime;
10557 usbp->st_atimensec = sbp->st_atimensec;
10558 usbp->st_mtime = sbp->st_mtime;
10559 usbp->st_mtimensec = sbp->st_mtimensec;
10560 usbp->st_ctime = sbp->st_ctime;
10561 usbp->st_ctimensec = sbp->st_ctimensec;
10562 usbp->st_birthtime = sbp->st_birthtime;
10563 usbp->st_birthtimensec = sbp->st_birthtimensec;
10564#endif
10565 usbp->st_size = sbp->st_size;
10566 usbp->st_blocks = sbp->st_blocks;
10567 usbp->st_blksize = sbp->st_blksize;
10568 usbp->st_flags = sbp->st_flags;
10569 usbp->st_gen = sbp->st_gen;
10570 usbp->st_lspare = sbp->st_lspare;
10571 usbp->st_qspare[0] = sbp->st_qspare[0];
10572 usbp->st_qspare[1] = sbp->st_qspare[1];
10573}
10574
10575/*
10576 * Purge buffer cache for simulating cold starts
10577 */
10578static int vnode_purge_callback(struct vnode *vp, __unused void *cargs)
10579{
10580 ubc_msync(vp, (off_t)0, ubc_getsize(vp), NULL /* off_t *resid_off */, UBC_PUSHALL | UBC_INVALIDATE);
10581
10582 return VNODE_RETURNED;
10583}
10584
10585static int vfs_purge_callback(mount_t mp, __unused void * arg)
10586{
10587 vnode_iterate(mp, VNODE_WAIT | VNODE_ITERATE_ALL, vnode_purge_callback, NULL);
10588
10589 return VFS_RETURNED;
10590}
10591
10592int
10593vfs_purge(__unused struct proc *p, __unused struct vfs_purge_args *uap, __unused int32_t *retval)
10594{
10595 if (!kauth_cred_issuser(kauth_cred_get()))
10596 return EPERM;
10597
10598 vfs_iterate(0/* flags */, vfs_purge_callback, NULL);
10599
10600 return 0;
10601}
10602