1/*
2 * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28/* @(#)hfs_readwrite.c 1.0
29 *
30 * (c) 1998-2001 Apple Computer, Inc. All Rights Reserved
31 *
32 * hfs_readwrite.c -- vnode operations to deal with reading and writing files.
33 *
34 */
35
36#include <sys/param.h>
37#include <sys/systm.h>
38#include <sys/resourcevar.h>
39#include <sys/kernel.h>
40#include <sys/fcntl.h>
41#include <sys/filedesc.h>
42#include <sys/stat.h>
43#include <sys/buf.h>
44#include <sys/proc.h>
45#include <sys/kauth.h>
46#include <sys/vnode.h>
47#include <sys/vnode_internal.h>
48#include <sys/uio.h>
49#include <sys/vfs_context.h>
50#include <sys/fsevents.h>
51#include <kern/kalloc.h>
52#include <sys/disk.h>
53#include <sys/sysctl.h>
54#include <sys/fsctl.h>
55
56#include <miscfs/specfs/specdev.h>
57
58#include <sys/ubc.h>
59#include <sys/ubc_internal.h>
60
61#include <vm/vm_pageout.h>
62#include <vm/vm_kern.h>
63
64#include <sys/kdebug.h>
65
66#include "hfs.h"
67#include "hfs_attrlist.h"
68#include "hfs_endian.h"
69#include "hfs_fsctl.h"
70#include "hfs_quota.h"
71#include "hfscommon/headers/FileMgrInternal.h"
72#include "hfscommon/headers/BTreesInternal.h"
73#include "hfs_cnode.h"
74#include "hfs_dbg.h"
75
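/*
 * can_cluster: a transfer qualifies for cluster I/O only when its size is
 * a multiple of 4096 bytes and no larger than MAXPHYSIO/2.
 */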
76#define can_cluster(size) ((((size & (4096-1))) == 0) && (size <= (MAXPHYSIO/2)))
77
78enum {
79 MAXHFSFILESIZE = 0x7FFFFFFF /* this needs to go in the mount structure */
80};
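/*
 * 0x7FFFFFFF is 2GB - 1. hfs_vnop_read() checks read offsets against this
 * limit (returning EFBIG) when the volume is HFS Standard (HFS_STANDARD flag).
 */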
81
82/* from bsd/hfs/hfs_vfsops.c */
83extern int hfs_vfs_vget (struct mount *mp, ino64_t ino, struct vnode **vpp, vfs_context_t context);
84
85static int hfs_clonelink(struct vnode *, int, kauth_cred_t, struct proc *);
86static int hfs_clonefile(struct vnode *, int, int, int);
87static int hfs_clonesysfile(struct vnode *, int, int, int, kauth_cred_t, struct proc *);
88static int hfs_minorupdate(struct vnode *vp);
89static int do_hfs_truncate(struct vnode *vp, off_t length, int flags, int skip, vfs_context_t context);
90
91
92int flush_cache_on_write = 0;
93SYSCTL_INT (_kern, OID_AUTO, flush_cache_on_write, CTLFLAG_RW, &flush_cache_on_write, 0, "always flush the drive cache on writes to uncached files");
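/*
 * The knob above is exposed as kern.flush_cache_on_write and can typically be
 * toggled from user space with, e.g., "sysctl -w kern.flush_cache_on_write=1"
 * (illustrative usage).
 */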
94
95
96/*
97 * Read data from a file.
98 */
99int
100hfs_vnop_read(struct vnop_read_args *ap)
101{
102 uio_t uio = ap->a_uio;
103 struct vnode *vp = ap->a_vp;
104 struct cnode *cp;
105 struct filefork *fp;
106 struct hfsmount *hfsmp;
107 off_t filesize;
108 off_t filebytes;
109 off_t start_resid = uio_resid(uio);
110 off_t offset = uio_offset(uio);
111 int retval = 0;
112
113 /* Preflight checks */
114 if (!vnode_isreg(vp)) {
115 /* can only read regular files */
116 if (vnode_isdir(vp))
117 return (EISDIR);
118 else
119 return (EPERM);
120 }
121 if (start_resid == 0)
122 return (0); /* Nothing left to do */
123 if (offset < 0)
124		return (EINVAL);	/* can't read from a negative offset */
125
126#if HFS_COMPRESSION
127 if (VNODE_IS_RSRC(vp)) {
128 if (hfs_hides_rsrc(ap->a_context, VTOC(vp), 1)) { /* 1 == don't take the cnode lock */
129 return 0;
130 }
131 /* otherwise read the resource fork normally */
132 } else {
133 int compressed = hfs_file_is_compressed(VTOC(vp), 1); /* 1 == don't take the cnode lock */
134 if (compressed) {
135 retval = decmpfs_read_compressed(ap, &compressed, VTOCMP(vp));
136 if (compressed) {
137 if (retval == 0) {
138 /* successful read, update the access time */
139 VTOC(vp)->c_touch_acctime = TRUE;
140
141 /* compressed files are not hot file candidates */
142 if (VTOHFS(vp)->hfc_stage == HFC_RECORDING) {
143 VTOF(vp)->ff_bytesread = 0;
144 }
145 }
146 return retval;
147 }
148 /* otherwise the file was converted back to a regular file while we were reading it */
149 retval = 0;
150 }
151 }
152#endif /* HFS_COMPRESSION */
153
154 cp = VTOC(vp);
155 fp = VTOF(vp);
156 hfsmp = VTOHFS(vp);
157
158 /* Protect against a size change. */
159 hfs_lock_truncate(cp, 0);
160
161 filesize = fp->ff_size;
162 filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
163 if (offset > filesize) {
164 if ((hfsmp->hfs_flags & HFS_STANDARD) &&
165 (offset > (off_t)MAXHFSFILESIZE)) {
166 retval = EFBIG;
167 }
168 goto exit;
169 }
170
171 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 12)) | DBG_FUNC_START,
172 (int)uio_offset(uio), uio_resid(uio), (int)filesize, (int)filebytes, 0);
173
174 retval = cluster_read(vp, uio, filesize, ap->a_ioflag);
175
176 cp->c_touch_acctime = TRUE;
177
178 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 12)) | DBG_FUNC_END,
179 (int)uio_offset(uio), uio_resid(uio), (int)filesize, (int)filebytes, 0);
180
181 /*
182	 * Keep track of blocks read
183 */
184 if (hfsmp->hfc_stage == HFC_RECORDING && retval == 0) {
185 int took_cnode_lock = 0;
186 off_t bytesread;
187
188 bytesread = start_resid - uio_resid(uio);
189
190 /* When ff_bytesread exceeds 32-bits, update it behind the cnode lock. */
191 if ((fp->ff_bytesread + bytesread) > 0x00000000ffffffff) {
192 hfs_lock(cp, HFS_FORCE_LOCK);
193 took_cnode_lock = 1;
194 }
195 /*
196 * If this file hasn't been seen since the start of
197 * the current sampling period then start over.
198 */
199 if (cp->c_atime < hfsmp->hfc_timebase) {
200 struct timeval tv;
201
202 fp->ff_bytesread = bytesread;
203 microtime(&tv);
204 cp->c_atime = tv.tv_sec;
205 } else {
206 fp->ff_bytesread += bytesread;
207 }
208 if (took_cnode_lock)
209 hfs_unlock(cp);
210 }
211exit:
212 hfs_unlock_truncate(cp, 0);
213 return (retval);
214}
215
216/*
217 * Write data to a file.
218 */
219int
220hfs_vnop_write(struct vnop_write_args *ap)
221{
222 uio_t uio = ap->a_uio;
223 struct vnode *vp = ap->a_vp;
224 struct cnode *cp;
225 struct filefork *fp;
226 struct hfsmount *hfsmp;
227 kauth_cred_t cred = NULL;
228 off_t origFileSize;
229 off_t writelimit;
230 off_t bytesToAdd = 0;
231 off_t actualBytesAdded;
232 off_t filebytes;
233 off_t offset;
234 ssize_t resid;
235 int eflags;
236 int ioflag = ap->a_ioflag;
237 int retval = 0;
238 int lockflags;
239 int cnode_locked = 0;
240 int partialwrite = 0;
241 int exclusive_lock = 0;
242
243#if HFS_COMPRESSION
244 if ( hfs_file_is_compressed(VTOC(vp), 1) ) { /* 1 == don't take the cnode lock */
245 int state = decmpfs_cnode_get_vnode_state(VTOCMP(vp));
246 switch(state) {
247 case FILE_IS_COMPRESSED:
248 return EACCES;
249 case FILE_IS_CONVERTING:
250 /* if FILE_IS_CONVERTING, we allow writes */
251 break;
252 default:
253 printf("invalid state %d for compressed file\n", state);
254 /* fall through */
255 }
256 }
257#endif
258
259	// LP64todo - fix this! uio_resid may be a 64-bit value
260 resid = uio_resid(uio);
261 offset = uio_offset(uio);
262
263 if (ioflag & IO_APPEND) {
264 exclusive_lock = 1;
265 }
266
267 if (offset < 0)
268 return (EINVAL);
269 if (resid == 0)
270 return (E_NONE);
271 if (!vnode_isreg(vp))
272 return (EPERM); /* Can only write regular files */
273
274 cp = VTOC(vp);
275 fp = VTOF(vp);
276 hfsmp = VTOHFS(vp);
277
278 eflags = kEFDeferMask; /* defer file block allocations */
279#ifdef HFS_SPARSE_DEV
280 /*
281 * When the underlying device is sparse and space
282 * is low (< 8MB), stop doing delayed allocations
283 * and begin doing synchronous I/O.
284 */
285 if ((hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) &&
286 (hfs_freeblks(hfsmp, 0) < 2048)) {
287 eflags &= ~kEFDeferMask;
288 ioflag |= IO_SYNC;
289 }
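	/*
	 * Note: 2048 allocation blocks corresponds to the 8MB mentioned above
	 * only for the common 4KB allocation block size (2048 * 4096 = 8MB);
	 * volumes with a different block size hit this threshold at a
	 * different byte count.
	 */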
290#endif /* HFS_SPARSE_DEV */
291
292again:
293 /* Protect against a size change. */
294 hfs_lock_truncate(cp, exclusive_lock);
295
296 if (ioflag & IO_APPEND) {
297 uio_setoffset(uio, fp->ff_size);
298 offset = fp->ff_size;
299 }
300 if ((cp->c_flags & APPEND) && offset != fp->ff_size) {
301 retval = EPERM;
302 goto exit;
303 }
304
305 origFileSize = fp->ff_size;
306 writelimit = offset + resid;
307 filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
308
309 /* If the truncate lock is shared, and if we either have virtual
310 * blocks or will need to extend the file, upgrade the truncate
311	 * lock to exclusive. If the upgrade fails, we lose the lock and
312	 * have to take the exclusive lock again. Note that we want to
313 * grab the truncate lock exclusive even if we're not allocating new blocks
314 * because we could still be growing past the LEOF.
315 */
316 if ((exclusive_lock == 0) &&
317 ((fp->ff_unallocblocks != 0) || (writelimit > origFileSize))) {
318 exclusive_lock = 1;
319 /* Lock upgrade failed and we lost our shared lock, try again */
320 if (lck_rw_lock_shared_to_exclusive(&cp->c_truncatelock) == FALSE) {
321 goto again;
322 }
323 }
324
325 if ( (retval = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK))) {
326 goto exit;
327 }
328 cnode_locked = 1;
329
330 if (!exclusive_lock) {
331 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_START,
332 (int)offset, uio_resid(uio), (int)fp->ff_size,
333 (int)filebytes, 0);
334 }
335
336 /* Check if we do not need to extend the file */
337 if (writelimit <= filebytes) {
338 goto sizeok;
339 }
340
341 cred = vfs_context_ucred(ap->a_context);
342 bytesToAdd = writelimit - filebytes;
343
344#if QUOTA
345 retval = hfs_chkdq(cp, (int64_t)(roundup(bytesToAdd, hfsmp->blockSize)),
346 cred, 0);
347 if (retval)
348 goto exit;
349#endif /* QUOTA */
350
351 if (hfs_start_transaction(hfsmp) != 0) {
352 retval = EINVAL;
353 goto exit;
354 }
355
356 while (writelimit > filebytes) {
357 bytesToAdd = writelimit - filebytes;
358 if (cred && suser(cred, NULL) != 0)
359 eflags |= kEFReserveMask;
360
361 /* Protect extents b-tree and allocation bitmap */
362 lockflags = SFL_BITMAP;
363 if (overflow_extents(fp))
364 lockflags |= SFL_EXTENTS;
365 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
366
367 /* Files that are changing size are not hot file candidates. */
368 if (hfsmp->hfc_stage == HFC_RECORDING) {
369 fp->ff_bytesread = 0;
370 }
371 retval = MacToVFSError(ExtendFileC (hfsmp, (FCB*)fp, bytesToAdd,
372 0, eflags, &actualBytesAdded));
373
374 hfs_systemfile_unlock(hfsmp, lockflags);
375
376 if ((actualBytesAdded == 0) && (retval == E_NONE))
377 retval = ENOSPC;
378 if (retval != E_NONE)
379 break;
380 filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
381 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_NONE,
382 (int)offset, uio_resid(uio), (int)fp->ff_size, (int)filebytes, 0);
383 }
384 (void) hfs_update(vp, TRUE);
385 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
386 (void) hfs_end_transaction(hfsmp);
387
388 /*
389	 * If we didn't grow the file enough, try a partial write.
390 * POSIX expects this behavior.
391 */
392 if ((retval == ENOSPC) && (filebytes > offset)) {
393 retval = 0;
394 partialwrite = 1;
395 uio_setresid(uio, (uio_resid(uio) - bytesToAdd));
396 resid -= bytesToAdd;
397 writelimit = filebytes;
398 }
399sizeok:
400 if (retval == E_NONE) {
401 off_t filesize;
402 off_t zero_off;
403 off_t tail_off;
404 off_t inval_start;
405 off_t inval_end;
406 off_t io_start;
407 int lflag;
408 struct rl_entry *invalid_range;
409
410 if (writelimit > fp->ff_size)
411 filesize = writelimit;
412 else
413 filesize = fp->ff_size;
414
415 lflag = ioflag & ~(IO_TAILZEROFILL | IO_HEADZEROFILL | IO_NOZEROVALID | IO_NOZERODIRTY);
416
417 if (offset <= fp->ff_size) {
418 zero_off = offset & ~PAGE_MASK_64;
419
420		/* Check whether the area between zero_off and the start of the
421		   transfer is invalid and should be zero-filled as part of the
422		   transfer:
423 */
424 if (offset > zero_off) {
425 if (rl_scan(&fp->ff_invalidranges, zero_off, offset - 1, &invalid_range) != RL_NOOVERLAP)
426 lflag |= IO_HEADZEROFILL;
427 }
428 } else {
429 off_t eof_page_base = fp->ff_size & ~PAGE_MASK_64;
430
431 /* The bytes between fp->ff_size and uio->uio_offset must never be
432 read without being zeroed. The current last block is filled with zeroes
433 if it holds valid data but in all cases merely do a little bookkeeping
434 to track the area from the end of the current last page to the start of
435 the area actually written. For the same reason only the bytes up to the
436		   start of the page where this write will start are invalidated; any remainder
437 before uio->uio_offset is explicitly zeroed as part of the cluster_write.
438
439 Note that inval_start, the start of the page after the current EOF,
440 may be past the start of the write, in which case the zeroing
441		   will be handled by the cluster_write of the actual data.
442 */
443 inval_start = (fp->ff_size + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64;
444 inval_end = offset & ~PAGE_MASK_64;
445 zero_off = fp->ff_size;
446
447 if ((fp->ff_size & PAGE_MASK_64) &&
448 (rl_scan(&fp->ff_invalidranges,
449 eof_page_base,
450 fp->ff_size - 1,
451 &invalid_range) != RL_NOOVERLAP)) {
452 /* The page containing the EOF is not valid, so the
453 entire page must be made inaccessible now. If the write
454 starts on a page beyond the page containing the eof
455 (inval_end > eof_page_base), add the
456 whole page to the range to be invalidated. Otherwise
457 (i.e. if the write starts on the same page), zero-fill
458 the entire page explicitly now:
459 */
460 if (inval_end > eof_page_base) {
461 inval_start = eof_page_base;
462 } else {
463 zero_off = eof_page_base;
464 };
465 };
466
467 if (inval_start < inval_end) {
468 struct timeval tv;
469 /* There's some range of data that's going to be marked invalid */
470
471 if (zero_off < inval_start) {
472 /* The pages between inval_start and inval_end are going to be invalidated,
473 and the actual write will start on a page past inval_end. Now's the last
474 chance to zero-fill the page containing the EOF:
475 */
476 hfs_unlock(cp);
477 cnode_locked = 0;
478 retval = cluster_write(vp, (uio_t) 0,
479 fp->ff_size, inval_start,
480 zero_off, (off_t)0,
481 lflag | IO_HEADZEROFILL | IO_NOZERODIRTY);
482 hfs_lock(cp, HFS_FORCE_LOCK);
483 cnode_locked = 1;
484 if (retval) goto ioerr_exit;
485 offset = uio_offset(uio);
486 };
487
488 /* Mark the remaining area of the newly allocated space as invalid: */
489 rl_add(inval_start, inval_end - 1 , &fp->ff_invalidranges);
490 microuptime(&tv);
491 cp->c_zftimeout = tv.tv_sec + ZFTIMELIMIT;
492 zero_off = fp->ff_size = inval_end;
493 };
494
495 if (offset > zero_off) lflag |= IO_HEADZEROFILL;
496 };
497
498 /* Check to see whether the area between the end of the write and the end of
499 the page it falls in is invalid and should be zero-filled as part of the transfer:
500 */
501 tail_off = (writelimit + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64;
502 if (tail_off > filesize) tail_off = filesize;
503 if (tail_off > writelimit) {
504 if (rl_scan(&fp->ff_invalidranges, writelimit, tail_off - 1, &invalid_range) != RL_NOOVERLAP) {
505 lflag |= IO_TAILZEROFILL;
506 };
507 };
508
509 /*
510 * if the write starts beyond the current EOF (possibly advanced in the
511 * zeroing of the last block, above), then we'll zero fill from the current EOF
512 * to where the write begins:
513 *
514 * NOTE: If (and ONLY if) the portion of the file about to be written is
515 * before the current EOF it might be marked as invalid now and must be
516 * made readable (removed from the invalid ranges) before cluster_write
517 * tries to write it:
518 */
519 io_start = (lflag & IO_HEADZEROFILL) ? zero_off : offset;
520 if (io_start < fp->ff_size) {
521 off_t io_end;
522
523 io_end = (lflag & IO_TAILZEROFILL) ? tail_off : writelimit;
524 rl_remove(io_start, io_end - 1, &fp->ff_invalidranges);
525 };
526
527 hfs_unlock(cp);
528 cnode_locked = 0;
529
530 /*
531 * We need to tell UBC the fork's new size BEFORE calling
532 * cluster_write, in case any of the new pages need to be
533 * paged out before cluster_write completes (which does happen
534 * in embedded systems due to extreme memory pressure).
535 * Similarly, we need to tell hfs_vnop_pageout what the new EOF
536 * will be, so that it can pass that on to cluster_pageout, and
537 * allow those pageouts.
538 *
539 * We don't update ff_size yet since we don't want pageins to
540 * be able to see uninitialized data between the old and new
541 * EOF, until cluster_write has completed and initialized that
542 * part of the file.
543 *
544 * The vnode pager relies on the file size last given to UBC via
545 * ubc_setsize. hfs_vnop_pageout relies on fp->ff_new_size or
546 * ff_size (whichever is larger). NOTE: ff_new_size is always
547 * zero, unless we are extending the file via write.
548 */
549 if (filesize > fp->ff_size) {
550 fp->ff_new_size = filesize;
551 ubc_setsize(vp, filesize);
552 }
553 retval = cluster_write(vp, uio, fp->ff_size, filesize, zero_off,
554 tail_off, lflag | IO_NOZERODIRTY);
555 if (retval) {
556 fp->ff_new_size = 0; /* no longer extending; use ff_size */
557 if (filesize > origFileSize) {
558 ubc_setsize(vp, origFileSize);
559 }
560 goto ioerr_exit;
561 }
562
563 if (filesize > origFileSize) {
564 fp->ff_size = filesize;
565
566 /* Files that are changing size are not hot file candidates. */
567 if (hfsmp->hfc_stage == HFC_RECORDING) {
568 fp->ff_bytesread = 0;
569 }
570 }
571 fp->ff_new_size = 0; /* ff_size now has the correct size */
572
573 /* If we wrote some bytes, then touch the change and mod times */
574 if (resid > uio_resid(uio)) {
575 cp->c_touch_chgtime = TRUE;
576 cp->c_touch_modtime = TRUE;
577 }
578 }
579 if (partialwrite) {
580 uio_setresid(uio, (uio_resid(uio) + bytesToAdd));
581 resid += bytesToAdd;
582 }
583
584 // XXXdbg - see radar 4871353 for more info
585 {
586 if (flush_cache_on_write && ((ioflag & IO_NOCACHE) || vnode_isnocache(vp))) {
587 VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCSYNCHRONIZECACHE, NULL, FWRITE, NULL);
588 }
589 }
590
591ioerr_exit:
592 /*
593	 * If we successfully wrote any data, and we are not the superuser,
594 * we clear the setuid and setgid bits as a precaution against
595 * tampering.
596 */
597 if (cp->c_mode & (S_ISUID | S_ISGID)) {
598 cred = vfs_context_ucred(ap->a_context);
599 if (resid > uio_resid(uio) && cred && suser(cred, NULL)) {
600 if (!cnode_locked) {
601 hfs_lock(cp, HFS_FORCE_LOCK);
602 cnode_locked = 1;
603 }
604 cp->c_mode &= ~(S_ISUID | S_ISGID);
605 }
606 }
607 if (retval) {
608 if (ioflag & IO_UNIT) {
609 if (!cnode_locked) {
610 hfs_lock(cp, HFS_FORCE_LOCK);
611 cnode_locked = 1;
612 }
613 (void)hfs_truncate(vp, origFileSize, ioflag & IO_SYNC,
614 0, 0, ap->a_context);
615			// LP64todo - fix this! resid needs to be user_ssize_t
616 uio_setoffset(uio, (uio_offset(uio) - (resid - uio_resid(uio))));
617 uio_setresid(uio, resid);
618 filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
619 }
620 } else if ((ioflag & IO_SYNC) && (resid > uio_resid(uio))) {
621 if (!cnode_locked) {
622 hfs_lock(cp, HFS_FORCE_LOCK);
623 cnode_locked = 1;
624 }
625 retval = hfs_update(vp, TRUE);
626 }
627 /* Updating vcbWrCnt doesn't need to be atomic. */
628 hfsmp->vcbWrCnt++;
629
630 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_END,
631 (int)uio_offset(uio), uio_resid(uio), (int)fp->ff_size, (int)filebytes, 0);
632exit:
633 if (cnode_locked)
634 hfs_unlock(cp);
635 hfs_unlock_truncate(cp, exclusive_lock);
636 return (retval);
637}
638
639/* support for the "bulk-access" fcntl */
640
641#define CACHE_LEVELS 16
642#define NUM_CACHE_ENTRIES (64*16)
643#define PARENT_IDS_FLAG 0x100
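/*
 * PARENT_IDS_FLAG can be set in the 'flags' field of the access structures
 * below; when it is set, the caller's file_ids array holds parent directory
 * ids rather than leaf ids (see the check_leaf handling in
 * do_bulk_access_check).
 */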
644
645struct access_cache {
646 int numcached;
647 int cachehits; /* these two for statistics gathering */
648 int lookups;
649 unsigned int *acache;
650 unsigned char *haveaccess;
651};
652
653struct access_t {
654 uid_t uid; /* IN: effective user id */
655 short flags; /* IN: access requested (i.e. R_OK) */
656 short num_groups; /* IN: number of groups user belongs to */
657 int num_files; /* IN: number of files to process */
658 int *file_ids; /* IN: array of file ids */
659 gid_t *groups; /* IN: array of groups */
660 short *access; /* OUT: access info for each file (0 for 'has access') */
661} __attribute__((unavailable)); // this structure is for reference purposes only
662
663struct user32_access_t {
664 uid_t uid; /* IN: effective user id */
665 short flags; /* IN: access requested (i.e. R_OK) */
666 short num_groups; /* IN: number of groups user belongs to */
667 int num_files; /* IN: number of files to process */
668 user32_addr_t file_ids; /* IN: array of file ids */
669 user32_addr_t groups; /* IN: array of groups */
670 user32_addr_t access; /* OUT: access info for each file (0 for 'has access') */
671};
672
673struct user64_access_t {
674 uid_t uid; /* IN: effective user id */
675 short flags; /* IN: access requested (i.e. R_OK) */
676 short num_groups; /* IN: number of groups user belongs to */
677 int num_files; /* IN: number of files to process */
678 user64_addr_t file_ids; /* IN: array of file ids */
679 user64_addr_t groups; /* IN: array of groups */
680 user64_addr_t access; /* OUT: access info for each file (0 for 'has access') */
681};
682
683
684// these are the "extended" versions of the above structures
685// note that it is crucial that they be a different size than
686// the regular versions
687struct ext_access_t {
688 uint32_t flags; /* IN: access requested (i.e. R_OK) */
689 uint32_t num_files; /* IN: number of files to process */
690 uint32_t map_size; /* IN: size of the bit map */
691 uint32_t *file_ids; /* IN: Array of file ids */
692 char *bitmap; /* OUT: hash-bitmap of interesting directory ids */
693 short *access; /* OUT: access info for each file (0 for 'has access') */
694 uint32_t num_parents; /* future use */
695 cnid_t *parents; /* future use */
696} __attribute__((unavailable)); // this structure is for reference purposes only
697
698struct user32_ext_access_t {
699 uint32_t flags; /* IN: access requested (i.e. R_OK) */
700 uint32_t num_files; /* IN: number of files to process */
701 uint32_t map_size; /* IN: size of the bit map */
702 user32_addr_t file_ids; /* IN: Array of file ids */
703 user32_addr_t bitmap; /* OUT: hash-bitmap of interesting directory ids */
704 user32_addr_t access; /* OUT: access info for each file (0 for 'has access') */
705 uint32_t num_parents; /* future use */
706 user32_addr_t parents; /* future use */
707};
708
709struct user64_ext_access_t {
710 uint32_t flags; /* IN: access requested (i.e. R_OK) */
711 uint32_t num_files; /* IN: number of files to process */
712 uint32_t map_size; /* IN: size of the bit map */
713 user64_addr_t file_ids; /* IN: array of file ids */
714	 user64_addr_t bitmap; /* OUT: hash-bitmap of interesting directory ids */
715 user64_addr_t access; /* OUT: access info for each file (0 for 'has access') */
716 uint32_t num_parents;/* future use */
717 user64_addr_t parents;/* future use */
718};
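/*
 * These user32/user64 variants arrive through the HFS_BULKACCESS_FSCTL and
 * HFS_EXT_BULKACCESS_FSCTL commands handled in hfs_vnop_ioctl();
 * do_bulk_access_check() distinguishes them by the size of the incoming
 * argument and up-casts the 32-bit forms into a user64_ext_access_t before
 * doing any work.
 */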
719
720
721/*
722 * Perform a binary search for the given parent_id. Return value is
723 * the index if there is a match. If no_match_indexp is non-NULL it
724 * is assigned the index at which to insert the item (it is set
725 * whether or not a match was found).
726 */
727static int cache_binSearch(cnid_t *array, unsigned int hi, cnid_t parent_id, int *no_match_indexp)
728{
729 int index=-1;
730 unsigned int lo=0;
731
732 do {
733 unsigned int mid = ((hi - lo)/2) + lo;
734 unsigned int this_id = array[mid];
735
736 if (parent_id == this_id) {
737 hi = mid;
738 break;
739 }
740
741 if (parent_id < this_id) {
742 hi = mid;
743 continue;
744 }
745
746 if (parent_id > this_id) {
747 lo = mid + 1;
748 continue;
749 }
750 } while(lo < hi);
751
752 /* check if lo and hi converged on the match */
753 if (parent_id == array[hi]) {
754 index = hi;
755 }
756
757 if (no_match_indexp) {
758 *no_match_indexp = hi;
759 }
760
761 return index;
762}
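/*
 * Illustrative example: with array = {2, 5, 9} and hi = 2, searching for 5
 * returns index 1; searching for 7 returns -1 and stores 2 in
 * *no_match_indexp, the slot at which 7 would be inserted to keep the
 * array sorted.
 */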
763
764
765static int
766lookup_bucket(struct access_cache *cache, int *indexp, cnid_t parent_id)
767{
768 unsigned int hi;
769 int matches = 0;
770 int index, no_match_index;
771
772 if (cache->numcached == 0) {
773 *indexp = 0;
774 return 0; // table is empty, so insert at index=0 and report no match
775 }
776
777 if (cache->numcached > NUM_CACHE_ENTRIES) {
778 /*printf("hfs: EGAD! numcached is %d... cut our losses and trim to %d\n",
779 cache->numcached, NUM_CACHE_ENTRIES);*/
780 cache->numcached = NUM_CACHE_ENTRIES;
781 }
782
783 hi = cache->numcached - 1;
784
785 index = cache_binSearch(cache->acache, hi, parent_id, &no_match_index);
786
787 /* if no existing entry found, find index for new one */
788 if (index == -1) {
789 index = no_match_index;
790 matches = 0;
791 } else {
792 matches = 1;
793 }
794
795 *indexp = index;
796 return matches;
797}
798
799/*
800 * Add a node to the access_cache at the given index (or do a lookup first
801 * to find the index if -1 is passed in). We currently do a replace rather
802 * than an insert if the cache is full.
803 */
804static void
805add_node(struct access_cache *cache, int index, cnid_t nodeID, int access)
806{
807 int lookup_index = -1;
808
809 /* need to do a lookup first if -1 passed for index */
810 if (index == -1) {
811 if (lookup_bucket(cache, &lookup_index, nodeID)) {
812 if (cache->haveaccess[lookup_index] != access && cache->haveaccess[lookup_index] == ESRCH) {
813 // only update an entry if the previous access was ESRCH (i.e. a scope checking error)
814 cache->haveaccess[lookup_index] = access;
815 }
816
817 /* mission accomplished */
818 return;
819 } else {
820 index = lookup_index;
821 }
822
823 }
824
825 /* if the cache is full, do a replace rather than an insert */
826 if (cache->numcached >= NUM_CACHE_ENTRIES) {
827 //printf("hfs: cache is full (%d). replace at index %d\n", cache->numcached, index);
828 cache->numcached = NUM_CACHE_ENTRIES-1;
829
830 if (index > cache->numcached) {
831 // printf("hfs: index %d pinned to %d\n", index, cache->numcached);
832 index = cache->numcached;
833 }
834 }
835
836 if (index < cache->numcached && index < NUM_CACHE_ENTRIES && nodeID > cache->acache[index]) {
837 index++;
838 }
839
840 if (index >= 0 && index < cache->numcached) {
841 /* only do bcopy if we're inserting */
842 bcopy( cache->acache+index, cache->acache+(index+1), (cache->numcached - index)*sizeof(int) );
843 bcopy( cache->haveaccess+index, cache->haveaccess+(index+1), (cache->numcached - index)*sizeof(unsigned char) );
844 }
845
846 cache->acache[index] = nodeID;
847 cache->haveaccess[index] = access;
848 cache->numcached++;
849}
850
851
852struct cinfo {
853 uid_t uid;
854 gid_t gid;
855 mode_t mode;
856 cnid_t parentcnid;
857 u_int16_t recflags;
858};
859
860static int
861snoop_callback(const struct cat_desc *descp, const struct cat_attr *attrp, void * arg)
862{
863 struct cinfo *cip = (struct cinfo *)arg;
864
865 cip->uid = attrp->ca_uid;
866 cip->gid = attrp->ca_gid;
867 cip->mode = attrp->ca_mode;
868 cip->parentcnid = descp->cd_parentcnid;
869 cip->recflags = attrp->ca_recflags;
870
871 return (0);
872}
873
874/*
875 * Lookup the cnid's attr info (uid, gid, and mode) as well as its parent id. If the item
876 * isn't incore, then go to the catalog.
877 */
878static int
879do_attr_lookup(struct hfsmount *hfsmp, struct access_cache *cache, cnid_t cnid,
880 struct cnode *skip_cp, CatalogKey *keyp, struct cat_attr *cnattrp)
881{
882 int error = 0;
883
884 /* if this id matches the one the fsctl was called with, skip the lookup */
885 if (cnid == skip_cp->c_cnid) {
886 cnattrp->ca_uid = skip_cp->c_uid;
887 cnattrp->ca_gid = skip_cp->c_gid;
888 cnattrp->ca_mode = skip_cp->c_mode;
889 cnattrp->ca_recflags = skip_cp->c_attr.ca_recflags;
890 keyp->hfsPlus.parentID = skip_cp->c_parentcnid;
891 } else {
892 struct cinfo c_info;
893
894	 /* otherwise, check the cnode hash in case the file/dir is incore */
895 if (hfs_chash_snoop(hfsmp, cnid, snoop_callback, &c_info) == 0) {
896 cnattrp->ca_uid = c_info.uid;
897 cnattrp->ca_gid = c_info.gid;
898 cnattrp->ca_mode = c_info.mode;
899 cnattrp->ca_recflags = c_info.recflags;
900 keyp->hfsPlus.parentID = c_info.parentcnid;
901 } else {
902 int lockflags;
903
904 lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);
905
906 /* lookup this cnid in the catalog */
907 error = cat_getkeyplusattr(hfsmp, cnid, keyp, cnattrp);
908
909 hfs_systemfile_unlock(hfsmp, lockflags);
910
911 cache->lookups++;
912 }
913 }
914
915 return (error);
916}
917
918
919/*
920 * Compute whether we have access to the given directory (nodeID) and all its parents. Cache
921 * up to CACHE_LEVELS as we progress towards the root.
922 */
923static int
924do_access_check(struct hfsmount *hfsmp, int *err, struct access_cache *cache, HFSCatalogNodeID nodeID,
925 struct cnode *skip_cp, struct proc *theProcPtr, kauth_cred_t myp_ucred,
926 struct vfs_context *my_context,
927 char *bitmap,
928 uint32_t map_size,
929 cnid_t* parents,
930 uint32_t num_parents)
931{
932 int myErr = 0;
933 int myResult;
934 HFSCatalogNodeID thisNodeID;
935 unsigned int myPerms;
936 struct cat_attr cnattr;
937 int cache_index = -1, scope_index = -1, scope_idx_start = -1;
938 CatalogKey catkey;
939
940 int i = 0, ids_to_cache = 0;
941 int parent_ids[CACHE_LEVELS];
942
943 thisNodeID = nodeID;
944 while (thisNodeID >= kRootDirID) {
945 myResult = 0; /* default to "no access" */
946
947 /* check the cache before resorting to hitting the catalog */
948
949 /* ASSUMPTION: access info of cached entries is "final"... i.e. no need
950 * to look any further after hitting cached dir */
951
952 if (lookup_bucket(cache, &cache_index, thisNodeID)) {
953 cache->cachehits++;
954 myErr = cache->haveaccess[cache_index];
955 if (scope_index != -1) {
956 if (myErr == ESRCH) {
957 myErr = 0;
958 }
959 } else {
960 scope_index = 0; // so we'll just use the cache result
961 scope_idx_start = ids_to_cache;
962 }
963 myResult = (myErr == 0) ? 1 : 0;
964 goto ExitThisRoutine;
965 }
966
967
968 if (parents) {
969 int tmp;
970 tmp = cache_binSearch(parents, num_parents-1, thisNodeID, NULL);
971 if (scope_index == -1)
972 scope_index = tmp;
973 if (tmp != -1 && scope_idx_start == -1 && ids_to_cache < CACHE_LEVELS) {
974 scope_idx_start = ids_to_cache;
975 }
976 }
977
978 /* remember which parents we want to cache */
979 if (ids_to_cache < CACHE_LEVELS) {
980 parent_ids[ids_to_cache] = thisNodeID;
981 ids_to_cache++;
982 }
983 // Inefficient (using modulo) and we might want to use a hash function, not rely on the node id to be "nice"...
984 if (bitmap && map_size) {
985 bitmap[(thisNodeID/8)%(map_size)]|=(1<<(thisNodeID&7));
986 }
987
988
989 /* do the lookup (checks the cnode hash, then the catalog) */
990 myErr = do_attr_lookup(hfsmp, cache, thisNodeID, skip_cp, &catkey, &cnattr);
991 if (myErr) {
992 goto ExitThisRoutine; /* no access */
993 }
994
995 /* Root always gets access. */
996 if (suser(myp_ucred, NULL) == 0) {
997 thisNodeID = catkey.hfsPlus.parentID;
998 myResult = 1;
999 continue;
1000 }
1001
1002 // if the thing has acl's, do the full permission check
1003 if ((cnattr.ca_recflags & kHFSHasSecurityMask) != 0) {
1004 struct vnode *vp;
1005
1006 /* get the vnode for this cnid */
1007 myErr = hfs_vget(hfsmp, thisNodeID, &vp, 0);
1008 if ( myErr ) {
1009 myResult = 0;
1010 goto ExitThisRoutine;
1011 }
1012
1013 thisNodeID = VTOC(vp)->c_parentcnid;
1014
1015 hfs_unlock(VTOC(vp));
1016
1017 if (vnode_vtype(vp) == VDIR) {
1018 myErr = vnode_authorize(vp, NULL, (KAUTH_VNODE_SEARCH | KAUTH_VNODE_LIST_DIRECTORY), my_context);
1019 } else {
1020 myErr = vnode_authorize(vp, NULL, KAUTH_VNODE_READ_DATA, my_context);
1021 }
1022
1023 vnode_put(vp);
1024 if (myErr) {
1025 myResult = 0;
1026 goto ExitThisRoutine;
1027 }
1028 } else {
1029 unsigned int flags;
1030
1031 myPerms = DerivePermissionSummary(cnattr.ca_uid, cnattr.ca_gid,
1032 cnattr.ca_mode, hfsmp->hfs_mp,
1033 myp_ucred, theProcPtr);
1034
1035 if (cnattr.ca_mode & S_IFDIR) {
1036 flags = R_OK | X_OK;
1037 } else {
1038 flags = R_OK;
1039 }
1040 if ( (myPerms & flags) != flags) {
1041 myResult = 0;
1042 myErr = EACCES;
1043 goto ExitThisRoutine; /* no access */
1044 }
1045
1046 /* up the hierarchy we go */
1047 thisNodeID = catkey.hfsPlus.parentID;
1048 }
1049 }
1050
1051 /* if here, we have access to this node */
1052 myResult = 1;
1053
1054 ExitThisRoutine:
1055 if (parents && myErr == 0 && scope_index == -1) {
1056 myErr = ESRCH;
1057 }
1058
1059 if (myErr) {
1060 myResult = 0;
1061 }
1062 *err = myErr;
1063
1064 /* cache the parent directory(ies) */
1065 for (i = 0; i < ids_to_cache; i++) {
1066 if (myErr == 0 && parents && (scope_idx_start == -1 || i > scope_idx_start)) {
1067 add_node(cache, -1, parent_ids[i], ESRCH);
1068 } else {
1069 add_node(cache, -1, parent_ids[i], myErr);
1070 }
1071 }
1072
1073 return (myResult);
1074}
1075
1076static int
1077do_bulk_access_check(struct hfsmount *hfsmp, struct vnode *vp,
1078 struct vnop_ioctl_args *ap, int arg_size, vfs_context_t context)
1079{
1080 boolean_t is64bit;
1081
1082 /*
1083	 * NOTE: on entry, the vnode is locked. In case this vnode
1084	 * happens to be in our list of file_ids, we'll note it so we
1085	 * avoid calling hfs_chashget_nowait() on that id, as that
1086	 * would cause a "locking against myself" panic.
1087 */
1088 Boolean check_leaf = true;
1089
1090 struct user64_ext_access_t *user_access_structp;
1091 struct user64_ext_access_t tmp_user_access;
1092 struct access_cache cache;
1093
1094 int error = 0, prev_parent_check_ok=1;
1095 unsigned int i;
1096
1097 short flags;
1098 unsigned int num_files = 0;
1099 int map_size = 0;
1100 int num_parents = 0;
1101 int *file_ids=NULL;
1102 short *access=NULL;
1103 char *bitmap=NULL;
1104 cnid_t *parents=NULL;
1105 int leaf_index;
1106
1107 cnid_t cnid;
1108 cnid_t prevParent_cnid = 0;
1109 unsigned int myPerms;
1110 short myaccess = 0;
1111 struct cat_attr cnattr;
1112 CatalogKey catkey;
1113 struct cnode *skip_cp = VTOC(vp);
1114 kauth_cred_t cred = vfs_context_ucred(context);
1115 proc_t p = vfs_context_proc(context);
1116
1117 is64bit = proc_is64bit(p);
1118
1119 /* initialize the local cache and buffers */
1120 cache.numcached = 0;
1121 cache.cachehits = 0;
1122 cache.lookups = 0;
1123 cache.acache = NULL;
1124 cache.haveaccess = NULL;
1125
1126 /* struct copyin done during dispatch... need to copy file_id array separately */
1127 if (ap->a_data == NULL) {
1128 error = EINVAL;
1129 goto err_exit_bulk_access;
1130 }
1131
1132 if (is64bit) {
1133 if (arg_size != sizeof(struct user64_ext_access_t)) {
1134 error = EINVAL;
1135 goto err_exit_bulk_access;
1136 }
1137
1138 user_access_structp = (struct user64_ext_access_t *)ap->a_data;
1139
1140 } else if (arg_size == sizeof(struct user32_access_t)) {
1141 struct user32_access_t *accessp = (struct user32_access_t *)ap->a_data;
1142
1143 // convert an old style bulk-access struct to the new style
1144 tmp_user_access.flags = accessp->flags;
1145 tmp_user_access.num_files = accessp->num_files;
1146 tmp_user_access.map_size = 0;
1147 tmp_user_access.file_ids = CAST_USER_ADDR_T(accessp->file_ids);
1148 tmp_user_access.bitmap = USER_ADDR_NULL;
1149 tmp_user_access.access = CAST_USER_ADDR_T(accessp->access);
1150 tmp_user_access.num_parents = 0;
1151 user_access_structp = &tmp_user_access;
1152
1153 } else if (arg_size == sizeof(struct user32_ext_access_t)) {
1154 struct user32_ext_access_t *accessp = (struct user32_ext_access_t *)ap->a_data;
1155
1156 // up-cast from a 32-bit version of the struct
1157 tmp_user_access.flags = accessp->flags;
1158 tmp_user_access.num_files = accessp->num_files;
1159 tmp_user_access.map_size = accessp->map_size;
1160 tmp_user_access.num_parents = accessp->num_parents;
1161
1162 tmp_user_access.file_ids = CAST_USER_ADDR_T(accessp->file_ids);
1163 tmp_user_access.bitmap = CAST_USER_ADDR_T(accessp->bitmap);
1164 tmp_user_access.access = CAST_USER_ADDR_T(accessp->access);
1165 tmp_user_access.parents = CAST_USER_ADDR_T(accessp->parents);
1166
1167 user_access_structp = &tmp_user_access;
1168 } else {
1169 error = EINVAL;
1170 goto err_exit_bulk_access;
1171 }
1172
1173 map_size = user_access_structp->map_size;
1174
1175 num_files = user_access_structp->num_files;
1176
1177	 num_parents = user_access_structp->num_parents;
1178
1179 if (num_files < 1) {
1180 goto err_exit_bulk_access;
1181 }
1182 if (num_files > 1024) {
1183 error = EINVAL;
1184 goto err_exit_bulk_access;
1185 }
1186
1187 if (num_parents > 1024) {
1188 error = EINVAL;
1189 goto err_exit_bulk_access;
1190 }
1191
1192 file_ids = (int *) kalloc(sizeof(int) * num_files);
1193 access = (short *) kalloc(sizeof(short) * num_files);
1194 if (map_size) {
1195 bitmap = (char *) kalloc(sizeof(char) * map_size);
1196 }
1197
1198 if (num_parents) {
1199 parents = (cnid_t *) kalloc(sizeof(cnid_t) * num_parents);
1200 }
1201
1202 cache.acache = (unsigned int *) kalloc(sizeof(int) * NUM_CACHE_ENTRIES);
1203 cache.haveaccess = (unsigned char *) kalloc(sizeof(unsigned char) * NUM_CACHE_ENTRIES);
1204
1205 if (file_ids == NULL || access == NULL || (map_size != 0 && bitmap == NULL) || cache.acache == NULL || cache.haveaccess == NULL) {
1206 if (file_ids) {
1207 kfree(file_ids, sizeof(int) * num_files);
1208 }
1209 if (bitmap) {
1210 kfree(bitmap, sizeof(char) * map_size);
1211 }
1212 if (access) {
1213 kfree(access, sizeof(short) * num_files);
1214 }
1215 if (cache.acache) {
1216 kfree(cache.acache, sizeof(int) * NUM_CACHE_ENTRIES);
1217 }
1218 if (cache.haveaccess) {
1219 kfree(cache.haveaccess, sizeof(unsigned char) * NUM_CACHE_ENTRIES);
1220 }
1221 if (parents) {
1222 kfree(parents, sizeof(cnid_t) * num_parents);
1223 }
1224 return ENOMEM;
1225 }
1226
1227 // make sure the bitmap is zero'ed out...
1228 if (bitmap) {
1229 bzero(bitmap, (sizeof(char) * map_size));
1230 }
1231
1232 if ((error = copyin(user_access_structp->file_ids, (caddr_t)file_ids,
1233 num_files * sizeof(int)))) {
1234 goto err_exit_bulk_access;
1235 }
1236
1237 if (num_parents) {
1238 if ((error = copyin(user_access_structp->parents, (caddr_t)parents,
1239 num_parents * sizeof(cnid_t)))) {
1240 goto err_exit_bulk_access;
1241 }
1242 }
1243
1244 flags = user_access_structp->flags;
1245 if ((flags & (F_OK | R_OK | W_OK | X_OK)) == 0) {
1246 flags = R_OK;
1247 }
1248
1249 /* check if we've been passed leaf node ids or parent ids */
1250 if (flags & PARENT_IDS_FLAG) {
1251 check_leaf = false;
1252 }
1253
1254 /* Check access to each file_id passed in */
1255 for (i = 0; i < num_files; i++) {
1256 leaf_index=-1;
1257 cnid = (cnid_t) file_ids[i];
1258
1259 /* root always has access */
1260 if ((!parents) && (!suser(cred, NULL))) {
1261 access[i] = 0;
1262 continue;
1263 }
1264
1265 if (check_leaf) {
1266 /* do the lookup (checks the cnode hash, then the catalog) */
1267 error = do_attr_lookup(hfsmp, &cache, cnid, skip_cp, &catkey, &cnattr);
1268 if (error) {
1269 access[i] = (short) error;
1270 continue;
1271 }
1272
1273 if (parents) {
1274 // Check if the leaf matches one of the parent scopes
1275 leaf_index = cache_binSearch(parents, num_parents-1, cnid, NULL);
1276 if (leaf_index >= 0 && parents[leaf_index] == cnid)
1277 prev_parent_check_ok = 0;
1278 else if (leaf_index >= 0)
1279 prev_parent_check_ok = 1;
1280 }
1281
1282 // if the thing has acl's, do the full permission check
1283 if ((cnattr.ca_recflags & kHFSHasSecurityMask) != 0) {
1284 struct vnode *cvp;
1285 int myErr = 0;
1286 /* get the vnode for this cnid */
1287 myErr = hfs_vget(hfsmp, cnid, &cvp, 0);
1288 if ( myErr ) {
1289 access[i] = myErr;
1290 continue;
1291 }
1292
1293 hfs_unlock(VTOC(cvp));
1294
1295 if (vnode_vtype(cvp) == VDIR) {
1296 myErr = vnode_authorize(cvp, NULL, (KAUTH_VNODE_SEARCH | KAUTH_VNODE_LIST_DIRECTORY), context);
1297 } else {
1298 myErr = vnode_authorize(cvp, NULL, KAUTH_VNODE_READ_DATA, context);
1299 }
1300
1301 vnode_put(cvp);
1302 if (myErr) {
1303 access[i] = myErr;
1304 continue;
1305 }
1306 } else {
1307 /* before calling CheckAccess(), check the target file for read access */
1308 myPerms = DerivePermissionSummary(cnattr.ca_uid, cnattr.ca_gid,
1309 cnattr.ca_mode, hfsmp->hfs_mp, cred, p);
1310
1311 /* fail fast if no access */
1312 if ((myPerms & flags) == 0) {
1313 access[i] = EACCES;
1314 continue;
1315 }
1316 }
1317 } else {
1318 /* we were passed an array of parent ids */
1319 catkey.hfsPlus.parentID = cnid;
1320 }
1321
1322 /* if the last guy had the same parent and had access, we're done */
1323 if (i > 0 && catkey.hfsPlus.parentID == prevParent_cnid && access[i-1] == 0 && prev_parent_check_ok) {
1324 cache.cachehits++;
1325 access[i] = 0;
1326 continue;
1327 }
1328
1329 myaccess = do_access_check(hfsmp, &error, &cache, catkey.hfsPlus.parentID,
1330 skip_cp, p, cred, context,bitmap, map_size, parents, num_parents);
1331
1332 if (myaccess || (error == ESRCH && leaf_index != -1)) {
1333 access[i] = 0; // have access.. no errors to report
1334 } else {
1335 access[i] = (error != 0 ? (short) error : EACCES);
1336 }
1337
1338 prevParent_cnid = catkey.hfsPlus.parentID;
1339 }
1340
1341 /* copyout the access array */
1342 if ((error = copyout((caddr_t)access, user_access_structp->access,
1343 num_files * sizeof (short)))) {
1344 goto err_exit_bulk_access;
1345 }
1346 if (map_size && bitmap) {
1347 if ((error = copyout((caddr_t)bitmap, user_access_structp->bitmap,
1348 map_size * sizeof (char)))) {
1349 goto err_exit_bulk_access;
1350 }
1351 }
1352
1353
1354 err_exit_bulk_access:
1355
1356 //printf("hfs: on exit (err %d), numfiles/numcached/cachehits/lookups is %d/%d/%d/%d\n", error, num_files, cache.numcached, cache.cachehits, cache.lookups);
1357
1358 if (file_ids)
1359 kfree(file_ids, sizeof(int) * num_files);
1360 if (parents)
1361 kfree(parents, sizeof(cnid_t) * num_parents);
1362 if (bitmap)
1363 kfree(bitmap, sizeof(char) * map_size);
1364 if (access)
1365 kfree(access, sizeof(short) * num_files);
1366 if (cache.acache)
1367 kfree(cache.acache, sizeof(int) * NUM_CACHE_ENTRIES);
1368 if (cache.haveaccess)
1369 kfree(cache.haveaccess, sizeof(unsigned char) * NUM_CACHE_ENTRIES);
1370
1371 return (error);
1372}
1373
1374
1375/* end "bulk-access" support */
1376
1377
1378/*
1379 * Callback for use with freeze ioctl.
1380 */
1381static int
1382hfs_freezewrite_callback(struct vnode *vp, __unused void *cargs)
1383{
1384 vnode_waitforwrites(vp, 0, 0, 0, "hfs freeze");
1385
1386 return 0;
1387}
1388
1389/*
1390 * Control filesystem operating characteristics.
1391 */
1392int
1393hfs_vnop_ioctl( struct vnop_ioctl_args /* {
1394 vnode_t a_vp;
1395 int a_command;
1396 caddr_t a_data;
1397 int a_fflag;
1398 vfs_context_t a_context;
1399 } */ *ap)
1400{
1401 struct vnode * vp = ap->a_vp;
1402 struct hfsmount *hfsmp = VTOHFS(vp);
1403 vfs_context_t context = ap->a_context;
1404 kauth_cred_t cred = vfs_context_ucred(context);
1405 proc_t p = vfs_context_proc(context);
1406 struct vfsstatfs *vfsp;
1407 boolean_t is64bit;
1408 off_t jnl_start, jnl_size;
1409 struct hfs_journal_info *jip;
1410#if HFS_COMPRESSION
1411 int compressed = 0;
1412 off_t uncompressed_size = -1;
1413 int decmpfs_error = 0;
1414
1415 if (ap->a_command == F_RDADVISE) {
1416 /* we need to inspect the decmpfs state of the file as early as possible */
1417 compressed = hfs_file_is_compressed(VTOC(vp), 0);
1418 if (compressed) {
1419 if (VNODE_IS_RSRC(vp)) {
1420 /* if this is the resource fork, treat it as if it were empty */
1421 uncompressed_size = 0;
1422 } else {
1423 decmpfs_error = hfs_uncompressed_size_of_compressed_file(NULL, vp, 0, &uncompressed_size, 0);
1424 if (decmpfs_error != 0) {
1425 /* failed to get the uncompressed size, we'll check for this later */
1426 uncompressed_size = -1;
1427 }
1428 }
1429 }
1430 }
1431#endif /* HFS_COMPRESSION */
1432
1433 is64bit = proc_is64bit(p);
1434
1435 switch (ap->a_command) {
1436
1437 case HFS_GETPATH:
1438 {
1439 struct vnode *file_vp;
1440 cnid_t cnid;
1441 int outlen;
1442 char *bufptr;
1443 int error;
1444
1445 /* Caller must be owner of file system. */
1446 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
1447 if (suser(cred, NULL) &&
1448 kauth_cred_getuid(cred) != vfsp->f_owner) {
1449 return (EACCES);
1450 }
1451 /* Target vnode must be file system's root. */
1452 if (!vnode_isvroot(vp)) {
1453 return (EINVAL);
1454 }
1455 bufptr = (char *)ap->a_data;
1456 cnid = strtoul(bufptr, NULL, 10);
1457
1458 /* We need to call hfs_vfs_vget to leverage the code that will
1459 * fix the origin list for us if needed, as opposed to calling
1460 * hfs_vget, since we will need the parent for build_path call.
1461 */
1462
1463 if ((error = hfs_vfs_vget(HFSTOVFS(hfsmp), cnid, &file_vp, context))) {
1464 return (error);
1465 }
1466 error = build_path(file_vp, bufptr, sizeof(pathname_t), &outlen, 0, context);
1467 vnode_put(file_vp);
1468
1469 return (error);
1470 }
1471
1472 case HFS_PREV_LINK:
1473 case HFS_NEXT_LINK:
1474 {
1475 cnid_t linkfileid;
1476 cnid_t nextlinkid;
1477 cnid_t prevlinkid;
1478 int error;
1479
1480 /* Caller must be owner of file system. */
1481 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
1482 if (suser(cred, NULL) &&
1483 kauth_cred_getuid(cred) != vfsp->f_owner) {
1484 return (EACCES);
1485 }
1486 /* Target vnode must be file system's root. */
1487 if (!vnode_isvroot(vp)) {
1488 return (EINVAL);
1489 }
1490 linkfileid = *(cnid_t *)ap->a_data;
1491 if (linkfileid < kHFSFirstUserCatalogNodeID) {
1492 return (EINVAL);
1493 }
1494 if ((error = hfs_lookuplink(hfsmp, linkfileid, &prevlinkid, &nextlinkid))) {
1495 return (error);
1496 }
1497 if (ap->a_command == HFS_NEXT_LINK) {
1498 *(cnid_t *)ap->a_data = nextlinkid;
1499 } else {
1500 *(cnid_t *)ap->a_data = prevlinkid;
1501 }
1502 return (0);
1503 }
1504
1505 case HFS_RESIZE_PROGRESS: {
1506
1507 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
1508 if (suser(cred, NULL) &&
1509 kauth_cred_getuid(cred) != vfsp->f_owner) {
1510 return (EACCES); /* must be owner of file system */
1511 }
1512 if (!vnode_isvroot(vp)) {
1513 return (EINVAL);
1514 }
1515 /* file system must not be mounted read-only */
1516 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
1517 return (EROFS);
1518 }
1519
1520 return hfs_resize_progress(hfsmp, (u_int32_t *)ap->a_data);
1521 }
1522
1523 case HFS_RESIZE_VOLUME: {
1524 u_int64_t newsize;
1525 u_int64_t cursize;
1526
1527 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
1528 if (suser(cred, NULL) &&
1529 kauth_cred_getuid(cred) != vfsp->f_owner) {
1530 return (EACCES); /* must be owner of file system */
1531 }
1532 if (!vnode_isvroot(vp)) {
1533 return (EINVAL);
1534 }
1535
1536 /* filesystem must not be mounted read only */
1537 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
1538 return (EROFS);
1539 }
1540 newsize = *(u_int64_t *)ap->a_data;
1541 cursize = (u_int64_t)hfsmp->totalBlocks * (u_int64_t)hfsmp->blockSize;
1542
1543 if (newsize > cursize) {
1544 return hfs_extendfs(hfsmp, *(u_int64_t *)ap->a_data, context);
1545 } else if (newsize < cursize) {
1546 return hfs_truncatefs(hfsmp, *(u_int64_t *)ap->a_data, context);
1547 } else {
1548 return (0);
1549 }
1550 }
1551 case HFS_CHANGE_NEXT_ALLOCATION: {
1552 int error = 0; /* Assume success */
1553 u_int32_t location;
1554
1555 if (vnode_vfsisrdonly(vp)) {
1556 return (EROFS);
1557 }
1558 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
1559 if (suser(cred, NULL) &&
1560 kauth_cred_getuid(cred) != vfsp->f_owner) {
1561 return (EACCES); /* must be owner of file system */
1562 }
1563 if (!vnode_isvroot(vp)) {
1564 return (EINVAL);
1565 }
1566 HFS_MOUNT_LOCK(hfsmp, TRUE);
1567 location = *(u_int32_t *)ap->a_data;
1568 if ((location >= hfsmp->allocLimit) &&
1569 (location != HFS_NO_UPDATE_NEXT_ALLOCATION)) {
1570 error = EINVAL;
1571 goto fail_change_next_allocation;
1572 }
1573 /* Return previous value. */
1574 *(u_int32_t *)ap->a_data = hfsmp->nextAllocation;
1575 if (location == HFS_NO_UPDATE_NEXT_ALLOCATION) {
1576 /* On magic value for location, set nextAllocation to next block
1577 * after metadata zone and set flag in mount structure to indicate
1578 * that nextAllocation should not be updated again.
1579 */
1580 if (hfsmp->hfs_metazone_end != 0) {
1581 HFS_UPDATE_NEXT_ALLOCATION(hfsmp, hfsmp->hfs_metazone_end + 1);
1582 }
1583 hfsmp->hfs_flags |= HFS_SKIP_UPDATE_NEXT_ALLOCATION;
1584 } else {
1585 hfsmp->hfs_flags &= ~HFS_SKIP_UPDATE_NEXT_ALLOCATION;
1586 HFS_UPDATE_NEXT_ALLOCATION(hfsmp, location);
1587 }
1588 MarkVCBDirty(hfsmp);
1589fail_change_next_allocation:
1590 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
1591 return (error);
1592 }
1593
1594#ifdef HFS_SPARSE_DEV
1595 case HFS_SETBACKINGSTOREINFO: {
1596 struct vnode * bsfs_rootvp;
1597 struct vnode * di_vp;
1598 struct hfs_backingstoreinfo *bsdata;
1599 int error = 0;
1600
1601 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
1602 return (EROFS);
1603 }
1604 if (hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) {
1605 return (EALREADY);
1606 }
1607 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
1608 if (suser(cred, NULL) &&
1609 kauth_cred_getuid(cred) != vfsp->f_owner) {
1610 return (EACCES); /* must be owner of file system */
1611 }
1612 bsdata = (struct hfs_backingstoreinfo *)ap->a_data;
1613 if (bsdata == NULL) {
1614 return (EINVAL);
1615 }
1616 if ((error = file_vnode(bsdata->backingfd, &di_vp))) {
1617 return (error);
1618 }
1619 if ((error = vnode_getwithref(di_vp))) {
1620 file_drop(bsdata->backingfd);
1621 return(error);
1622 }
1623
1624 if (vnode_mount(vp) == vnode_mount(di_vp)) {
1625 (void)vnode_put(di_vp);
1626 file_drop(bsdata->backingfd);
1627 return (EINVAL);
1628 }
1629
1630 /*
1631 * Obtain the backing fs root vnode and keep a reference
1632 * on it. This reference will be dropped in hfs_unmount.
1633 */
1634 error = VFS_ROOT(vnode_mount(di_vp), &bsfs_rootvp, NULL); /* XXX use context! */
1635 if (error) {
1636 (void)vnode_put(di_vp);
1637 file_drop(bsdata->backingfd);
1638 return (error);
1639 }
1640 vnode_ref(bsfs_rootvp);
1641 vnode_put(bsfs_rootvp);
1642
1643 hfsmp->hfs_backingfs_rootvp = bsfs_rootvp;
1644 hfsmp->hfs_flags |= HFS_HAS_SPARSE_DEVICE;
1645 hfsmp->hfs_sparsebandblks = bsdata->bandsize / HFSTOVCB(hfsmp)->blockSize;
1646 hfsmp->hfs_sparsebandblks *= 4;
1647
1648 vfs_markdependency(hfsmp->hfs_mp);
1649
1650 /*
1651 * If the sparse image is on a sparse image file (as opposed to a sparse
1652 * bundle), then we may need to limit the free space to the maximum size
1653 * of a file on that volume. So we query (using pathconf), and if we get
1654 * a meaningful result, we cache the number of blocks for later use in
1655 * hfs_freeblks().
1656 */
1657 hfsmp->hfs_backingfs_maxblocks = 0;
1658 if (vnode_vtype(di_vp) == VREG) {
1659 int terr;
1660 int hostbits;
1661 terr = vn_pathconf(di_vp, _PC_FILESIZEBITS, &hostbits, context);
1662 if (terr == 0 && hostbits != 0 && hostbits < 64) {
1663 u_int64_t hostfilesizemax = ((u_int64_t)1) << hostbits;
1664
1665 hfsmp->hfs_backingfs_maxblocks = hostfilesizemax / hfsmp->blockSize;
1666 }
1667 }
1668
1669 (void)vnode_put(di_vp);
1670 file_drop(bsdata->backingfd);
1671 return (0);
1672 }
1673 case HFS_CLRBACKINGSTOREINFO: {
1674 struct vnode * tmpvp;
1675
1676 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
1677 if (suser(cred, NULL) &&
1678 kauth_cred_getuid(cred) != vfsp->f_owner) {
1679 return (EACCES); /* must be owner of file system */
1680 }
1681 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
1682 return (EROFS);
1683 }
1684
1685 if ((hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) &&
1686 hfsmp->hfs_backingfs_rootvp) {
1687
1688 hfsmp->hfs_flags &= ~HFS_HAS_SPARSE_DEVICE;
1689 tmpvp = hfsmp->hfs_backingfs_rootvp;
1690 hfsmp->hfs_backingfs_rootvp = NULLVP;
1691 hfsmp->hfs_sparsebandblks = 0;
1692 vnode_rele(tmpvp);
1693 }
1694 return (0);
1695 }
1696#endif /* HFS_SPARSE_DEV */
1697
1698 case F_FREEZE_FS: {
1699 struct mount *mp;
1700
1701 mp = vnode_mount(vp);
1702 hfsmp = VFSTOHFS(mp);
1703
1704 if (!(hfsmp->jnl))
1705 return (ENOTSUP);
1706
1707 vfsp = vfs_statfs(mp);
1708
1709 if (kauth_cred_getuid(cred) != vfsp->f_owner &&
1710 !kauth_cred_issuser(cred))
1711 return (EACCES);
1712
1713 lck_rw_lock_exclusive(&hfsmp->hfs_insync);
1714
1715 // flush things before we get started to try and prevent
1716 // dirty data from being paged out while we're frozen.
1717 // note: can't do this after taking the lock as it will
1718 // deadlock against ourselves.
1719 vnode_iterate(mp, 0, hfs_freezewrite_callback, NULL);
1720 hfs_global_exclusive_lock_acquire(hfsmp);
1721
1722 // DO NOT call hfs_journal_flush() because that takes a
1723 // shared lock on the global exclusive lock!
1724 journal_flush(hfsmp->jnl);
1725
1726 // don't need to iterate on all vnodes, we just need to
1727 // wait for writes to the system files and the device vnode
1728 if (HFSTOVCB(hfsmp)->extentsRefNum)
1729 vnode_waitforwrites(HFSTOVCB(hfsmp)->extentsRefNum, 0, 0, 0, "hfs freeze");
1730 if (HFSTOVCB(hfsmp)->catalogRefNum)
1731 vnode_waitforwrites(HFSTOVCB(hfsmp)->catalogRefNum, 0, 0, 0, "hfs freeze");
1732 if (HFSTOVCB(hfsmp)->allocationsRefNum)
1733 vnode_waitforwrites(HFSTOVCB(hfsmp)->allocationsRefNum, 0, 0, 0, "hfs freeze");
1734 if (hfsmp->hfs_attribute_vp)
1735 vnode_waitforwrites(hfsmp->hfs_attribute_vp, 0, 0, 0, "hfs freeze");
1736 vnode_waitforwrites(hfsmp->hfs_devvp, 0, 0, 0, "hfs freeze");
1737
1738 hfsmp->hfs_freezing_proc = current_proc();
1739
1740 return (0);
1741 }
1742
1743 case F_THAW_FS: {
1744 vfsp = vfs_statfs(vnode_mount(vp));
1745 if (kauth_cred_getuid(cred) != vfsp->f_owner &&
1746 !kauth_cred_issuser(cred))
1747 return (EACCES);
1748
1749 // if we're not the one who froze the fs then we
1750 // can't thaw it.
1751 if (hfsmp->hfs_freezing_proc != current_proc()) {
1752 return EPERM;
1753 }
1754
1755 // NOTE: if you add code here, also go check the
1756 // code that "thaws" the fs in hfs_vnop_close()
1757 //
1758 hfsmp->hfs_freezing_proc = NULL;
1759 hfs_global_exclusive_lock_release(hfsmp);
1760 lck_rw_unlock_exclusive(&hfsmp->hfs_insync);
1761
1762 return (0);
1763 }
1764
1765 case HFS_BULKACCESS_FSCTL: {
1766 int size;
1767
1768 if (hfsmp->hfs_flags & HFS_STANDARD) {
1769 return EINVAL;
1770 }
1771
1772 if (is64bit) {
1773 size = sizeof(struct user64_access_t);
1774 } else {
1775 size = sizeof(struct user32_access_t);
1776 }
1777
1778 return do_bulk_access_check(hfsmp, vp, ap, size, context);
1779 }
1780
1781 case HFS_EXT_BULKACCESS_FSCTL: {
1782 int size;
1783
1784 if (hfsmp->hfs_flags & HFS_STANDARD) {
1785 return EINVAL;
1786 }
1787
1788 if (is64bit) {
1789 size = sizeof(struct user64_ext_access_t);
1790 } else {
1791 size = sizeof(struct user32_ext_access_t);
1792 }
1793
1794 return do_bulk_access_check(hfsmp, vp, ap, size, context);
1795 }
1796
1797 case HFS_SETACLSTATE: {
1798 int state;
1799
1800 if (ap->a_data == NULL) {
1801 return (EINVAL);
1802 }
1803
1804 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
1805 state = *(int *)ap->a_data;
1806
1807 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
1808 return (EROFS);
1809 }
1810 // super-user can enable or disable acl's on a volume.
1811 // the volume owner can only enable acl's
1812 if (!is_suser() && (state == 0 || kauth_cred_getuid(cred) != vfsp->f_owner)) {
1813 return (EPERM);
1814 }
1815 if (state == 0 || state == 1)
1816 return hfs_set_volxattr(hfsmp, HFS_SETACLSTATE, state);
1817 else
1818 return (EINVAL);
1819 }
1820
1821 case HFS_SET_XATTREXTENTS_STATE: {
1822 int state;
1823
1824 if (ap->a_data == NULL) {
1825 return (EINVAL);
1826 }
1827
1828 state = *(int *)ap->a_data;
1829
1830 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
1831 return (EROFS);
1832 }
1833
1834 /* Super-user can enable or disable extent-based extended
1835 * attribute support on a volume
1836 */
1837 if (!is_suser()) {
1838 return (EPERM);
1839 }
1840 if (state == 0 || state == 1)
1841 return hfs_set_volxattr(hfsmp, HFS_SET_XATTREXTENTS_STATE, state);
1842 else
1843 return (EINVAL);
1844 }
1845
1846 case F_FULLFSYNC: {
1847 int error;
1848
1849 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
1850 return (EROFS);
1851 }
1852 error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK);
1853 if (error == 0) {
1854 error = hfs_fsync(vp, MNT_WAIT, TRUE, p);
1855 hfs_unlock(VTOC(vp));
1856 }
1857
1858 return error;
1859 }
1860
1861 case F_CHKCLEAN: {
1862 register struct cnode *cp;
1863 int error;
1864
1865 if (!vnode_isreg(vp))
1866 return EINVAL;
1867
1868 error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK);
1869 if (error == 0) {
1870 cp = VTOC(vp);
1871 /*
1872 * used by a regression test to determine whether
1873 * all the dirty pages (written via write) have been cleaned
1874 * after a call to 'fsync'.
1875 */
1876 error = is_file_clean(vp, VTOF(vp)->ff_size);
1877 hfs_unlock(cp);
1878 }
1879 return (error);
1880 }
1881
1882 case F_RDADVISE: {
1883 register struct radvisory *ra;
1884 struct filefork *fp;
1885 int error;
1886
1887 if (!vnode_isreg(vp))
1888 return EINVAL;
1889
1890 ra = (struct radvisory *)(ap->a_data);
1891 fp = VTOF(vp);
1892
1893 /* Protect against a size change. */
1894 hfs_lock_truncate(VTOC(vp), TRUE);
1895
1896#if HFS_COMPRESSION
1897 if (compressed && (uncompressed_size == -1)) {
1898 /* fetching the uncompressed size failed above, so return the error */
1899 error = decmpfs_error;
1900 } else if ((compressed && (ra->ra_offset >= uncompressed_size)) ||
1901 (!compressed && (ra->ra_offset >= fp->ff_size))) {
1902 error = EFBIG;
1903 }
1904#else /* HFS_COMPRESSION */
1905 if (ra->ra_offset >= fp->ff_size) {
1906 error = EFBIG;
1907 }
1908#endif /* HFS_COMPRESSION */
1909 else {
1910 error = advisory_read(vp, fp->ff_size, ra->ra_offset, ra->ra_count);
1911 }
1912
1913 hfs_unlock_truncate(VTOC(vp), TRUE);
1914 return (error);
1915 }
1916
1917 case F_READBOOTSTRAP:
1918 case F_WRITEBOOTSTRAP:
1919 {
1920 struct vnode *devvp = NULL;
1921 user_fbootstraptransfer_t *user_bootstrapp;
1922 int devBlockSize;
1923 int error;
1924 uio_t auio;
1925 daddr64_t blockNumber;
1926 u_int32_t blockOffset;
1927 u_int32_t xfersize;
1928 struct buf *bp;
1929 user_fbootstraptransfer_t user_bootstrap;
1930
1931 if (!vnode_isvroot(vp))
1932 return (EINVAL);
1933 /* LP64 - when the caller is a 64-bit process we are passed a pointer
1934 * to a user_fbootstraptransfer_t; otherwise we get a pointer to a
1935 * fbootstraptransfer_t, which we munge into a user_fbootstraptransfer_t.
1936 */
1937 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
1938 return (EROFS);
1939 }
1940 if (is64bit) {
1941 user_bootstrapp = (user_fbootstraptransfer_t *)ap->a_data;
1942 }
1943 else {
1944 user32_fbootstraptransfer_t *bootstrapp = (user32_fbootstraptransfer_t *)ap->a_data;
1945 user_bootstrapp = &user_bootstrap;
1946 user_bootstrap.fbt_offset = bootstrapp->fbt_offset;
1947 user_bootstrap.fbt_length = bootstrapp->fbt_length;
1948 user_bootstrap.fbt_buffer = CAST_USER_ADDR_T(bootstrapp->fbt_buffer);
1949 }
1950 if (user_bootstrapp->fbt_offset + user_bootstrapp->fbt_length > 1024)
1951 return EINVAL;
1952
1953 devvp = VTOHFS(vp)->hfs_devvp;
1954 auio = uio_create(1, user_bootstrapp->fbt_offset,
1955 is64bit ? UIO_USERSPACE64 : UIO_USERSPACE32,
1956 (ap->a_command == F_WRITEBOOTSTRAP) ? UIO_WRITE : UIO_READ);
1957 uio_addiov(auio, user_bootstrapp->fbt_buffer, user_bootstrapp->fbt_length);
1958
1959 devBlockSize = vfs_devblocksize(vnode_mount(vp));
1960
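 /*
 * Transfer the bootstrap area one device block at a time: read the
 * block containing the current offset, copy the overlapping bytes
 * to or from the user buffer with uiomove(), and then write the
 * block back for F_WRITEBOOTSTRAP (or simply release it for
 * F_READBOOTSTRAP).
 */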
1961 while (uio_resid(auio) > 0) {
1962 blockNumber = uio_offset(auio) / devBlockSize;
1963 error = (int)buf_bread(devvp, blockNumber, devBlockSize, cred, &bp);
1964 if (error) {
1965 if (bp) buf_brelse(bp);
1966 uio_free(auio);
1967 return error;
1968 };
1969
1970 blockOffset = uio_offset(auio) % devBlockSize;
1971 xfersize = devBlockSize - blockOffset;
1972 error = uiomove((caddr_t)buf_dataptr(bp) + blockOffset, (int)xfersize, auio);
1973 if (error) {
1974 buf_brelse(bp);
1975 uio_free(auio);
1976 return error;
1977 };
1978 if (uio_rw(auio) == UIO_WRITE) {
1979 error = VNOP_BWRITE(bp);
1980 if (error) {
1981 uio_free(auio);
1982 return error;
1983 }
1984 } else {
1985 buf_brelse(bp);
1986 };
1987 };
1988 uio_free(auio);
1989 };
1990 return 0;
1991
1992 case _IOC(IOC_OUT,'h', 4, 0): /* Create date in local time */
1993 {
1994 if (is64bit) {
1995 *(user_time_t *)(ap->a_data) = (user_time_t) (to_bsd_time(VTOVCB(vp)->localCreateDate));
1996 }
1997 else {
1998 *(user32_time_t *)(ap->a_data) = (user32_time_t) (to_bsd_time(VTOVCB(vp)->localCreateDate));
1999 }
2000 return 0;
2001 }
2002
2003 case SPOTLIGHT_FSCTL_GET_MOUNT_TIME:
2004 *(uint32_t *)ap->a_data = hfsmp->hfs_mount_time;
2005 break;
2006
2007 case SPOTLIGHT_FSCTL_GET_LAST_MTIME:
2008 *(uint32_t *)ap->a_data = hfsmp->hfs_last_mounted_mtime;
2009 break;
2010
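 /*
 * The free-space notification thresholds are kept strictly ordered:
 * dangerlimit < warninglimit < desiredlevel. Each of the setters
 * below rejects a value that would violate that ordering.
 */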
2011 case HFS_FSCTL_SET_VERY_LOW_DISK:
2012 if (*(uint32_t *)ap->a_data >= hfsmp->hfs_freespace_notify_warninglimit) {
2013 return EINVAL;
2014 }
2015
2016 hfsmp->hfs_freespace_notify_dangerlimit = *(uint32_t *)ap->a_data;
2017 break;
2018
2019 case HFS_FSCTL_SET_LOW_DISK:
2020 if ( *(uint32_t *)ap->a_data >= hfsmp->hfs_freespace_notify_desiredlevel
2021 || *(uint32_t *)ap->a_data <= hfsmp->hfs_freespace_notify_dangerlimit) {
2022
2023 return EINVAL;
2024 }
2025
2026 hfsmp->hfs_freespace_notify_warninglimit = *(uint32_t *)ap->a_data;
2027 break;
2028
2029 case HFS_FSCTL_SET_DESIRED_DISK:
2030 if (*(uint32_t *)ap->a_data <= hfsmp->hfs_freespace_notify_warninglimit) {
2031 return EINVAL;
2032 }
2033
2034 hfsmp->hfs_freespace_notify_desiredlevel = *(uint32_t *)ap->a_data;
2035 break;
2036
2037 case HFS_VOLUME_STATUS:
2038 *(uint32_t *)ap->a_data = hfsmp->hfs_notification_conditions;
2039 break;
2040
2041 case HFS_SET_BOOT_INFO:
2042 if (!vnode_isvroot(vp))
2043 return(EINVAL);
2044 if (!kauth_cred_issuser(cred) && (kauth_cred_getuid(cred) != vfs_statfs(HFSTOVFS(hfsmp))->f_owner))
2045 return(EACCES); /* must be superuser or owner of filesystem */
2046 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
2047 return (EROFS);
2048 }
2049 HFS_MOUNT_LOCK(hfsmp, TRUE);
2050 bcopy(ap->a_data, &hfsmp->vcbFndrInfo, sizeof(hfsmp->vcbFndrInfo));
2051 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
2052 (void) hfs_flushvolumeheader(hfsmp, MNT_WAIT, 0);
2053 break;
2054
2055 case HFS_GET_BOOT_INFO:
2056 if (!vnode_isvroot(vp))
2057 return(EINVAL);
2058 HFS_MOUNT_LOCK(hfsmp, TRUE);
2059 bcopy(&hfsmp->vcbFndrInfo, ap->a_data, sizeof(hfsmp->vcbFndrInfo));
2060 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
2061 break;
2062
2063 case HFS_MARK_BOOT_CORRUPT:
2064 /* Mark the boot volume corrupt by setting
2065 * kHFSVolumeInconsistentBit in the volume header. This will
2066 * force fsck_hfs on next mount.
2067 */
2068 if (!is_suser()) {
2069 return EACCES;
2070 }
2071
2072 /* Allowed only on the root vnode of the boot volume */
2073 if (!(vfs_flags(HFSTOVFS(hfsmp)) & MNT_ROOTFS) ||
2074 !vnode_isvroot(vp)) {
2075 return EINVAL;
2076 }
2077 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
2078 return (EROFS);
2079 }
2080 printf ("hfs_vnop_ioctl: Marking the boot volume corrupt.\n");
2081 hfs_mark_volume_inconsistent(hfsmp);
2082 break;
2083
2084 case HFS_FSCTL_GET_JOURNAL_INFO:
2085 jip = (struct hfs_journal_info*)ap->a_data;
2086
2087 if (vp == NULLVP)
2088 return EINVAL;
2089
2090 if (hfsmp->jnl == NULL) {
2091 jnl_start = 0;
2092 jnl_size = 0;
2093 } else {
2094 jnl_start = (off_t)(hfsmp->jnl_start * HFSTOVCB(hfsmp)->blockSize) + (off_t)HFSTOVCB(hfsmp)->hfsPlusIOPosOffset;
2095 jnl_size = (off_t)hfsmp->jnl_size;
2096 }
2097
2098 jip->jstart = jnl_start;
2099 jip->jsize = jnl_size;
2100 break;
2101
2102 case HFS_SET_ALWAYS_ZEROFILL: {
2103 struct cnode *cp = VTOC(vp);
2104
2105 if (*(int *)ap->a_data) {
2106 cp->c_flag |= C_ALWAYS_ZEROFILL;
2107 } else {
2108 cp->c_flag &= ~C_ALWAYS_ZEROFILL;
2109 }
2110 break;
2111 }
2112
2113 default:
2114 return (ENOTTY);
2115 }
2116
2117 return 0;
2118}
2119
2120/*
2121 * select
2122 */
2123int
2124hfs_vnop_select(__unused struct vnop_select_args *ap)
2125/*
2126 struct vnop_select_args {
2127 vnode_t a_vp;
2128 int a_which;
2129 int a_fflags;
2130 void *a_wql;
2131 vfs_context_t a_context;
2132 };
2133*/
2134{
2135 /*
2136 * We should really check to see if I/O is possible.
2137 */
2138 return (1);
2139}
2140
2141/*
2142 * Converts a logical block number to a physical block, and optionally returns
2143 * the number of remaining blocks in a run. The logical block is based on hfsNode.logBlockSize.
2144 * The physical block number is based on the device block size, currently 512 bytes.
2145 * The block run is returned in logical blocks, and is the REMAINING number of blocks.
2146 */
2147int
2148hfs_bmap(struct vnode *vp, daddr_t bn, struct vnode **vpp, daddr64_t *bnp, unsigned int *runp)
2149{
2150 struct filefork *fp = VTOF(vp);
2151 struct hfsmount *hfsmp = VTOHFS(vp);
2152 int retval = E_NONE;
2153 u_int32_t logBlockSize;
2154 size_t bytesContAvail = 0;
2155 off_t blockposition;
2156 int lockExtBtree;
2157 int lockflags = 0;
2158
2159 /*
2160 * Check for underlying vnode requests and ensure that logical
2161 * to physical mapping is requested.
2162 */
2163 if (vpp != NULL)
2164 *vpp = hfsmp->hfs_devvp;
2165 if (bnp == NULL)
2166 return (0);
2167
2168 logBlockSize = GetLogicalBlockSize(vp);
2169 blockposition = (off_t)bn * logBlockSize;
2170
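 /*
 * If the fork has extents in the overflow extents B-tree,
 * MapFileBlockC() may need to consult it, so take the extents
 * lock for the lookup below.
 */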
2171 lockExtBtree = overflow_extents(fp);
2172
2173 if (lockExtBtree)
2174 lockflags = hfs_systemfile_lock(hfsmp, SFL_EXTENTS, HFS_EXCLUSIVE_LOCK);
2175
2176 retval = MacToVFSError(
2177 MapFileBlockC (HFSTOVCB(hfsmp),
2178 (FCB*)fp,
2179 MAXPHYSIO,
2180 blockposition,
2181 bnp,
2182 &bytesContAvail));
2183
2184 if (lockExtBtree)
2185 hfs_systemfile_unlock(hfsmp, lockflags);
2186
2187 if (retval == E_NONE) {
2188 /* Figure out how many read ahead blocks there are */
2189 if (runp != NULL) {
2190 if (can_cluster(logBlockSize)) {
2191 /* Make sure this result never goes negative: */
2192 *runp = (bytesContAvail < logBlockSize) ? 0 : (bytesContAvail / logBlockSize) - 1;
2193 } else {
2194 *runp = 0;
2195 }
2196 }
2197 }
2198 return (retval);
2199}
2200
2201/*
2202 * Convert logical block number to file offset.
2203 */
2204int
2205hfs_vnop_blktooff(struct vnop_blktooff_args *ap)
2206/*
2207 struct vnop_blktooff_args {
2208 vnode_t a_vp;
2209 daddr64_t a_lblkno;
2210 off_t *a_offset;
2211 };
2212*/
2213{
2214 if (ap->a_vp == NULL)
2215 return (EINVAL);
2216 *ap->a_offset = (off_t)ap->a_lblkno * (off_t)GetLogicalBlockSize(ap->a_vp);
2217
2218 return(0);
2219}
2220
2221/*
2222 * Convert file offset to logical block number.
2223 */
2224int
2225hfs_vnop_offtoblk(struct vnop_offtoblk_args *ap)
2226/*
2227 struct vnop_offtoblk_args {
2228 vnode_t a_vp;
2229 off_t a_offset;
2230 daddr64_t *a_lblkno;
2231 };
2232*/
2233{
2234 if (ap->a_vp == NULL)
2235 return (EINVAL);
2236 *ap->a_lblkno = (daddr64_t)(ap->a_offset / (off_t)GetLogicalBlockSize(ap->a_vp));
2237
2238 return(0);
2239}
2240
2241/*
2242 * Map file offset to physical block number.
2243 *
2244 * If this function is called for a write operation, and if the file
2245 * had virtual blocks allocated (delayed allocation), real blocks
2246 * are allocated by calling ExtendFileC().
2247 *
2248 * If this function is called for a read operation, and if the file
2249 * had virtual blocks allocated (delayed allocation), no change
2250 * to the size of the file is made, and if required, the rangelist is
2251 * searched for a mapping.
2252 *
2253 * System file cnodes are expected to be locked (shared or exclusive).
2254 */
2255int
2256hfs_vnop_blockmap(struct vnop_blockmap_args *ap)
2257/*
2258 struct vnop_blockmap_args {
2259 vnode_t a_vp;
2260 off_t a_foffset;
2261 size_t a_size;
2262 daddr64_t *a_bpn;
2263 size_t *a_run;
2264 void *a_poff;
2265 int a_flags;
2266 vfs_context_t a_context;
2267 };
2268*/
2269{
2270 struct vnode *vp = ap->a_vp;
2271 struct cnode *cp;
2272 struct filefork *fp;
2273 struct hfsmount *hfsmp;
2274 size_t bytesContAvail = 0;
2275 int retval = E_NONE;
2276 int syslocks = 0;
2277 int lockflags = 0;
2278 struct rl_entry *invalid_range;
2279 enum rl_overlaptype overlaptype;
2280 int started_tr = 0;
2281 int tooklock = 0;
2282
2283#if HFS_COMPRESSION
2284 if (VNODE_IS_RSRC(vp)) {
2285 /* allow blockmaps to the resource fork */
2286 } else {
2287 if ( hfs_file_is_compressed(VTOC(vp), 1) ) { /* 1 == don't take the cnode lock */
2288 int state = decmpfs_cnode_get_vnode_state(VTOCMP(vp));
2289 switch(state) {
2290 case FILE_IS_COMPRESSED:
2291 return ENOTSUP;
2292 case FILE_IS_CONVERTING:
2293 /* if FILE_IS_CONVERTING, we allow blockmap */
2294 break;
2295 default:
2296 printf("invalid state %d for compressed file\n", state);
2297 /* fall through */
2298 }
2299 }
2300 }
2301#endif /* HFS_COMPRESSION */
2302
2303 /* Do not allow blockmap operation on a directory */
2304 if (vnode_isdir(vp)) {
2305 return (ENOTSUP);
2306 }
2307
2308 /*
2309 * Check for underlying vnode requests and ensure that logical
2310 * to physical mapping is requested.
2311 */
2312 if (ap->a_bpn == NULL)
2313 return (0);
2314
2315 if ( !vnode_issystem(vp) && !vnode_islnk(vp) && !vnode_isswap(vp)) {
2316 if (VTOC(vp)->c_lockowner != current_thread()) {
2317 hfs_lock(VTOC(vp), HFS_FORCE_LOCK);
2318 tooklock = 1;
2319 }
2320 }
2321 hfsmp = VTOHFS(vp);
2322 cp = VTOC(vp);
2323 fp = VTOF(vp);
2324
2325retry:
2326 /* Check virtual blocks only when performing write operation */
2327 if ((ap->a_flags & VNODE_WRITE) && (fp->ff_unallocblocks != 0)) {
2328 if (hfs_start_transaction(hfsmp) != 0) {
2329 retval = EINVAL;
2330 goto exit;
2331 } else {
2332 started_tr = 1;
2333 }
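 /*
 * Converting loaned (delayed-allocation) blocks into real blocks
 * below touches both the extents B-tree and the allocation bitmap,
 * so both system files must be locked.
 */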
2334 syslocks = SFL_EXTENTS | SFL_BITMAP;
2335
2336 } else if (overflow_extents(fp)) {
2337 syslocks = SFL_EXTENTS;
2338 }
2339
2340 if (syslocks)
2341 lockflags = hfs_systemfile_lock(hfsmp, syslocks, HFS_EXCLUSIVE_LOCK);
2342
2343 /*
2344 * Check for any delayed allocations.
2345 */
2346 if ((ap->a_flags & VNODE_WRITE) && (fp->ff_unallocblocks != 0)) {
2347 int64_t actbytes;
2348 u_int32_t loanedBlocks;
2349
2350 //
2351 // Make sure we have a transaction. It's possible
2352 // that we came in and fp->ff_unallocblocks was zero
2353 // but during the time we blocked acquiring the extents
2354 // btree, ff_unallocblocks became non-zero and so we
2355 // will need to start a transaction.
2356 //
2357 if (started_tr == 0) {
2358 if (syslocks) {
2359 hfs_systemfile_unlock(hfsmp, lockflags);
2360 syslocks = 0;
2361 }
2362 goto retry;
2363 }
2364
2365 /*
2366 * Note: ExtendFileC will release any blocks on loan and
2367 * acquire real blocks. So we ask to extend by zero bytes
2368 * since ExtendFileC will account for the virtual blocks.
2369 */
2370
2371 loanedBlocks = fp->ff_unallocblocks;
2372 retval = ExtendFileC(hfsmp, (FCB*)fp, 0, 0,
2373 kEFAllMask | kEFNoClumpMask, &actbytes);
2374
2375 if (retval) {
2376 fp->ff_unallocblocks = loanedBlocks;
2377 cp->c_blocks += loanedBlocks;
2378 fp->ff_blocks += loanedBlocks;
2379
2380 HFS_MOUNT_LOCK(hfsmp, TRUE);
2381 hfsmp->loanedBlocks += loanedBlocks;
2382 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
2383
2384 hfs_systemfile_unlock(hfsmp, lockflags);
2385 cp->c_flag |= C_MODIFIED;
2386 if (started_tr) {
2387 (void) hfs_update(vp, TRUE);
2388 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
2389
2390 hfs_end_transaction(hfsmp);
2391 started_tr = 0;
2392 }
2393 goto exit;
2394 }
2395 }
2396
2397 retval = MapFileBlockC(hfsmp, (FCB *)fp, ap->a_size, ap->a_foffset,
2398 ap->a_bpn, &bytesContAvail);
2399 if (syslocks) {
2400 hfs_systemfile_unlock(hfsmp, lockflags);
2401 syslocks = 0;
2402 }
2403
2404 if (started_tr) {
2405 (void) hfs_update(vp, TRUE);
2406 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
2407 hfs_end_transaction(hfsmp);
2408 started_tr = 0;
2409 }
2410 if (retval) {
2411 /* On write, always return error because virtual blocks, if any,
2412 * should have been allocated in ExtendFileC(). We do not
2413 * allocate virtual blocks on read, therefore return error
2414 * only if no virtual blocks are allocated. Otherwise we search
2415 * the rangelist for zero-fills.
2416 */
2417 if ((MacToVFSError(retval) != ERANGE) ||
2418 (ap->a_flags & VNODE_WRITE) ||
2419 ((ap->a_flags & VNODE_READ) && (fp->ff_unallocblocks == 0))) {
2420 goto exit;
2421 }
2422
2423 /* Validate if the start offset is within logical file size */
2424 if (ap->a_foffset > fp->ff_size) {
2425 goto exit;
2426 }
2427
2428 /* Searching the file extents has failed for a read operation, therefore
2429 * search the rangelist for any uncommitted holes in the file.
2430 */
2431 overlaptype = rl_scan(&fp->ff_invalidranges, ap->a_foffset,
2432 ap->a_foffset + (off_t)(ap->a_size - 1),
2433 &invalid_range);
2434 switch(overlaptype) {
2435 case RL_OVERLAPISCONTAINED:
2436 /* start_offset <= rl_start, end_offset >= rl_end */
2437 if (ap->a_foffset != invalid_range->rl_start) {
2438 break;
2439 }
2440 case RL_MATCHINGOVERLAP:
2441 /* start_offset = rl_start, end_offset = rl_end */
2442 case RL_OVERLAPCONTAINSRANGE:
2443 /* start_offset >= rl_start, end_offset <= rl_end */
2444 case RL_OVERLAPSTARTSBEFORE:
2445 /* start_offset > rl_start, end_offset >= rl_start */
2446 if ((off_t)fp->ff_size > (invalid_range->rl_end + 1)) {
2447 bytesContAvail = (invalid_range->rl_end + 1) - ap->a_foffset;
2448 } else {
2449 bytesContAvail = fp->ff_size - ap->a_foffset;
2450 }
2451 if (bytesContAvail > ap->a_size) {
2452 bytesContAvail = ap->a_size;
2453 }
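 /*
 * A block number of -1 indicates there is no physical block backing
 * this range; the region is an uncommitted hole that reads back as
 * zeroes.
 */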
2454 *ap->a_bpn = (daddr64_t)-1;
2455 retval = 0;
2456 break;
2457 case RL_OVERLAPENDSAFTER:
2458 /* start_offset < rl_start, end_offset < rl_end */
2459 case RL_NOOVERLAP:
2460 break;
2461 }
2462 goto exit;
2463 }
2464
2465 /* MapFileBlockC() found a valid extent in the filefork. Search the
2466 * mapping information further for invalid file ranges.
2467 */
2468 overlaptype = rl_scan(&fp->ff_invalidranges, ap->a_foffset,
2469 ap->a_foffset + (off_t)bytesContAvail - 1,
2470 &invalid_range);
2471 if (overlaptype != RL_NOOVERLAP) {
2472 switch(overlaptype) {
2473 case RL_MATCHINGOVERLAP:
2474 case RL_OVERLAPCONTAINSRANGE:
2475 case RL_OVERLAPSTARTSBEFORE:
2476 /* There's no valid block for this byte offset */
2477 *ap->a_bpn = (daddr64_t)-1;
2478 /* There's no point limiting the amount to be returned
2479 * if the invalid range that was hit extends all the way
2480 * to the EOF (i.e. there's no valid bytes between the
2481 * end of this range and the file's EOF):
2482 */
2483 if (((off_t)fp->ff_size > (invalid_range->rl_end + 1)) &&
2484 ((size_t)(invalid_range->rl_end + 1 - ap->a_foffset) < bytesContAvail)) {
2485 bytesContAvail = invalid_range->rl_end + 1 - ap->a_foffset;
2486 }
2487 break;
2488
2489 case RL_OVERLAPISCONTAINED:
2490 case RL_OVERLAPENDSAFTER:
2491 /* The range of interest hits an invalid block before the end: */
2492 if (invalid_range->rl_start == ap->a_foffset) {
2493 /* There's actually no valid information to be had starting here: */
2494 *ap->a_bpn = (daddr64_t)-1;
2495 if (((off_t)fp->ff_size > (invalid_range->rl_end + 1)) &&
2496 ((size_t)(invalid_range->rl_end + 1 - ap->a_foffset) < bytesContAvail)) {
2497 bytesContAvail = invalid_range->rl_end + 1 - ap->a_foffset;
2498 }
2499 } else {
2500 bytesContAvail = invalid_range->rl_start - ap->a_foffset;
2501 }
2502 break;
2503
2504 case RL_NOOVERLAP:
2505 break;
2506 } /* end switch */
2507 if (bytesContAvail > ap->a_size)
2508 bytesContAvail = ap->a_size;
2509 }
2510
2511exit:
2512 if (retval == 0) {
2513 if (ap->a_run)
2514 *ap->a_run = bytesContAvail;
2515
2516 if (ap->a_poff)
2517 *(int *)ap->a_poff = 0;
2518 }
2519
2520 if (tooklock)
2521 hfs_unlock(cp);
2522
2523 return (MacToVFSError(retval));
2524}
2525
2526
2527/*
2528 * prepare and issue the I/O
2529 * buf_strategy knows how to deal
2530 * with requests that require
2531 * fragmented I/Os
2532 */
2533int
2534hfs_vnop_strategy(struct vnop_strategy_args *ap)
2535{
2536 buf_t bp = ap->a_bp;
2537 vnode_t vp = buf_vnode(bp);
2538
2539 return (buf_strategy(VTOHFS(vp)->hfs_devvp, ap));
2540}
2541
2542static int
2543hfs_minorupdate(struct vnode *vp) {
2544 struct cnode *cp = VTOC(vp);
2545 cp->c_flag &= ~C_MODIFIED;
2546 cp->c_touch_acctime = 0;
2547 cp->c_touch_chgtime = 0;
2548 cp->c_touch_modtime = 0;
2549
2550 return 0;
2551}
2552
2553static int
2554do_hfs_truncate(struct vnode *vp, off_t length, int flags, int skipupdate, vfs_context_t context)
2555{
2556 register struct cnode *cp = VTOC(vp);
2557 struct filefork *fp = VTOF(vp);
2558 struct proc *p = vfs_context_proc(context);
2559 kauth_cred_t cred = vfs_context_ucred(context);
2560 int retval;
2561 off_t bytesToAdd;
2562 off_t actualBytesAdded;
2563 off_t filebytes;
2564 u_int32_t fileblocks;
2565 int blksize;
2566 struct hfsmount *hfsmp;
2567 int lockflags;
2568
2569 blksize = VTOVCB(vp)->blockSize;
2570 fileblocks = fp->ff_blocks;
2571 filebytes = (off_t)fileblocks * (off_t)blksize;
2572
2573 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_START,
2574 (int)length, (int)fp->ff_size, (int)filebytes, 0, 0);
2575
2576 if (length < 0)
2577 return (EINVAL);
2578
2579 /* This should only happen with a corrupt filesystem */
2580 if ((off_t)fp->ff_size < 0)
2581 return (EINVAL);
2582
2583 if ((!ISHFSPLUS(VTOVCB(vp))) && (length > (off_t)MAXHFSFILESIZE))
2584 return (EFBIG);
2585
2586 hfsmp = VTOHFS(vp);
2587
2588 retval = E_NONE;
2589
2590 /* Files that are changing size are not hot file candidates. */
2591 if (hfsmp->hfc_stage == HFC_RECORDING) {
2592 fp->ff_bytesread = 0;
2593 }
2594
2595 /*
2596 * We cannot just check if fp->ff_size == length (as an optimization)
2597 * since there may be extra physical blocks that also need truncation.
2598 */
2599#if QUOTA
2600 if ((retval = hfs_getinoquota(cp)))
2601 return(retval);
2602#endif /* QUOTA */
2603
2604 /*
2605 * Lengthen the size of the file. We must ensure that the
2606 * last byte of the file is allocated. Since the smallest
2607 * value of ff_size is 0, length will be at least 1.
2608 */
2609 if (length > (off_t)fp->ff_size) {
2610#if QUOTA
2611 retval = hfs_chkdq(cp, (int64_t)(roundup(length - filebytes, blksize)),
2612 cred, 0);
2613 if (retval)
2614 goto Err_Exit;
2615#endif /* QUOTA */
2616 /*
2617 * If we don't have enough physical space then
2618 * we need to extend the physical size.
2619 */
2620 if (length > filebytes) {
2621 int eflags;
2622 u_int32_t blockHint = 0;
2623
2624 /* All or nothing and don't round up to clumpsize. */
2625 eflags = kEFAllMask | kEFNoClumpMask;
2626
2627 if (cred && suser(cred, NULL) != 0)
2628 eflags |= kEFReserveMask; /* keep a reserve */
2629
2630 /*
2631 * Allocate Journal and Quota files in metadata zone.
2632 */
2633 if (filebytes == 0 &&
2634 hfsmp->hfs_flags & HFS_METADATA_ZONE &&
2635 hfs_virtualmetafile(cp)) {
2636 eflags |= kEFMetadataMask;
2637 blockHint = hfsmp->hfs_metazone_start;
2638 }
2639 if (hfs_start_transaction(hfsmp) != 0) {
2640 retval = EINVAL;
2641 goto Err_Exit;
2642 }
2643
2644 /* Protect extents b-tree and allocation bitmap */
2645 lockflags = SFL_BITMAP;
2646 if (overflow_extents(fp))
2647 lockflags |= SFL_EXTENTS;
2648 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
2649
2650 while ((length > filebytes) && (retval == E_NONE)) {
2651 bytesToAdd = length - filebytes;
2652 retval = MacToVFSError(ExtendFileC(VTOVCB(vp),
2653 (FCB*)fp,
2654 bytesToAdd,
2655 blockHint,
2656 eflags,
2657 &actualBytesAdded));
2658
2659 filebytes = (off_t)fp->ff_blocks * (off_t)blksize;
2660 if (actualBytesAdded == 0 && retval == E_NONE) {
2661 if (length > filebytes)
2662 length = filebytes;
2663 break;
2664 }
2665 } /* endwhile */
2666
2667 hfs_systemfile_unlock(hfsmp, lockflags);
2668
2669 if (hfsmp->jnl) {
2670 if (skipupdate) {
2671 (void) hfs_minorupdate(vp);
2672 }
2673 else {
2674 (void) hfs_update(vp, TRUE);
2675 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
2676 }
2677 }
2678
2679 hfs_end_transaction(hfsmp);
2680
2681 if (retval)
2682 goto Err_Exit;
2683
2684 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_NONE,
2685 (int)length, (int)fp->ff_size, (int)filebytes, 0, 0);
2686 }
2687
2688 if (!(flags & IO_NOZEROFILL)) {
2689 if (UBCINFOEXISTS(vp) && (vnode_issystem(vp) == 0) && retval == E_NONE) {
2690 struct rl_entry *invalid_range;
2691 off_t zero_limit;
2692
2693 zero_limit = (fp->ff_size + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64;
2694 if (length < zero_limit) zero_limit = length;
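 /*
 * zero_limit is the end of the page containing the old EOF, capped
 * at the new length. When that page holds valid data, bytes from
 * the old EOF up to zero_limit are zeroed via cluster_write() below;
 * anything beyond (or the whole range, if the page is already
 * invalid) is merely recorded as an invalid range to be zero-filled
 * lazily.
 */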
2695
2696 if (length > (off_t)fp->ff_size) {
2697 struct timeval tv;
2698
2699 /* Extending the file: time to fill out the current last page with zeroes? */
2700 if ((fp->ff_size & PAGE_MASK_64) &&
2701 (rl_scan(&fp->ff_invalidranges, fp->ff_size & ~PAGE_MASK_64,
2702 fp->ff_size - 1, &invalid_range) == RL_NOOVERLAP)) {
2703
2704 /* There's some valid data at the start of the (current) last page
2705 of the file, so zero out the remainder of that page to ensure the
2706 entire page contains valid data. Since there is no invalid range
2707 possible past the (current) eof, there's no need to remove anything
2708 from the invalid range list before calling cluster_write(): */
2709 hfs_unlock(cp);
2710 retval = cluster_write(vp, (struct uio *) 0, fp->ff_size, zero_limit,
2711 fp->ff_size, (off_t)0,
2712 (flags & IO_SYNC) | IO_HEADZEROFILL | IO_NOZERODIRTY);
2713 hfs_lock(cp, HFS_FORCE_LOCK);
2714 if (retval) goto Err_Exit;
2715
2716 /* Merely invalidate the remaining area, if necessary: */
2717 if (length > zero_limit) {
2718 microuptime(&tv);
2719 rl_add(zero_limit, length - 1, &fp->ff_invalidranges);
2720 cp->c_zftimeout = tv.tv_sec + ZFTIMELIMIT;
2721 }
2722 } else {
2723 /* The page containing the (current) eof is invalid: just add the
2724 remainder of the page to the invalid list, along with the area
2725 being newly allocated:
2726 */
2727 microuptime(&tv);
2728 rl_add(fp->ff_size, length - 1, &fp->ff_invalidranges);
2729 cp->c_zftimeout = tv.tv_sec + ZFTIMELIMIT;
2730 };
2731 }
2732 } else {
2733 panic("hfs_truncate: invoked on non-UBC object?!");
2734 };
2735 }
2736 cp->c_touch_modtime = TRUE;
2737 fp->ff_size = length;
2738
2739 } else { /* Shorten the size of the file */
2740
2741 if ((off_t)fp->ff_size > length) {
2742 /* Any space previously marked as invalid is now irrelevant: */
2743 rl_remove(length, fp->ff_size - 1, &fp->ff_invalidranges);
2744 }
2745
2746 /*
2747 * Account for any unmapped blocks. Note that the new
2748 * file length can still end up with unmapped blocks.
2749 */
2750 if (fp->ff_unallocblocks > 0) {
2751 u_int32_t finalblks;
2752 u_int32_t loanedBlocks;
2753
2754 HFS_MOUNT_LOCK(hfsmp, TRUE);
2755
2756 loanedBlocks = fp->ff_unallocblocks;
2757 cp->c_blocks -= loanedBlocks;
2758 fp->ff_blocks -= loanedBlocks;
2759 fp->ff_unallocblocks = 0;
2760
2761 hfsmp->loanedBlocks -= loanedBlocks;
2762
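 /*
 * Round the new length up to whole allocation blocks. If that still
 * exceeds the blocks actually mapped, keep the difference on loan as
 * unallocated blocks.
 */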
2763 finalblks = (length + blksize - 1) / blksize;
2764 if (finalblks > fp->ff_blocks) {
2765 /* calculate required unmapped blocks */
2766 loanedBlocks = finalblks - fp->ff_blocks;
2767 hfsmp->loanedBlocks += loanedBlocks;
2768
2769 fp->ff_unallocblocks = loanedBlocks;
2770 cp->c_blocks += loanedBlocks;
2771 fp->ff_blocks += loanedBlocks;
2772 }
2773 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
2774 }
2775
2776 /*
2777 * For a TBE process the deallocation of the file blocks is
2778 * delayed until the file is closed. And hfs_close calls
2779 * truncate with the IO_NDELAY flag set. So when IO_NDELAY
2780 * isn't set, we make sure this isn't a TBE process.
2781 */
2782 if ((flags & IO_NDELAY) || (proc_tbe(p) == 0)) {
2783#if QUOTA
2784 off_t savedbytes = ((off_t)fp->ff_blocks * (off_t)blksize);
2785#endif /* QUOTA */
2786 if (hfs_start_transaction(hfsmp) != 0) {
2787 retval = EINVAL;
2788 goto Err_Exit;
2789 }
2790
2791 if (fp->ff_unallocblocks == 0) {
2792 /* Protect extents b-tree and allocation bitmap */
2793 lockflags = SFL_BITMAP;
2794 if (overflow_extents(fp))
2795 lockflags |= SFL_EXTENTS;
2796 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
2797
2798 retval = MacToVFSError(TruncateFileC(VTOVCB(vp),
2799 (FCB*)fp, length, false));
2800
2801 hfs_systemfile_unlock(hfsmp, lockflags);
2802 }
2803 if (hfsmp->jnl) {
2804 if (retval == 0) {
2805 fp->ff_size = length;
2806 }
2807 if (skipupdate) {
2808 (void) hfs_minorupdate(vp);
2809 }
2810 else {
2811 (void) hfs_update(vp, TRUE);
2812 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
2813 }
2814 }
2815 hfs_end_transaction(hfsmp);
2816
2817 filebytes = (off_t)fp->ff_blocks * (off_t)blksize;
2818 if (retval)
2819 goto Err_Exit;
2820#if QUOTA
2821 /* These are bytesreleased */
2822 (void) hfs_chkdq(cp, (int64_t)-(savedbytes - filebytes), NOCRED, 0);
2823#endif /* QUOTA */
2824 }
2825 /* Only set update flag if the logical length changes */
2826 if ((off_t)fp->ff_size != length)
2827 cp->c_touch_modtime = TRUE;
2828 fp->ff_size = length;
2829 }
2830 if (cp->c_mode & (S_ISUID | S_ISGID)) {
2831 if (!vfs_context_issuser(context)) {
2832 cp->c_mode &= ~(S_ISUID | S_ISGID);
2833 skipupdate = 0;
2834 }
2835 }
2836 if (skipupdate) {
2837 retval = hfs_minorupdate(vp);
2838 }
2839 else {
2840 cp->c_touch_chgtime = TRUE; /* status changed */
2841 cp->c_touch_modtime = TRUE; /* file data was modified */
2842 retval = hfs_update(vp, MNT_WAIT);
2843 }
2844 if (retval) {
2845 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_NONE,
2846 -1, -1, -1, retval, 0);
2847 }
2848
2849Err_Exit:
2850
2851 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_END,
2852 (int)length, (int)fp->ff_size, (int)filebytes, retval, 0);
2853
2854 return (retval);
2855}
2856
2857
2858
2859/*
2860 * Truncate a cnode to at most length size, freeing (or adding) the
2861 * disk blocks.
2862 */
2863__private_extern__
2864int
2865hfs_truncate(struct vnode *vp, off_t length, int flags, int skipsetsize,
2866 int skipupdate, vfs_context_t context)
2867{
2868 struct filefork *fp = VTOF(vp);
2869 off_t filebytes;
2870 u_int32_t fileblocks;
2871 int blksize, error = 0;
2872 struct cnode *cp = VTOC(vp);
2873
2874 /* Cannot truncate an HFS directory! */
2875 if (vnode_isdir(vp)) {
2876 return (EISDIR);
2877 }
2878 /* A swap file cannot change size. */
2879 if (vnode_isswap(vp) && (length != 0)) {
2880 return (EPERM);
2881 }
2882
2883 blksize = VTOVCB(vp)->blockSize;
2884 fileblocks = fp->ff_blocks;
2885 filebytes = (off_t)fileblocks * (off_t)blksize;
2886
2887 //
2888 // Have to do this here so that we don't wind up with
2889 // i/o pending for blocks that are about to be released
2890 // if we truncate the file.
2891 //
2892 // If skipsetsize is set, then the caller is responsible
2893 // for the ubc_setsize.
2894 //
2895 // Even if skipsetsize is set, if the length is zero we
2896 // want to call ubc_setsize() because as of SnowLeopard
2897 // it will no longer cause any page-ins and it will drop
2898 // any dirty pages so that we don't do any i/o that we
2899 // don't have to. This also prevents a race where i/o
2900 // for truncated blocks may overwrite later data if the
2901 // blocks get reallocated to a different file.
2902 //
2903 if (!skipsetsize || length == 0)
2904 ubc_setsize(vp, length);
2905
2906 // have to loop truncating or growing files that are
2907 // really big because otherwise transactions can get
2908 // enormous and consume too many kernel resources.
2909
2910 if (length < filebytes) {
2911 while (filebytes > length) {
2912 if ((filebytes - length) > HFS_BIGFILE_SIZE && overflow_extents(fp)) {
2913 filebytes -= HFS_BIGFILE_SIZE;
2914 } else {
2915 filebytes = length;
2916 }
2917 cp->c_flag |= C_FORCEUPDATE;
2918 error = do_hfs_truncate(vp, filebytes, flags, skipupdate, context);
2919 if (error)
2920 break;
2921 }
2922 } else if (length > filebytes) {
2923 while (filebytes < length) {
2924 if ((length - filebytes) > HFS_BIGFILE_SIZE && overflow_extents(fp)) {
2925 filebytes += HFS_BIGFILE_SIZE;
2926 } else {
2927 filebytes = length;
2928 }
2929 cp->c_flag |= C_FORCEUPDATE;
2930 error = do_hfs_truncate(vp, filebytes, flags, skipupdate, context);
2931 if (error)
2932 break;
2933 }
2934 } else /* Same logical size */ {
2935
2936 error = do_hfs_truncate(vp, length, flags, skipupdate, context);
2937 }
2938 /* Files that are changing size are not hot file candidates. */
2939 if (VTOHFS(vp)->hfc_stage == HFC_RECORDING) {
2940 fp->ff_bytesread = 0;
2941 }
2942
2943 return (error);
2944}
2945
2946
2947
2948/*
2949 * Preallocate file storage space.
2950 */
2951int
2952hfs_vnop_allocate(struct vnop_allocate_args /* {
2953 vnode_t a_vp;
2954 off_t a_length;
2955 u_int32_t a_flags;
2956 off_t *a_bytesallocated;
2957 off_t a_offset;
2958 vfs_context_t a_context;
2959 } */ *ap)
2960{
2961 struct vnode *vp = ap->a_vp;
2962 struct cnode *cp;
2963 struct filefork *fp;
2964 ExtendedVCB *vcb;
2965 off_t length = ap->a_length;
2966 off_t startingPEOF;
2967 off_t moreBytesRequested;
2968 off_t actualBytesAdded;
2969 off_t filebytes;
2970 u_int32_t fileblocks;
2971 int retval, retval2;
2972 u_int32_t blockHint;
2973 u_int32_t extendFlags; /* For call to ExtendFileC */
2974 struct hfsmount *hfsmp;
2975 kauth_cred_t cred = vfs_context_ucred(ap->a_context);
2976 int lockflags;
2977
2978 *(ap->a_bytesallocated) = 0;
2979
2980 if (!vnode_isreg(vp))
2981 return (EISDIR);
2982 if (length < (off_t)0)
2983 return (EINVAL);
2984
2985 cp = VTOC(vp);
2986
2987 hfs_lock_truncate(cp, TRUE);
2988
2989 if ((retval = hfs_lock(cp, HFS_EXCLUSIVE_LOCK))) {
2990 goto Err_Exit;
2991 }
2992
2993 fp = VTOF(vp);
2994 hfsmp = VTOHFS(vp);
2995 vcb = VTOVCB(vp);
2996
2997 fileblocks = fp->ff_blocks;
2998 filebytes = (off_t)fileblocks * (off_t)vcb->blockSize;
2999
3000 if ((ap->a_flags & ALLOCATEFROMVOL) && (length < filebytes)) {
3001 retval = EINVAL;
3002 goto Err_Exit;
3003 }
3004
3005 /* Fill in the flags word for the call to Extend the file */
3006
3007 extendFlags = kEFNoClumpMask;
3008 if (ap->a_flags & ALLOCATECONTIG)
3009 extendFlags |= kEFContigMask;
3010 if (ap->a_flags & ALLOCATEALL)
3011 extendFlags |= kEFAllMask;
3012 if (cred && suser(cred, NULL) != 0)
3013 extendFlags |= kEFReserveMask;
3014 if (hfs_virtualmetafile(cp))
3015 extendFlags |= kEFMetadataMask;
3016
3017 retval = E_NONE;
3018 blockHint = 0;
3019 startingPEOF = filebytes;
3020
3021 if (ap->a_flags & ALLOCATEFROMPEOF)
3022 length += filebytes;
3023 else if (ap->a_flags & ALLOCATEFROMVOL)
3024 blockHint = ap->a_offset / VTOVCB(vp)->blockSize;
3025
3026 /* If no changes are necessary, then we're done */
3027 if (filebytes == length)
3028 goto Std_Exit;
3029
3030 /*
3031 * Lengthen the size of the file. We must ensure that the
3032 * last byte of the file is allocated. Since the smallest
3033 * value of filebytes is 0, length will be at least 1.
3034 */
3035 if (length > filebytes) {
3036 off_t total_bytes_added = 0, orig_request_size;
3037
3038 orig_request_size = moreBytesRequested = length - filebytes;
3039
3040#if QUOTA
3041 retval = hfs_chkdq(cp,
3042 (int64_t)(roundup(moreBytesRequested, vcb->blockSize)),
3043 cred, 0);
3044 if (retval)
3045 goto Err_Exit;
3046
3047#endif /* QUOTA */
3048 /*
3049 * Metadata zone checks.
3050 */
3051 if (hfsmp->hfs_flags & HFS_METADATA_ZONE) {
3052 /*
3053 * Allocate Journal and Quota files in metadata zone.
3054 */
3055 if (hfs_virtualmetafile(cp)) {
3056 blockHint = hfsmp->hfs_metazone_start;
3057 } else if ((blockHint >= hfsmp->hfs_metazone_start) &&
3058 (blockHint <= hfsmp->hfs_metazone_end)) {
3059 /*
3060 * Move blockHint outside metadata zone.
3061 */
3062 blockHint = hfsmp->hfs_metazone_end + 1;
3063 }
3064 }
3065
3066
3067 while ((length > filebytes) && (retval == E_NONE)) {
3068 off_t bytesRequested;
3069
3070 if (hfs_start_transaction(hfsmp) != 0) {
3071 retval = EINVAL;
3072 goto Err_Exit;
3073 }
3074
3075 /* Protect extents b-tree and allocation bitmap */
3076 lockflags = SFL_BITMAP;
3077 if (overflow_extents(fp))
3078 lockflags |= SFL_EXTENTS;
3079 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
3080
3081 if (moreBytesRequested >= HFS_BIGFILE_SIZE) {
3082 bytesRequested = HFS_BIGFILE_SIZE;
3083 } else {
3084 bytesRequested = moreBytesRequested;
3085 }
3086
3087 if (extendFlags & kEFContigMask) {
3088 // if we're on a sparse device, this will force it to do a
3089 // full scan to find the space needed.
3090 hfsmp->hfs_flags &= ~HFS_DID_CONTIG_SCAN;
3091 }
3092
3093 retval = MacToVFSError(ExtendFileC(vcb,
3094 (FCB*)fp,
3095 bytesRequested,
3096 blockHint,
3097 extendFlags,
3098 &actualBytesAdded));
3099
3100 if (retval == E_NONE) {
3101 *(ap->a_bytesallocated) += actualBytesAdded;
3102 total_bytes_added += actualBytesAdded;
3103 moreBytesRequested -= actualBytesAdded;
3104 if (blockHint != 0) {
3105 blockHint += actualBytesAdded / vcb->blockSize;
3106 }
3107 }
3108 filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize;
3109
3110 hfs_systemfile_unlock(hfsmp, lockflags);
3111
3112 if (hfsmp->jnl) {
3113 (void) hfs_update(vp, TRUE);
3114 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
3115 }
3116
3117 hfs_end_transaction(hfsmp);
3118 }
3119
3120
3121 /*
3122 * if we get an error and no changes were made then exit;
3123 * otherwise we must do the hfs_update to reflect the changes
3124 */
3125 if (retval && (startingPEOF == filebytes))
3126 goto Err_Exit;
3127
3128 /*
3129 * Adjust actualBytesAdded to be allocation block aligned, not
3130 * clump size aligned.
3131 * NOTE: What we are reporting here does not affect reality
3132 * until the file is closed, when we truncate the file to allocation
3133 * block size.
3134 */
3135 if (total_bytes_added != 0 && orig_request_size < total_bytes_added)
3136 *(ap->a_bytesallocated) =
3137 roundup(orig_request_size, (off_t)vcb->blockSize);
3138
3139 } else { /* Shorten the size of the file */
3140
3141 if (fp->ff_size > length) {
3142 /*
3143 * Any buffers that are past the truncation point need to be
3144 * invalidated (to maintain buffer cache consistency).
3145 */
3146 }
3147
3148 retval = hfs_truncate(vp, length, 0, 0, 0, ap->a_context);
3149 filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize;
3150
3151 /*
3152 * if we get an error and no changes were made then exit;
3153 * otherwise we must do the hfs_update to reflect the changes
3154 */
3155 if (retval && (startingPEOF == filebytes)) goto Err_Exit;
3156#if QUOTA
3157 /* These are bytesreleased */
3158 (void) hfs_chkdq(cp, (int64_t)-((startingPEOF - filebytes)), NOCRED,0);
3159#endif /* QUOTA */
3160
3161 if (fp->ff_size > filebytes) {
3162 fp->ff_size = filebytes;
3163
3164 hfs_unlock(cp);
3165 ubc_setsize(vp, fp->ff_size);
3166 hfs_lock(cp, HFS_FORCE_LOCK);
3167 }
3168 }
3169
3170Std_Exit:
3171 cp->c_touch_chgtime = TRUE;
3172 cp->c_touch_modtime = TRUE;
3173 retval2 = hfs_update(vp, MNT_WAIT);
3174
3175 if (retval == 0)
3176 retval = retval2;
3177Err_Exit:
3178 hfs_unlock_truncate(cp, TRUE);
3179 hfs_unlock(cp);
3180 return (retval);
3181}
3182
3183
3184/*
3185 * Pagein for HFS filesystem
3186 */
3187int
3188hfs_vnop_pagein(struct vnop_pagein_args *ap)
3189/*
3190 struct vnop_pagein_args {
3191 vnode_t a_vp,
3192 upl_t a_pl,
3193 vm_offset_t a_pl_offset,
3194 off_t a_f_offset,
3195 size_t a_size,
3196 int a_flags
3197 vfs_context_t a_context;
3198 };
3199*/
3200{
3201 vnode_t vp = ap->a_vp;
3202 int error;
3203
3204#if HFS_COMPRESSION
3205 if (VNODE_IS_RSRC(vp)) {
3206 /* allow pageins of the resource fork */
3207 } else {
3208 int compressed = hfs_file_is_compressed(VTOC(vp), 1); /* 1 == don't take the cnode lock */
3209 if (compressed) {
3210 error = decmpfs_pagein_compressed(ap, &compressed, VTOCMP(vp));
3211 if (compressed) {
3212 if (error == 0) {
3213 /* successful page-in, update the access time */
3214 VTOC(vp)->c_touch_acctime = TRUE;
3215
3216 /* compressed files are not hot file candidates */
3217 if (VTOHFS(vp)->hfc_stage == HFC_RECORDING) {
3218 VTOF(vp)->ff_bytesread = 0;
3219 }
3220 }
3221 return error;
3222 }
3223 /* otherwise the file was converted back to a regular file while we were reading it */
3224 }
3225 }
3226#endif
3227
3228 error = cluster_pagein(vp, ap->a_pl, ap->a_pl_offset, ap->a_f_offset,
3229 ap->a_size, (off_t)VTOF(vp)->ff_size, ap->a_flags);
3230 /*
3231 * Keep track of blocks read.
3232 */
3233 if (!vnode_isswap(vp) && VTOHFS(vp)->hfc_stage == HFC_RECORDING && error == 0) {
3234 struct cnode *cp;
3235 struct filefork *fp;
3236 int bytesread;
3237 int took_cnode_lock = 0;
3238
3239 cp = VTOC(vp);
3240 fp = VTOF(vp);
3241
3242 if (ap->a_f_offset == 0 && fp->ff_size < PAGE_SIZE)
3243 bytesread = fp->ff_size;
3244 else
3245 bytesread = ap->a_size;
3246
3247 /* When ff_bytesread exceeds 32-bits, update it behind the cnode lock. */
3248 if ((fp->ff_bytesread + bytesread) > 0x00000000ffffffff && cp->c_lockowner != current_thread()) {
3249 hfs_lock(cp, HFS_FORCE_LOCK);
3250 took_cnode_lock = 1;
3251 }
3252 /*
3253 * If this file hasn't been seen since the start of
3254 * the current sampling period then start over.
3255 */
3256 if (cp->c_atime < VTOHFS(vp)->hfc_timebase) {
3257 struct timeval tv;
3258
3259 fp->ff_bytesread = bytesread;
3260 microtime(&tv);
3261 cp->c_atime = tv.tv_sec;
3262 } else {
3263 fp->ff_bytesread += bytesread;
3264 }
3265 cp->c_touch_acctime = TRUE;
3266 if (took_cnode_lock)
3267 hfs_unlock(cp);
3268 }
3269 return (error);
3270}
3271
3272/*
3273 * Pageout for HFS filesystem.
3274 */
3275int
3276hfs_vnop_pageout(struct vnop_pageout_args *ap)
3277/*
3278 struct vnop_pageout_args {
3279 vnode_t a_vp,
3280 upl_t a_pl,
3281 vm_offset_t a_pl_offset,
3282 off_t a_f_offset,
3283 size_t a_size,
3284 int a_flags
3285 vfs_context_t a_context;
3286 };
3287*/
3288{
3289 vnode_t vp = ap->a_vp;
3290 struct cnode *cp;
3291 struct filefork *fp;
3292 int retval = 0;
3293 off_t filesize;
3294 upl_t upl;
3295 upl_page_info_t* pl;
3296 vm_offset_t a_pl_offset;
3297 int a_flags;
3298 int is_pageoutv2 = 0;
3299
3300 cp = VTOC(vp);
3301 fp = VTOF(vp);
3302
3303 /*
3304 * Figure out where the file ends, for pageout purposes. If
3305 * ff_new_size > ff_size, then we're in the middle of extending the
3306 * file via a write, so it is safe (and necessary) that we be able
3307 * to pageout up to that point.
3308 */
3309 filesize = fp->ff_size;
3310 if (fp->ff_new_size > filesize)
3311 filesize = fp->ff_new_size;
3312
3313 a_flags = ap->a_flags;
3314 a_pl_offset = ap->a_pl_offset;
3315
3316 /*
3317 * we can tell if we're getting the new or old behavior from the UPL
3318 */
3319 if ((upl = ap->a_pl) == NULL) {
3320 int request_flags;
3321
3322 is_pageoutv2 = 1;
3323 /*
3324 * we're in control of any UPL we commit
3325 * make sure someone hasn't accidentally passed in UPL_NOCOMMIT
3326 */
3327 a_flags &= ~UPL_NOCOMMIT;
3328 a_pl_offset = 0;
3329
3330 /*
3331 * take truncate lock (shared) to guard against
3332 * zero-fill thru fsync interfering, but only for v2
3333 */
3334 hfs_lock_truncate(cp, 0);
3335
3336 if (a_flags & UPL_MSYNC) {
3337 request_flags = UPL_UBC_MSYNC | UPL_RET_ONLY_DIRTY;
3338 }
3339 else {
3340 request_flags = UPL_UBC_PAGEOUT | UPL_RET_ONLY_DIRTY;
3341 }
3342 ubc_create_upl(vp, ap->a_f_offset, ap->a_size, &upl, &pl, request_flags);
3343
3344 if (upl == (upl_t) NULL) {
3345 retval = EINVAL;
3346 goto pageout_done;
3347 }
3348 }
3349 /*
3350 * From this point forward upl points at the UPL we're working with;
3351 * it was either passed in or we successfully created it.
3352 */
3353
3354 /*
3355 * Now that HFS is opting into VFC_VFSVNOP_PAGEOUTV2, we may need to operate on our own
3356 * UPL instead of relying on the UPL passed into us. We go ahead and do that here,
3357 * scanning for dirty ranges. We'll issue our own N cluster_pageout calls, for
3358 * N dirty ranges in the UPL. Note that this is almost a direct copy of the
3359 * logic in vnode_pageout except that we need to do it after grabbing the truncate
3360 * lock in HFS so that we don't lock invert ourselves.
3361 *
3362 * Note that we can still get into this function on behalf of the default pager with
3363 * non-V2 behavior (swapfiles). However in that case, we did not grab locks above
3364 * since fsync and other writing threads will grab the locks, then mark the
3365 * relevant pages as busy. But the pageout codepath marks the pages as busy,
3366 * and THEN would attempt to grab the truncate lock, which would result in deadlock. So
3367 * we do not try to grab anything for the pre-V2 case, which should only be accessed
3368 * by the paging/VM system.
3369 */
3370
3371 if (is_pageoutv2) {
3372 off_t f_offset;
3373 int offset;
3374 int isize;
3375 int pg_index;
3376 int error;
3377 int error_ret = 0;
3378
3379 isize = ap->a_size;
3380 f_offset = ap->a_f_offset;
3381
3382 /*
3383 * Scan from the back to find the last page in the UPL, so that we
3384 * aren't looking at a UPL that may have already been freed by the
3385 * preceding aborts/completions.
3386 */
3387 for (pg_index = ((isize) / PAGE_SIZE); pg_index > 0;) {
3388 if (upl_page_present(pl, --pg_index))
3389 break;
3390 if (pg_index == 0) {
3391 ubc_upl_abort_range(upl, 0, isize, UPL_ABORT_FREE_ON_EMPTY);
3392 goto pageout_done;
3393 }
3394 }
3395
3396 /*
3397 * initialize the offset variables before we touch the UPL.
3398 * a_f_offset is the position into the file, in bytes
3399 * offset is the position into the UPL, in bytes
3400 * pg_index is the pg# of the UPL we're operating on.
3401 * isize is the offset into the UPL of the last non-clean page.
3402 */
3403 isize = ((pg_index + 1) * PAGE_SIZE);
3404
3405 offset = 0;
3406 pg_index = 0;
3407
3408 while (isize) {
3409 int xsize;
3410 int num_of_pages;
3411
3412 if ( !upl_page_present(pl, pg_index)) {
3413 /*
3414 * we asked for RET_ONLY_DIRTY, so it's possible
3415 * to get back empty slots in the UPL.
3416 * just skip over them
3417 */
3418 f_offset += PAGE_SIZE;
3419 offset += PAGE_SIZE;
3420 isize -= PAGE_SIZE;
3421 pg_index++;
3422
3423 continue;
3424 }
3425 if ( !upl_dirty_page(pl, pg_index)) {
3426 panic ("hfs_vnop_pageout: unforeseen clean page @ index %d for UPL %p\n", pg_index, upl);
3427 }
3428
3429 /*
3430 * We know that we have at least one dirty page.
3431 * Now checking to see how many in a row we have
3432 */
3433 num_of_pages = 1;
3434 xsize = isize - PAGE_SIZE;
3435
3436 while (xsize) {
3437 if ( !upl_dirty_page(pl, pg_index + num_of_pages))
3438 break;
3439 num_of_pages++;
3440 xsize -= PAGE_SIZE;
3441 }
3442 xsize = num_of_pages * PAGE_SIZE;
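 /*
 * xsize now covers the run of contiguous dirty pages found above;
 * that run is handed to cluster_pageout() as a single I/O below.
 */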
3443
3444 if (!vnode_isswap(vp)) {
3445 off_t end_of_range;
3446 int tooklock;
3447
3448 tooklock = 0;
3449
3450 if (cp->c_lockowner != current_thread()) {
3451 if ((retval = hfs_lock(cp, HFS_EXCLUSIVE_LOCK))) {
3452 /*
3453 * we're in the v2 path, so we are the
3454 * owner of the UPL... we may have already
3455 * processed some of the UPL, so abort it
3456 * from the current working offset to the
3457 * end of the UPL
3458 */
3459 ubc_upl_abort_range(upl,
3460 offset,
3461 ap->a_size - offset,
3462 UPL_ABORT_FREE_ON_EMPTY);
3463 goto pageout_done;
3464 }
3465 tooklock = 1;
3466 }
3467 end_of_range = f_offset + xsize - 1;
3468
3469 if (end_of_range >= filesize) {
3470 end_of_range = (off_t)(filesize - 1);
3471 }
3472 if (f_offset < filesize) {
3473 rl_remove(f_offset, end_of_range, &fp->ff_invalidranges);
3474 cp->c_flag |= C_MODIFIED; /* leof is dirty */
3475 }
3476 if (tooklock) {
3477 hfs_unlock(cp);
3478 }
3479 }
3480 if ((error = cluster_pageout(vp, upl, offset, f_offset,
3481 xsize, filesize, a_flags))) {
3482 if (error_ret == 0)
3483 error_ret = error;
3484 }
3485 f_offset += xsize;
3486 offset += xsize;
3487 isize -= xsize;
3488 pg_index += num_of_pages;
3489 }
3490 /* capture errnos bubbled out of cluster_pageout if they occurred */
3491 if (error_ret != 0) {
3492 retval = error_ret;
3493 }
3494 } /* end block for v2 pageout behavior */
3495 else {
3496 if (!vnode_isswap(vp)) {
3497 off_t end_of_range;
3498 int tooklock = 0;
3499
3500 if (cp->c_lockowner != current_thread()) {
3501 if ((retval = hfs_lock(cp, HFS_EXCLUSIVE_LOCK))) {
3502 if (!(a_flags & UPL_NOCOMMIT)) {
3503 ubc_upl_abort_range(upl,
3504 a_pl_offset,
3505 ap->a_size,
3506 UPL_ABORT_FREE_ON_EMPTY);
3507 }
3508 goto pageout_done;
3509 }
3510 tooklock = 1;
3511 }
3512 end_of_range = ap->a_f_offset + ap->a_size - 1;
3513
3514 if (end_of_range >= filesize) {
3515 end_of_range = (off_t)(filesize - 1);
3516 }
3517 if (ap->a_f_offset < filesize) {
3518 rl_remove(ap->a_f_offset, end_of_range, &fp->ff_invalidranges);
3519 cp->c_flag |= C_MODIFIED; /* leof is dirty */
3520 }
3521
3522 if (tooklock) {
3523 hfs_unlock(cp);
3524 }
3525 }
3526 /*
3527 * just call cluster_pageout for old pre-v2 behavior
3528 */
3529 retval = cluster_pageout(vp, upl, a_pl_offset, ap->a_f_offset,
3530 ap->a_size, filesize, a_flags);
3531 }
3532
3533 /*
3534 * If data was written, update the modification time of the file.
3535 * If setuid or setgid bits are set and this process is not the
3536 * superuser then clear the setuid and setgid bits as a precaution
3537 * against tampering.
3538 */
3539 if (retval == 0) {
3540 cp->c_touch_modtime = TRUE;
3541 cp->c_touch_chgtime = TRUE;
3542 if ((cp->c_mode & (S_ISUID | S_ISGID)) &&
3543 (vfs_context_suser(ap->a_context) != 0)) {
3544 hfs_lock(cp, HFS_FORCE_LOCK);
3545 cp->c_mode &= ~(S_ISUID | S_ISGID);
3546 hfs_unlock(cp);
3547 }
3548 }
3549
3550pageout_done:
3551 if (is_pageoutv2) {
3552 /* release truncate lock (shared) */
3553 hfs_unlock_truncate(cp, 0);
3554 }
3555 return (retval);
3556}
3557
3558/*
3559 * Intercept B-Tree node writes to unswap them if necessary.
3560 */
3561int
3562hfs_vnop_bwrite(struct vnop_bwrite_args *ap)
3563{
3564 int retval = 0;
3565 register struct buf *bp = ap->a_bp;
3566 register struct vnode *vp = buf_vnode(bp);
3567 BlockDescriptor block;
3568
3569 /* Trap B-Tree writes */
3570 if ((VTOC(vp)->c_fileid == kHFSExtentsFileID) ||
3571 (VTOC(vp)->c_fileid == kHFSCatalogFileID) ||
3572 (VTOC(vp)->c_fileid == kHFSAttributesFileID) ||
3573 (vp == VTOHFS(vp)->hfc_filevp)) {
3574
3575 /*
3576 * Swap and validate the node if it is in native byte order.
3577 * This is always be true on big endian, so we always validate
3578 * before writing here. On little endian, the node typically has
3579 * been swapped and validated when it was written to the journal,
3580 * so we won't do anything here.
3581 */
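 /*
 * The first record in a B-tree node always starts at offset 14
 * (sizeof(BTNodeDescriptor)), and that offset is stored in the last
 * two bytes of the node. If those bytes read as 0x000e in host byte
 * order, the node is still in native (host) order and must be
 * swapped to big endian before it hits the disk.
 */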
3582 if (((u_int16_t *)((char *)buf_dataptr(bp) + buf_count(bp) - 2))[0] == 0x000e) {
3583 /* Prepare the block pointer */
3584 block.blockHeader = bp;
3585 block.buffer = (char *)buf_dataptr(bp);
3586 block.blockNum = buf_lblkno(bp);
3587 /* not found in cache ==> came from disk */
3588 block.blockReadFromDisk = (buf_fromcache(bp) == 0);
3589 block.blockSize = buf_count(bp);
3590
3591 /* Endian un-swap B-Tree node */
3592 retval = hfs_swap_BTNode (&block, vp, kSwapBTNodeHostToBig, false);
3593 if (retval)
3594 panic("hfs_vnop_bwrite: about to write corrupt node!\n");
3595 }
3596 }
3597
3598 /* This buffer shouldn't be locked anymore but if it is clear it */
3599 if ((buf_flags(bp) & B_LOCKED)) {
3600 // XXXdbg
3601 if (VTOHFS(vp)->jnl) {
3602 panic("hfs: CLEARING the lock bit on bp %p\n", bp);
3603 }
3604 buf_clearflags(bp, B_LOCKED);
3605 }
3606 retval = vn_bwrite (ap);
3607
3608 return (retval);
3609}
3610
3611/*
3612 * Relocate a file to a new location on disk
3613 * cnode must be locked on entry
3614 *
3615 * Relocation occurs by cloning the file's data from its
3616 * current set of blocks to a new set of blocks. During
3617 * the relocation all of the blocks (old and new) are
3618 * owned by the file.
3619 *
3620 * -----------------
3621 * |///////////////|
3622 * -----------------
3623 * 0 N (file offset)
3624 *
3625 * ----------------- -----------------
3626 * |///////////////| | | STEP 1 (acquire new blocks)
3627 * ----------------- -----------------
3628 * 0 N N+1 2N
3629 *
3630 * ----------------- -----------------
3631 * |///////////////| |///////////////| STEP 2 (clone data)
3632 * ----------------- -----------------
3633 * 0 N N+1 2N
3634 *
3635 * -----------------
3636 * |///////////////| STEP 3 (head truncate blocks)
3637 * -----------------
3638 * 0 N
3639 *
3640 * During steps 2 and 3 page-outs to file offsets less
3641 * than or equal to N are suspended.
3642 *
3643 * During step 3 page-ins to the file get suspended.
3644 */
3645__private_extern__
3646int
3647hfs_relocate(struct vnode *vp, u_int32_t blockHint, kauth_cred_t cred,
3648 struct proc *p)
3649{
3650 struct cnode *cp;
3651 struct filefork *fp;
3652 struct hfsmount *hfsmp;
3653 u_int32_t headblks;
3654 u_int32_t datablks;
3655 u_int32_t blksize;
3656 u_int32_t growsize;
3657 u_int32_t nextallocsave;
3658 daddr64_t sector_a, sector_b;
3659 int eflags;
3660 off_t newbytes;
3661 int retval;
3662 int lockflags = 0;
3663 int took_trunc_lock = 0;
3664 int started_tr = 0;
3665 enum vtype vnodetype;
3666
3667 vnodetype = vnode_vtype(vp);
3668 if (vnodetype != VREG && vnodetype != VLNK) {
3669 return (EPERM);
3670 }
3671
3672 hfsmp = VTOHFS(vp);
3673 if (hfsmp->hfs_flags & HFS_FRAGMENTED_FREESPACE) {
3674 return (ENOSPC);
3675 }
3676
3677 cp = VTOC(vp);
3678 fp = VTOF(vp);
3679 if (fp->ff_unallocblocks)
3680 return (EINVAL);
3681 blksize = hfsmp->blockSize;
3682 if (blockHint == 0)
3683 blockHint = hfsmp->nextAllocation;
3684
3685 if ((fp->ff_size > 0x7fffffff) ||
3686 ((fp->ff_size > blksize) && vnodetype == VLNK)) {
3687 return (EFBIG);
3688 }
3689
3690 //
3691 // We do not believe that this call to hfs_fsync() is
3692 // necessary and it causes a journal transaction
3693 // deadlock so we are removing it.
3694 //
3695 //if (vnodetype == VREG && !vnode_issystem(vp)) {
3696 // retval = hfs_fsync(vp, MNT_WAIT, 0, p);
3697 // if (retval)
3698 // return (retval);
3699 //}
3700
3701 if (!vnode_issystem(vp) && (vnodetype != VLNK)) {
3702 hfs_unlock(cp);
3703 hfs_lock_truncate(cp, TRUE);
3704 /* Force lock since callers expects lock to be held. */
3705 if ((retval = hfs_lock(cp, HFS_FORCE_LOCK))) {
3706 hfs_unlock_truncate(cp, TRUE);
3707 return (retval);
3708 }
3709 /* No need to continue if file was removed. */
3710 if (cp->c_flag & C_NOEXISTS) {
3711 hfs_unlock_truncate(cp, TRUE);
3712 return (ENOENT);
3713 }
3714 took_trunc_lock = 1;
3715 }
3716 headblks = fp->ff_blocks;
3717 datablks = howmany(fp->ff_size, blksize);
3718 growsize = datablks * blksize;
3719 eflags = kEFContigMask | kEFAllMask | kEFNoClumpMask;
3720 if (blockHint >= hfsmp->hfs_metazone_start &&
3721 blockHint <= hfsmp->hfs_metazone_end)
3722 eflags |= kEFMetadataMask;
3723
3724 if (hfs_start_transaction(hfsmp) != 0) {
3725 if (took_trunc_lock)
3726 hfs_unlock_truncate(cp, TRUE);
3727 return (EINVAL);
3728 }
3729 started_tr = 1;
3730 /*
3731 * Protect the extents b-tree and the allocation bitmap
3732 * during MapFileBlockC and ExtendFileC operations.
3733 */
3734 lockflags = SFL_BITMAP;
3735 if (overflow_extents(fp))
3736 lockflags |= SFL_EXTENTS;
3737 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
3738
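 /*
 * Record the physical sector of the file's current last block. If
 * the new allocation turns out to start in the very next sector
 * (sector_a + 1 == sector_b below), the new blocks are contiguous
 * with the old ones and the relocation is abandoned with ENOSPC.
 */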
3739 retval = MapFileBlockC(hfsmp, (FCB *)fp, 1, growsize - 1, &sector_a, NULL);
3740 if (retval) {
3741 retval = MacToVFSError(retval);
3742 goto out;
3743 }
3744
3745 /*
3746 * STEP 1 - acquire new allocation blocks.
3747 */
3748 nextallocsave = hfsmp->nextAllocation;
3749 retval = ExtendFileC(hfsmp, (FCB*)fp, growsize, blockHint, eflags, &newbytes);
3750 if (eflags & kEFMetadataMask) {
3751 HFS_MOUNT_LOCK(hfsmp, TRUE);
3752 HFS_UPDATE_NEXT_ALLOCATION(hfsmp, nextallocsave);
3753 MarkVCBDirty(hfsmp);
3754 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
3755 }
3756
3757 retval = MacToVFSError(retval);
3758 if (retval == 0) {
3759 cp->c_flag |= C_MODIFIED;
3760 if (newbytes < growsize) {
3761 retval = ENOSPC;
3762 goto restore;
3763 } else if (fp->ff_blocks < (headblks + datablks)) {
3764 printf("hfs_relocate: allocation failed");
3765 retval = ENOSPC;
3766 goto restore;
3767 }
3768
3769 retval = MapFileBlockC(hfsmp, (FCB *)fp, 1, growsize, &sector_b, NULL);
3770 if (retval) {
3771 retval = MacToVFSError(retval);
3772 } else if ((sector_a + 1) == sector_b) {
3773 retval = ENOSPC;
3774 goto restore;
3775 } else if ((eflags & kEFMetadataMask) &&
3776 ((((u_int64_t)sector_b * hfsmp->hfs_logical_block_size) / blksize) >
3777 hfsmp->hfs_metazone_end)) {
3778#if 0
3779 const char * filestr;
3780 char emptystr = '\0';
3781
3782 if (cp->c_desc.cd_nameptr != NULL) {
3783 filestr = (const char *)&cp->c_desc.cd_nameptr[0];
3784 } else if (vnode_name(vp) != NULL) {
3785 filestr = vnode_name(vp);
3786 } else {
3787 filestr = &emptystr;
3788 }
3789#endif
3790 retval = ENOSPC;
3791 goto restore;
3792 }
3793 }
3794 /* Done with system locks and journal for now. */
3795 hfs_systemfile_unlock(hfsmp, lockflags);
3796 lockflags = 0;
3797 hfs_end_transaction(hfsmp);
3798 started_tr = 0;
3799
3800 if (retval) {
3801 /*
3802 * Check to see if failure is due to excessive fragmentation.
3803 */
3804 if ((retval == ENOSPC) &&
3805 (hfs_freeblks(hfsmp, 0) > (datablks * 2))) {
3806 hfsmp->hfs_flags |= HFS_FRAGMENTED_FREESPACE;
3807 }
3808 goto out;
3809 }
3810 /*
3811 * STEP 2 - clone file data into the new allocation blocks.
3812 */
3813
3814 if (vnodetype == VLNK)
3815 retval = hfs_clonelink(vp, blksize, cred, p);
3816 else if (vnode_issystem(vp))
3817 retval = hfs_clonesysfile(vp, headblks, datablks, blksize, cred, p);
3818 else
3819 retval = hfs_clonefile(vp, headblks, datablks, blksize);
3820
3821 /* Start transaction for step 3 or for a restore. */
3822 if (hfs_start_transaction(hfsmp) != 0) {
3823 retval = EINVAL;
3824 goto out;
3825 }
3826 started_tr = 1;
3827 if (retval)
3828 goto restore;
3829
3830 /*
3831 * STEP 3 - switch to cloned data and remove old blocks.
3832 */
3833 lockflags = SFL_BITMAP;
3834 if (overflow_extents(fp))
3835 lockflags |= SFL_EXTENTS;
3836 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
3837
3838 retval = HeadTruncateFile(hfsmp, (FCB*)fp, headblks);
3839
3840 hfs_systemfile_unlock(hfsmp, lockflags);
3841 lockflags = 0;
3842 if (retval)
3843 goto restore;
3844out:
3845 if (took_trunc_lock)
3846 hfs_unlock_truncate(cp, TRUE);
3847
3848 if (lockflags) {
3849 hfs_systemfile_unlock(hfsmp, lockflags);
3850 lockflags = 0;
3851 }
3852
3853 /* Push cnode's new extent data to disk. */
3854 if (retval == 0) {
3855 (void) hfs_update(vp, MNT_WAIT);
3856 }
3857 if (hfsmp->jnl) {
3858 if (cp->c_cnid < kHFSFirstUserCatalogNodeID)
3859 (void) hfs_flushvolumeheader(hfsmp, MNT_WAIT, HFS_ALTFLUSH);
3860 else
3861 (void) hfs_flushvolumeheader(hfsmp, MNT_NOWAIT, 0);
3862 }
3863exit:
3864 if (started_tr)
3865 hfs_end_transaction(hfsmp);
3866
3867 return (retval);
3868
3869restore:
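	/* Nothing was allocated beyond the original blocks; nothing to give back. */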
3870 if (fp->ff_blocks == headblks) {
3871 if (took_trunc_lock)
3872 hfs_unlock_truncate(cp, TRUE);
3873 goto exit;
3874 }
3875 /*
3876 * Give back any newly allocated space.
3877 */
3878 if (lockflags == 0) {
3879 lockflags = SFL_BITMAP;
3880 if (overflow_extents(fp))
3881 lockflags |= SFL_EXTENTS;
3882 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
3883 }
3884
3885 (void) TruncateFileC(hfsmp, (FCB*)fp, fp->ff_size, false);
3886
3887 hfs_systemfile_unlock(hfsmp, lockflags);
3888 lockflags = 0;
3889
3890 if (took_trunc_lock)
3891 hfs_unlock_truncate(cp, TRUE);
3892 goto exit;
3893}
3894
3895
3896/*
3897 * Clone a symlink: copy the link's data from its current block (logical
3898 * block 0) into the newly allocated block that follows it (logical block 1).
3899 */
3900static int
3901hfs_clonelink(struct vnode *vp, int blksize, kauth_cred_t cred, __unused struct proc *p)
3902{
3903 struct buf *head_bp = NULL;
3904 struct buf *tail_bp = NULL;
3905 int error;
3906
3907
3908 error = (int)buf_meta_bread(vp, (daddr64_t)0, blksize, cred, &head_bp);
3909 if (error)
3910 goto out;
3911
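	/*
	 * Get a buffer for the newly allocated block (logical block 1) and
	 * copy the link data into it.
	 */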
3912 tail_bp = buf_getblk(vp, (daddr64_t)1, blksize, 0, 0, BLK_META);
3913 if (tail_bp == NULL) {
3914 error = EIO;
3915 goto out;
3916 }
3917 bcopy((char *)buf_dataptr(head_bp), (char *)buf_dataptr(tail_bp), blksize);
3918 error = (int)buf_bwrite(tail_bp);
3919out:
3920 if (head_bp) {
3921 buf_markinvalid(head_bp);
3922 buf_brelse(head_bp);
3923 }
3924 (void) buf_invalidateblks(vp, BUF_WRITE_DATA, 0, 0);
3925
3926 return (error);
3927}
3928
3929/*
3930 * Clone a file's data within the file: copy the first blkcnt allocation
3931 * blocks of data into the newly allocated region that begins at block blkstart.
3932 */
3933static int
3934hfs_clonefile(struct vnode *vp, int blkstart, int blkcnt, int blksize)
3935{
3936 caddr_t bufp;
3937 size_t bufsize;
3938 size_t copysize;
3939 size_t iosize;
3940 size_t offset;
3941 off_t writebase;
3942 uio_t auio;
3943 int error = 0;
3944
3945	writebase = (off_t)blkstart * blksize;
3946	copysize = (size_t)blkcnt * blksize;
3947 iosize = bufsize = MIN(copysize, 128 * 1024);
3948 offset = 0;
3949
3950 if (kmem_alloc(kernel_map, (vm_offset_t *)&bufp, bufsize)) {
3951 return (ENOMEM);
3952 }
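	/*
	 * Drop the cnode lock while the data is copied; it is re-acquired
	 * (with HFS_FORCE_LOCK) before returning.
	 */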
3953 hfs_unlock(VTOC(vp));
3954
3955 auio = uio_create(1, 0, UIO_SYSSPACE, UIO_READ);
3956
3957 while (offset < copysize) {
3958 iosize = MIN(copysize - offset, iosize);
3959
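		/* Read the next chunk of the original data into the staging buffer. */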
3960 uio_reset(auio, offset, UIO_SYSSPACE, UIO_READ);
3961 uio_addiov(auio, (uintptr_t)bufp, iosize);
3962
3963 error = cluster_read(vp, auio, copysize, IO_NOCACHE);
3964 if (error) {
3965 printf("hfs_clonefile: cluster_read failed - %d\n", error);
3966 break;
3967 }
3968 if (uio_resid(auio) != 0) {
3969 printf("hfs_clonefile: cluster_read: uio_resid = %lld\n", uio_resid(auio));
3970 error = EIO;
3971 break;
3972 }
3973
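		/* Write that chunk into the newly allocated region at writebase + offset. */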
3974 uio_reset(auio, writebase + offset, UIO_SYSSPACE, UIO_WRITE);
3975 uio_addiov(auio, (uintptr_t)bufp, iosize);
3976
3977 error = cluster_write(vp, auio, writebase + offset,
3978 writebase + offset + iosize,
3979 uio_offset(auio), 0, IO_NOCACHE | IO_SYNC);
3980 if (error) {
3981 printf("hfs_clonefile: cluster_write failed - %d\n", error);
3982 break;
3983 }
3984 if (uio_resid(auio) != 0) {
3985 printf("hfs_clonefile: cluster_write failed - uio_resid not zero\n");
3986 error = EIO;
3987 break;
3988 }
3989 offset += iosize;
3990 }
3991 uio_free(auio);
3992
3993 if ((blksize & PAGE_MASK)) {
3994 /*
3995		 * Since the copy may not have started on a PAGE
3996		 * boundary (or may not have ended on one), we
3997		 * may have pages left in the cache, because NOCACHE
3998		 * lets partially written pages linger.  Flush the
3999		 * entire range to make sure no pages are left that
4000		 * lie beyond (or intersect) the real LEOF of this
4001		 * file.
4002 */
4003 ubc_msync(vp, writebase, writebase + offset, NULL, UBC_INVALIDATE | UBC_PUSHDIRTY);
4004 } else {
4005 /*
4006 * No need to call ubc_sync_range or hfs_invalbuf
4007 * since the file was copied using IO_NOCACHE and
4008 * the copy was done starting and ending on a page
4009 * boundary in the file.
4010 */
4011 }
4012 kmem_free(kernel_map, (vm_offset_t)bufp, bufsize);
4013
4014 hfs_lock(VTOC(vp), HFS_FORCE_LOCK);
4015 return (error);
4016}
4017
4018/*
4019 * Clone a system (metadata) file: copy blkcnt allocation blocks through
4020 * the buffer cache (buf_meta_bread/buf_bwrite), up to a megabyte per pass.
4021 */
4022static int
4023hfs_clonesysfile(struct vnode *vp, int blkstart, int blkcnt, int blksize,
4024 kauth_cred_t cred, struct proc *p)
4025{
4026 caddr_t bufp;
4027 char * offset;
4028 size_t bufsize;
4029 size_t iosize;
4030 struct buf *bp = NULL;
4031 daddr64_t blkno;
4032 daddr64_t blk;
4033 daddr64_t start_blk;
4034 daddr64_t last_blk;
4035 int breadcnt;
4036 int i;
4037 int error = 0;
4038
4039
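	/*
	 * Copy in units of the device's logical block size, staging up to
	 * one megabyte (rounded down to a block multiple) per pass.
	 */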
4040 iosize = GetLogicalBlockSize(vp);
4041 bufsize = MIN(blkcnt * blksize, 1024 * 1024) & ~(iosize - 1);
4042 breadcnt = bufsize / iosize;
4043
4044 if (kmem_alloc(kernel_map, (vm_offset_t *)&bufp, bufsize)) {
4045 return (ENOMEM);
4046 }
4047 start_blk = ((daddr64_t)blkstart * blksize) / iosize;
4048 last_blk = ((daddr64_t)blkcnt * blksize) / iosize;
4049 blkno = 0;
4050
4051 while (blkno < last_blk) {
4052 /*
4053 * Read up to a megabyte
4054 */
4055 offset = bufp;
4056 for (i = 0, blk = blkno; (i < breadcnt) && (blk < last_blk); ++i, ++blk) {
4057 error = (int)buf_meta_bread(vp, blk, iosize, cred, &bp);
4058 if (error) {
4059 printf("hfs_clonesysfile: meta_bread error %d\n", error);
4060 goto out;
4061 }
4062 if (buf_count(bp) != iosize) {
4063				printf("hfs_clonesysfile: b_bcount is only %u\n", buf_count(bp));
				error = EIO;
4064				goto out;
4065 }
4066 bcopy((char *)buf_dataptr(bp), offset, iosize);
4067
4068 buf_markinvalid(bp);
4069 buf_brelse(bp);
4070 bp = NULL;
4071
4072 offset += iosize;
4073 }
4074
4075 /*
4076 * Write up to a megabyte
4077 */
4078 offset = bufp;
4079 for (i = 0; (i < breadcnt) && (blkno < last_blk); ++i, ++blkno) {
4080 bp = buf_getblk(vp, start_blk + blkno, iosize, 0, 0, BLK_META);
4081 if (bp == NULL) {
4082 printf("hfs_clonesysfile: getblk failed on blk %qd\n", start_blk + blkno);
4083 error = EIO;
4084 goto out;
4085 }
4086 bcopy(offset, (char *)buf_dataptr(bp), iosize);
4087 error = (int)buf_bwrite(bp);
4088 bp = NULL;
4089 if (error)
4090 goto out;
4091 offset += iosize;
4092 }
4093 }
4094out:
4095 if (bp) {
4096 buf_brelse(bp);
4097 }
4098
4099 kmem_free(kernel_map, (vm_offset_t)bufp, bufsize);
4100
4101	/* Flush the copy to disk, but don't let a successful fsync mask an earlier error. */
	if (error == 0)
		error = hfs_fsync(vp, MNT_WAIT, 0, p);
	else
		(void) hfs_fsync(vp, MNT_WAIT, 0, p);
4102
4103 return (error);
4104}