1/*
2 * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. Please obtain a copy of the License at
10 * http://www.opensource.apple.com/apsl/ and read it before using this
11 * file.
12 *
13 * The Original Code and all software distributed under the License are
14 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
18 * Please see the License for the specific language governing rights and
19 * limitations under the License.
20 *
21 * @APPLE_LICENSE_HEADER_END@
22 */
23/* @(#)hfs_readwrite.c 1.0
24 *
25 * (c) 1998-2001 Apple Computer, Inc. All Rights Reserved
26 *
27 * hfs_readwrite.c -- vnode operations to deal with reading and writing files.
28 *
29 */
30
31#include <sys/param.h>
32#include <sys/systm.h>
33#include <sys/resourcevar.h>
34#include <sys/kernel.h>
35#include <sys/fcntl.h>
36#include <sys/filedesc.h>
37#include <sys/stat.h>
38#include <sys/buf.h>
39#include <sys/proc.h>
40#include <sys/kauth.h>
41#include <sys/vnode.h>
42#include <sys/uio.h>
43#include <sys/vfs_context.h>
44
45#include <miscfs/specfs/specdev.h>
46
47#include <sys/ubc.h>
48#include <vm/vm_pageout.h>
49#include <vm/vm_kern.h>
50
51#include <sys/kdebug.h>
52
53#include "hfs.h"
54#include "hfs_endian.h"
55#include "hfs_fsctl.h"
56#include "hfs_quota.h"
57#include "hfscommon/headers/FileMgrInternal.h"
58#include "hfscommon/headers/BTreesInternal.h"
59#include "hfs_cnode.h"
60#include "hfs_dbg.h"
61
62extern int overflow_extents(struct filefork *fp);
63
64#define can_cluster(size) ((((size & (4096-1))) == 0) && (size <= (MAXPHYSIO/2)))
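/*
 * can_cluster(size) is true when size is an exact multiple of 4K and no
 * larger than MAXPHYSIO/2; hfs_bmap() uses it to decide whether a read-ahead
 * run length is worth reporting for that logical block size.
 */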
65
66enum {
67 MAXHFSFILESIZE = 0x7FFFFFFF /* this needs to go in the mount structure */
68};
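/*
 * 0x7FFFFFFF bytes (2 GB - 1) is the largest file a plain HFS (non-Plus)
 * volume can represent; it is checked against read offsets and truncate
 * lengths on such volumes (see hfs_vnop_read() and do_hfs_truncate()).
 */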
69
70extern u_int32_t GetLogicalBlockSize(struct vnode *vp);
71
72extern int hfs_setextendedsecurity(struct hfsmount *, int);
73
74
75static int hfs_clonelink(struct vnode *, int, kauth_cred_t, struct proc *);
76static int hfs_clonefile(struct vnode *, int, int, int);
77static int hfs_clonesysfile(struct vnode *, int, int, int, kauth_cred_t, struct proc *);
78
79
80/*****************************************************************************
81*
82* I/O Operations on vnodes
83*
84*****************************************************************************/
85int hfs_vnop_read(struct vnop_read_args *);
86int hfs_vnop_write(struct vnop_write_args *);
87int hfs_vnop_ioctl(struct vnop_ioctl_args *);
88int hfs_vnop_select(struct vnop_select_args *);
89int hfs_vnop_blktooff(struct vnop_blktooff_args *);
90int hfs_vnop_offtoblk(struct vnop_offtoblk_args *);
91int hfs_vnop_blockmap(struct vnop_blockmap_args *);
92int hfs_vnop_strategy(struct vnop_strategy_args *);
93int hfs_vnop_allocate(struct vnop_allocate_args *);
94int hfs_vnop_pagein(struct vnop_pagein_args *);
95int hfs_vnop_pageout(struct vnop_pageout_args *);
96int hfs_vnop_bwrite(struct vnop_bwrite_args *);
97
98
99/*
100 * Read data from a file.
101 */
102int
103hfs_vnop_read(struct vnop_read_args *ap)
104{
105 uio_t uio = ap->a_uio;
106 struct vnode *vp = ap->a_vp;
107 struct cnode *cp;
108 struct filefork *fp;
109 struct hfsmount *hfsmp;
110 off_t filesize;
111 off_t filebytes;
112 off_t start_resid = uio_resid(uio);
113 off_t offset = uio_offset(uio);
114 int retval = 0;
115
116
117 /* Preflight checks */
118 if (!vnode_isreg(vp)) {
119 /* can only read regular files */
120 if (vnode_isdir(vp))
121 return (EISDIR);
122 else
123 return (EPERM);
124 }
125 if (start_resid == 0)
126 return (0); /* Nothing left to do */
127 if (offset < 0)
 128 return (EINVAL); /* can't read from a negative offset */
129
130 cp = VTOC(vp);
131 fp = VTOF(vp);
132 hfsmp = VTOHFS(vp);
133
134 /* Protect against a size change. */
135 hfs_lock_truncate(cp, 0);
136
137 filesize = fp->ff_size;
138 filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
139 if (offset > filesize) {
140 if ((hfsmp->hfs_flags & HFS_STANDARD) &&
141 (offset > (off_t)MAXHFSFILESIZE)) {
142 retval = EFBIG;
143 }
144 goto exit;
145 }
146
147 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 12)) | DBG_FUNC_START,
148 (int)uio_offset(uio), uio_resid(uio), (int)filesize, (int)filebytes, 0);
149
150 retval = cluster_read(vp, uio, filesize, 0);
151
152 cp->c_touch_acctime = TRUE;
153
154 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 12)) | DBG_FUNC_END,
155 (int)uio_offset(uio), uio_resid(uio), (int)filesize, (int)filebytes, 0);
156
157 /*
 158 * Keep track of blocks read
 
159 */
160 if (VTOHFS(vp)->hfc_stage == HFC_RECORDING && retval == 0) {
161 int took_cnode_lock = 0;
162 off_t bytesread;
163
164 bytesread = start_resid - uio_resid(uio);
165
166 /* When ff_bytesread exceeds 32-bits, update it behind the cnode lock. */
167 if ((fp->ff_bytesread + bytesread) > 0x00000000ffffffff) {
168 hfs_lock(cp, HFS_FORCE_LOCK);
169 took_cnode_lock = 1;
170 }
171 /*
172 * If this file hasn't been seen since the start of
173 * the current sampling period then start over.
174 */
175 if (cp->c_atime < VTOHFS(vp)->hfc_timebase) {
176 struct timeval tv;
177
178 fp->ff_bytesread = bytesread;
179 microtime(&tv);
180 cp->c_atime = tv.tv_sec;
181 } else {
182 fp->ff_bytesread += bytesread;
183 }
184 if (took_cnode_lock)
185 hfs_unlock(cp);
186 }
187exit:
188 hfs_unlock_truncate(cp);
189 return (retval);
190}
191
192/*
193 * Write data to a file.
194 */
195int
196hfs_vnop_write(struct vnop_write_args *ap)
197{
198 uio_t uio = ap->a_uio;
199 struct vnode *vp = ap->a_vp;
200 struct cnode *cp;
201 struct filefork *fp;
202 struct hfsmount *hfsmp;
203 kauth_cred_t cred = NULL;
204 off_t origFileSize;
205 off_t writelimit;
206 off_t bytesToAdd;
207 off_t actualBytesAdded;
208 off_t filebytes;
209 off_t offset;
210 size_t resid;
211 int eflags;
212 int ioflag = ap->a_ioflag;
213 int retval = 0;
214 int lockflags;
215 int cnode_locked = 0;
216
217 // LP64todo - fix this! uio_resid may be 64-bit value
218 resid = uio_resid(uio);
219 offset = uio_offset(uio);
220
221 if (offset < 0)
222 return (EINVAL);
223 if (resid == 0)
224 return (E_NONE);
225 if (!vnode_isreg(vp))
226 return (EPERM); /* Can only write regular files */
227
228 /* Protect against a size change. */
229 hfs_lock_truncate(VTOC(vp), TRUE);
230
231 if ( (retval = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK))) {
232 hfs_unlock_truncate(VTOC(vp));
233 return (retval);
234 }
235 cnode_locked = 1;
236 cp = VTOC(vp);
237 fp = VTOF(vp);
238 hfsmp = VTOHFS(vp);
239 filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
240
241 if (ioflag & IO_APPEND) {
242 uio_setoffset(uio, fp->ff_size);
243 offset = fp->ff_size;
244 }
245 if ((cp->c_flags & APPEND) && offset != fp->ff_size) {
246 retval = EPERM;
247 goto exit;
248 }
249
250 origFileSize = fp->ff_size;
251 eflags = kEFDeferMask; /* defer file block allocations */
252
253#ifdef HFS_SPARSE_DEV
254 /*
255 * When the underlying device is sparse and space
256 * is low (< 8MB), stop doing delayed allocations
257 * and begin doing synchronous I/O.
258 */
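/*
 * A rough check of the 8MB figure, assuming the common 4 KB allocation
 * block size: 2048 blocks * 4096 bytes = 8 MB, which is the threshold the
 * hfs_freeblks() test below enforces.
 */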
259 if ((hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) &&
260 (hfs_freeblks(hfsmp, 0) < 2048)) {
261 eflags &= ~kEFDeferMask;
262 ioflag |= IO_SYNC;
263 }
264#endif /* HFS_SPARSE_DEV */
265
266 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_START,
267 (int)offset, uio_resid(uio), (int)fp->ff_size, (int)filebytes, 0);
268
269 /* Now test if we need to extend the file */
270 /* Doing so will adjust the filebytes for us */
271
272 writelimit = offset + resid;
273 if (writelimit <= filebytes)
274 goto sizeok;
275
276 cred = vfs_context_ucred(ap->a_context);
277#if QUOTA
278 bytesToAdd = writelimit - filebytes;
279 retval = hfs_chkdq(cp, (int64_t)(roundup(bytesToAdd, hfsmp->blockSize)),
280 cred, 0);
281 if (retval)
282 goto exit;
283#endif /* QUOTA */
284
285 if (hfs_start_transaction(hfsmp) != 0) {
286 retval = EINVAL;
287 goto exit;
288 }
289
290 while (writelimit > filebytes) {
291 bytesToAdd = writelimit - filebytes;
292 if (cred && suser(cred, NULL) != 0)
293 eflags |= kEFReserveMask;
294
295 /* Protect extents b-tree and allocation bitmap */
296 lockflags = SFL_BITMAP;
297 if (overflow_extents(fp))
298 lockflags |= SFL_EXTENTS;
299 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
300
301 /* Files that are changing size are not hot file candidates. */
302 if (hfsmp->hfc_stage == HFC_RECORDING) {
303 fp->ff_bytesread = 0;
304 }
305 retval = MacToVFSError(ExtendFileC (hfsmp, (FCB*)fp, bytesToAdd,
306 0, eflags, &actualBytesAdded));
307
308 hfs_systemfile_unlock(hfsmp, lockflags);
309
310 if ((actualBytesAdded == 0) && (retval == E_NONE))
311 retval = ENOSPC;
312 if (retval != E_NONE)
313 break;
314 filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
315 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_NONE,
316 (int)offset, uio_resid(uio), (int)fp->ff_size, (int)filebytes, 0);
317 }
318 (void) hfs_update(vp, TRUE);
319 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
320 (void) hfs_end_transaction(hfsmp);
321
322sizeok:
323 if (retval == E_NONE) {
324 off_t filesize;
325 off_t zero_off;
326 off_t tail_off;
327 off_t inval_start;
328 off_t inval_end;
329 off_t io_start;
330 int lflag;
331 struct rl_entry *invalid_range;
332
333 if (writelimit > fp->ff_size)
334 filesize = writelimit;
335 else
336 filesize = fp->ff_size;
337
338 lflag = (ioflag & IO_SYNC);
339
340 if (offset <= fp->ff_size) {
341 zero_off = offset & ~PAGE_MASK_64;
342
 343 /* Check whether the area between zero_off and the start of the
 344 transfer is invalid and should be zero-filled as part of the
 345 transfer:
346 */
347 if (offset > zero_off) {
348 if (rl_scan(&fp->ff_invalidranges, zero_off, offset - 1, &invalid_range) != RL_NOOVERLAP)
349 lflag |= IO_HEADZEROFILL;
350 }
351 } else {
352 off_t eof_page_base = fp->ff_size & ~PAGE_MASK_64;
353
354 /* The bytes between fp->ff_size and uio->uio_offset must never be
355 read without being zeroed. The current last block is filled with zeroes
 356 if it holds valid data; in all cases we merely do a little bookkeeping
357 to track the area from the end of the current last page to the start of
358 the area actually written. For the same reason only the bytes up to the
 359 start of the page where this write will start are invalidated; any remainder
360 before uio->uio_offset is explicitly zeroed as part of the cluster_write.
361
362 Note that inval_start, the start of the page after the current EOF,
363 may be past the start of the write, in which case the zeroing
 364 will be handled by the cluster_write of the actual data.
365 */
366 inval_start = (fp->ff_size + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64;
367 inval_end = offset & ~PAGE_MASK_64;
368 zero_off = fp->ff_size;
369
370 if ((fp->ff_size & PAGE_MASK_64) &&
371 (rl_scan(&fp->ff_invalidranges,
372 eof_page_base,
373 fp->ff_size - 1,
374 &invalid_range) != RL_NOOVERLAP)) {
375 /* The page containing the EOF is not valid, so the
376 entire page must be made inaccessible now. If the write
377 starts on a page beyond the page containing the eof
378 (inval_end > eof_page_base), add the
379 whole page to the range to be invalidated. Otherwise
380 (i.e. if the write starts on the same page), zero-fill
381 the entire page explicitly now:
382 */
383 if (inval_end > eof_page_base) {
384 inval_start = eof_page_base;
385 } else {
386 zero_off = eof_page_base;
387 };
388 };
389
390 if (inval_start < inval_end) {
391 struct timeval tv;
392 /* There's some range of data that's going to be marked invalid */
393
394 if (zero_off < inval_start) {
395 /* The pages between inval_start and inval_end are going to be invalidated,
396 and the actual write will start on a page past inval_end. Now's the last
397 chance to zero-fill the page containing the EOF:
398 */
399 hfs_unlock(cp);
400 cnode_locked = 0;
401 retval = cluster_write(vp, (uio_t) 0,
402 fp->ff_size, inval_start,
403 zero_off, (off_t)0,
404 lflag | IO_HEADZEROFILL | IO_NOZERODIRTY);
405 hfs_lock(cp, HFS_FORCE_LOCK);
406 cnode_locked = 1;
407 if (retval) goto ioerr_exit;
408 offset = uio_offset(uio);
409 };
410
411 /* Mark the remaining area of the newly allocated space as invalid: */
412 rl_add(inval_start, inval_end - 1 , &fp->ff_invalidranges);
413 microuptime(&tv);
414 cp->c_zftimeout = tv.tv_sec + ZFTIMELIMIT;
415 zero_off = fp->ff_size = inval_end;
416 };
417
418 if (offset > zero_off) lflag |= IO_HEADZEROFILL;
419 };
420
421 /* Check to see whether the area between the end of the write and the end of
422 the page it falls in is invalid and should be zero-filled as part of the transfer:
423 */
424 tail_off = (writelimit + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64;
425 if (tail_off > filesize) tail_off = filesize;
426 if (tail_off > writelimit) {
427 if (rl_scan(&fp->ff_invalidranges, writelimit, tail_off - 1, &invalid_range) != RL_NOOVERLAP) {
428 lflag |= IO_TAILZEROFILL;
429 };
430 };
431
432 /*
433 * if the write starts beyond the current EOF (possibly advanced in the
434 * zeroing of the last block, above), then we'll zero fill from the current EOF
435 * to where the write begins:
436 *
437 * NOTE: If (and ONLY if) the portion of the file about to be written is
438 * before the current EOF it might be marked as invalid now and must be
439 * made readable (removed from the invalid ranges) before cluster_write
440 * tries to write it:
441 */
442 io_start = (lflag & IO_HEADZEROFILL) ? zero_off : offset;
443 if (io_start < fp->ff_size) {
444 off_t io_end;
445
446 io_end = (lflag & IO_TAILZEROFILL) ? tail_off : writelimit;
447 rl_remove(io_start, io_end - 1, &fp->ff_invalidranges);
448 };
449
450 hfs_unlock(cp);
451 cnode_locked = 0;
452 retval = cluster_write(vp, uio, fp->ff_size, filesize, zero_off,
453 tail_off, lflag | IO_NOZERODIRTY);
454 offset = uio_offset(uio);
455 if (offset > fp->ff_size) {
456 fp->ff_size = offset;
457
458 ubc_setsize(vp, fp->ff_size); /* XXX check errors */
459 /* Files that are changing size are not hot file candidates. */
460 if (hfsmp->hfc_stage == HFC_RECORDING)
461 fp->ff_bytesread = 0;
462 }
463 if (resid > uio_resid(uio)) {
464 cp->c_touch_chgtime = TRUE;
465 cp->c_touch_modtime = TRUE;
466 }
467 }
468 HFS_KNOTE(vp, NOTE_WRITE);
469
470ioerr_exit:
471 /*
472 * If we successfully wrote any data, and we are not the superuser
473 * we clear the setuid and setgid bits as a precaution against
474 * tampering.
475 */
476 if (cp->c_mode & (S_ISUID | S_ISGID)) {
477 cred = vfs_context_ucred(ap->a_context);
478 if (resid > uio_resid(uio) && cred && suser(cred, NULL)) {
479 if (!cnode_locked) {
480 hfs_lock(cp, HFS_FORCE_LOCK);
481 cnode_locked = 1;
482 }
483 cp->c_mode &= ~(S_ISUID | S_ISGID);
484 }
485 }
486 if (retval) {
487 if (ioflag & IO_UNIT) {
488 if (!cnode_locked) {
489 hfs_lock(cp, HFS_FORCE_LOCK);
490 cnode_locked = 1;
491 }
492 (void)hfs_truncate(vp, origFileSize, ioflag & IO_SYNC,
493 0, ap->a_context);
 494 // LP64todo - fix this! resid needs to be user_ssize_t
495 uio_setoffset(uio, (uio_offset(uio) - (resid - uio_resid(uio))));
496 uio_setresid(uio, resid);
497 filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
498 }
499 } else if ((ioflag & IO_SYNC) && (resid > uio_resid(uio))) {
500 if (!cnode_locked) {
501 hfs_lock(cp, HFS_FORCE_LOCK);
502 cnode_locked = 1;
503 }
504 retval = hfs_update(vp, TRUE);
505 }
506 /* Updating vcbWrCnt doesn't need to be atomic. */
507 hfsmp->vcbWrCnt++;
508
509 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_END,
510 (int)uio_offset(uio), uio_resid(uio), (int)fp->ff_size, (int)filebytes, 0);
511exit:
512 if (cnode_locked)
513 hfs_unlock(cp);
514 hfs_unlock_truncate(cp);
515 return (retval);
516}
517
518/* support for the "bulk-access" fcntl */
519
520#define CACHE_ELEMS 64
521#define CACHE_LEVELS 16
522#define PARENT_IDS_FLAG 0x100
523
524/* from hfs_attrlist.c */
525extern unsigned long DerivePermissionSummary(uid_t obj_uid, gid_t obj_gid,
526 mode_t obj_mode, struct mount *mp,
527 kauth_cred_t cred, struct proc *p);
528
529/* from vfs/vfs_fsevents.c */
530extern char *get_pathbuff(void);
531extern void release_pathbuff(char *buff);
532
533struct access_cache {
534 int numcached;
535 int cachehits; /* these two for statistics gathering */
536 int lookups;
537 unsigned int *acache;
538 Boolean *haveaccess;
539};
540
541struct access_t {
542 uid_t uid; /* IN: effective user id */
543 short flags; /* IN: access requested (i.e. R_OK) */
544 short num_groups; /* IN: number of groups user belongs to */
545 int num_files; /* IN: number of files to process */
546 int *file_ids; /* IN: array of file ids */
547 gid_t *groups; /* IN: array of groups */
548 short *access; /* OUT: access info for each file (0 for 'has access') */
549};
550
551struct user_access_t {
552 uid_t uid; /* IN: effective user id */
553 short flags; /* IN: access requested (i.e. R_OK) */
554 short num_groups; /* IN: number of groups user belongs to */
555 int num_files; /* IN: number of files to process */
556 user_addr_t file_ids; /* IN: array of file ids */
557 user_addr_t groups; /* IN: array of groups */
558 user_addr_t access; /* OUT: access info for each file (0 for 'has access') */
559};
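/*
 * Rough userspace sketch (not part of this file) of driving the bulk-access
 * check handled by HFS_BULKACCESS below.  The caller must be root; the field
 * names are the ones declared above, while the fsctl(2) wrapper and the
 * HFS_BULKACCESS command macro come from the user-level HFS headers.
 *
 *	struct access_t args;
 *	int ids[2] = { cnid_a, cnid_b };	// hypothetical catalog node ids
 *	short result[2];
 *	gid_t groups[1] = { getgid() };
 *
 *	args.uid = getuid();
 *	args.flags = R_OK;
 *	args.num_groups = 1;
 *	args.groups = groups;
 *	args.num_files = 2;
 *	args.file_ids = ids;
 *	args.access = result;
 *	if (fsctl("/Volumes/MyHFS", HFS_BULKACCESS, &args, 0) == 0)
 *		; // result[i] is 0 when file_ids[i] is reachable, else an errno
 */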
560
561/*
562 * Perform a binary search for the given parent_id. Return value is
563 * found/not found boolean, and indexp will be the index of the item
564 * or the index at which to insert the item if it's not found.
565 */
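/*
 * For example, with acache = { 5, 9, 20 } and numcached == 3: a lookup of
 * parent_id 9 returns 1 with *indexp == 1, while a lookup of parent_id 12
 * returns 0 with *indexp == 2 (the slot where 12 would be inserted).
 */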
566static int
567lookup_bucket(struct access_cache *cache, int *indexp, cnid_t parent_id)
568{
569 unsigned int lo, hi;
570 int index, matches = 0;
571
572 if (cache->numcached == 0) {
573 *indexp = 0;
574 return 0; // table is empty, so insert at index=0 and report no match
575 }
576
577 if (cache->numcached > CACHE_ELEMS) {
578 /*printf("EGAD! numcached is %d... cut our losses and trim to %d\n",
579 cache->numcached, CACHE_ELEMS);*/
580 cache->numcached = CACHE_ELEMS;
581 }
582
583 lo = 0;
584 hi = cache->numcached - 1;
585 index = -1;
586
587 /* perform binary search for parent_id */
588 do {
589 unsigned int mid = (hi - lo)/2 + lo;
590 unsigned int this_id = cache->acache[mid];
591
592 if (parent_id == this_id) {
593 index = mid;
594 break;
595 }
596
597 if (parent_id < this_id) {
598 hi = mid;
599 continue;
600 }
601
602 if (parent_id > this_id) {
603 lo = mid + 1;
604 continue;
605 }
606 } while(lo < hi);
607
608 /* check if lo and hi converged on the match */
609 if (parent_id == cache->acache[hi]) {
610 index = hi;
611 }
612
613 /* if no existing entry found, find index for new one */
614 if (index == -1) {
615 index = (parent_id < cache->acache[hi]) ? hi : hi + 1;
616 matches = 0;
617 } else {
618 matches = 1;
619 }
620
621 *indexp = index;
622 return matches;
623}
624
625/*
626 * Add a node to the access_cache at the given index (or do a lookup first
627 * to find the index if -1 is passed in). We currently do a replace rather
628 * than an insert if the cache is full.
629 */
630static void
631add_node(struct access_cache *cache, int index, cnid_t nodeID, int access)
632{
633 int lookup_index = -1;
634
635 /* need to do a lookup first if -1 passed for index */
636 if (index == -1) {
637 if (lookup_bucket(cache, &lookup_index, nodeID)) {
638 if (cache->haveaccess[lookup_index] != access) {
639 /* change access info for existing entry... should never happen */
640 cache->haveaccess[lookup_index] = access;
641 }
642
643 /* mission accomplished */
644 return;
645 } else {
646 index = lookup_index;
647 }
648
649 }
650
651 /* if the cache is full, do a replace rather than an insert */
652 if (cache->numcached >= CACHE_ELEMS) {
653 //printf("cache is full (%d). replace at index %d\n", cache->numcached, index);
654 cache->numcached = CACHE_ELEMS-1;
655
656 if (index > cache->numcached) {
657 // printf("index %d pinned to %d\n", index, cache->numcached);
658 index = cache->numcached;
659 }
660 } else if (index >= 0 && index < cache->numcached) {
661 /* only do bcopy if we're inserting */
662 bcopy( cache->acache+index, cache->acache+(index+1), (cache->numcached - index)*sizeof(int) );
663 bcopy( cache->haveaccess+index, cache->haveaccess+(index+1), (cache->numcached - index)*sizeof(Boolean) );
664 }
665
666 cache->acache[index] = nodeID;
667 cache->haveaccess[index] = access;
668 cache->numcached++;
669}
670
671
672struct cinfo {
673 uid_t uid;
674 gid_t gid;
675 mode_t mode;
676 cnid_t parentcnid;
677};
678
679static int
680snoop_callback(const struct cat_desc *descp, const struct cat_attr *attrp, void * arg)
681{
682 struct cinfo *cip = (struct cinfo *)arg;
683
684 cip->uid = attrp->ca_uid;
685 cip->gid = attrp->ca_gid;
686 cip->mode = attrp->ca_mode;
687 cip->parentcnid = descp->cd_parentcnid;
688
689 return (0);
690}
691
692/*
693 * Lookup the cnid's attr info (uid, gid, and mode) as well as its parent id. If the item
694 * isn't incore, then go to the catalog.
695 */
696static int
697do_attr_lookup(struct hfsmount *hfsmp, struct access_cache *cache, dev_t dev, cnid_t cnid,
698 struct cnode *skip_cp, CatalogKey *keyp, struct cat_attr *cnattrp, struct proc *p)
699{
700 int error = 0;
701
702 /* if this id matches the one the fsctl was called with, skip the lookup */
703 if (cnid == skip_cp->c_cnid) {
704 cnattrp->ca_uid = skip_cp->c_uid;
705 cnattrp->ca_gid = skip_cp->c_gid;
706 cnattrp->ca_mode = skip_cp->c_mode;
707 keyp->hfsPlus.parentID = skip_cp->c_parentcnid;
708 } else {
709 struct cinfo c_info;
710
 711 /* otherwise, check the cnode hash in case the file/dir is incore */
712 if (hfs_chash_snoop(dev, cnid, snoop_callback, &c_info) == 0) {
713 cnattrp->ca_uid = c_info.uid;
714 cnattrp->ca_gid = c_info.gid;
715 cnattrp->ca_mode = c_info.mode;
716 keyp->hfsPlus.parentID = c_info.parentcnid;
717 } else {
718 int lockflags;
719
720 lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);
721
722 /* lookup this cnid in the catalog */
723 error = cat_getkeyplusattr(hfsmp, cnid, keyp, cnattrp);
724
725 hfs_systemfile_unlock(hfsmp, lockflags);
726
727 cache->lookups++;
728 }
729 }
730
731 return (error);
732}
733
734/*
735 * Compute whether we have access to the given directory (nodeID) and all its parents. Cache
736 * up to CACHE_LEVELS as we progress towards the root.
737 */
738static int
739do_access_check(struct hfsmount *hfsmp, int *err, struct access_cache *cache, HFSCatalogNodeID nodeID,
740 struct cnode *skip_cp, struct proc *theProcPtr, kauth_cred_t myp_ucred, dev_t dev )
741{
742 int myErr = 0;
743 int myResult;
744 HFSCatalogNodeID thisNodeID;
745 unsigned long myPerms;
746 struct cat_attr cnattr;
747 int cache_index = -1;
748 CatalogKey catkey;
749
750 int i = 0, ids_to_cache = 0;
751 int parent_ids[CACHE_LEVELS];
752
753 /* root always has access */
754 if (!suser(myp_ucred, NULL)) {
755 return (1);
756 }
757
758 thisNodeID = nodeID;
759 while (thisNodeID >= kRootDirID) {
760 myResult = 0; /* default to "no access" */
761
762 /* check the cache before resorting to hitting the catalog */
763
764 /* ASSUMPTION: access info of cached entries is "final"... i.e. no need
765 * to look any further after hitting cached dir */
766
767 if (lookup_bucket(cache, &cache_index, thisNodeID)) {
768 cache->cachehits++;
769 myResult = cache->haveaccess[cache_index];
770 goto ExitThisRoutine;
771 }
772
773 /* remember which parents we want to cache */
774 if (ids_to_cache < CACHE_LEVELS) {
775 parent_ids[ids_to_cache] = thisNodeID;
776 ids_to_cache++;
777 }
778
779 /* do the lookup (checks the cnode hash, then the catalog) */
780 myErr = do_attr_lookup(hfsmp, cache, dev, thisNodeID, skip_cp, &catkey, &cnattr, theProcPtr);
781 if (myErr) {
782 goto ExitThisRoutine; /* no access */
783 }
784
785 myPerms = DerivePermissionSummary(cnattr.ca_uid, cnattr.ca_gid,
786 cnattr.ca_mode, hfsmp->hfs_mp,
787 myp_ucred, theProcPtr);
788
789 if ( (myPerms & X_OK) == 0 ) {
790 myResult = 0;
791 goto ExitThisRoutine; /* no access */
792 }
793
794 /* up the hierarchy we go */
795 thisNodeID = catkey.hfsPlus.parentID;
796 }
797
798 /* if here, we have access to this node */
799 myResult = 1;
800
801 ExitThisRoutine:
802 if (myErr) {
803 //printf("*** error %d from catalog looking up parent %d/%d!\n", myErr, dev, thisNodeID);
804 myResult = 0;
805 }
806 *err = myErr;
807
808 /* cache the parent directory(ies) */
809 for (i = 0; i < ids_to_cache; i++) {
810 /* small optimization: get rid of double-lookup for all these */
811 // printf("adding %d to cache with result: %d\n", parent_ids[i], myResult);
812 add_node(cache, -1, parent_ids[i], myResult);
813 }
814
815 return (myResult);
816}
817/* end "bulk-access" support */
818
819
820
821/*
822 * Callback for use with freeze ioctl.
823 */
824static int
825hfs_freezewrite_callback(struct vnode *vp, void *cargs)
826{
827 vnode_waitforwrites(vp, 0, 0, 0, "hfs freeze");
828
829 return 0;
830}
831
832/*
833 * Control filesystem operating characteristics.
834 */
835int
836hfs_vnop_ioctl( struct vnop_ioctl_args /* {
837 vnode_t a_vp;
838 int a_command;
839 caddr_t a_data;
840 int a_fflag;
841 vfs_context_t a_context;
842 } */ *ap)
843{
844 struct vnode * vp = ap->a_vp;
845 struct hfsmount *hfsmp = VTOHFS(vp);
846 vfs_context_t context = ap->a_context;
847 kauth_cred_t cred = vfs_context_ucred(context);
848 proc_t p = vfs_context_proc(context);
849 struct vfsstatfs *vfsp;
850 boolean_t is64bit;
851
852 is64bit = proc_is64bit(p);
853
854 switch (ap->a_command) {
855
856 case HFS_RESIZE_VOLUME: {
857 u_int64_t newsize;
858 u_int64_t cursize;
859
860 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
861 if (suser(cred, NULL) &&
862 kauth_cred_getuid(cred) != vfsp->f_owner) {
863 return (EACCES); /* must be owner of file system */
864 }
865 if (!vnode_isvroot(vp)) {
866 return (EINVAL);
867 }
868 newsize = *(u_int64_t *)ap->a_data;
869 cursize = (u_int64_t)hfsmp->totalBlocks * (u_int64_t)hfsmp->blockSize;
870
871 if (newsize > cursize) {
872 return hfs_extendfs(hfsmp, *(u_int64_t *)ap->a_data, context);
873 } else if (newsize < cursize) {
874 return hfs_truncatefs(hfsmp, *(u_int64_t *)ap->a_data, context);
875 } else {
876 return (0);
877 }
878 }
879 case HFS_CHANGE_NEXT_ALLOCATION: {
880 u_int32_t location;
881
882 if (vnode_vfsisrdonly(vp)) {
883 return (EROFS);
884 }
885 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
886 if (suser(cred, NULL) &&
887 kauth_cred_getuid(cred) != vfsp->f_owner) {
888 return (EACCES); /* must be owner of file system */
889 }
890 if (!vnode_isvroot(vp)) {
891 return (EINVAL);
892 }
893 location = *(u_int32_t *)ap->a_data;
894 if (location > hfsmp->totalBlocks - 1) {
895 return (EINVAL);
896 }
897 /* Return previous value. */
898 *(u_int32_t *)ap->a_data = hfsmp->nextAllocation;
899 HFS_MOUNT_LOCK(hfsmp, TRUE);
900 hfsmp->nextAllocation = location;
901 hfsmp->vcbFlags |= 0xFF00;
902 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
903 return (0);
904 }
905
906#ifdef HFS_SPARSE_DEV
907 case HFS_SETBACKINGSTOREINFO: {
908 struct vnode * bsfs_rootvp;
909 struct vnode * di_vp;
910 struct hfs_backingstoreinfo *bsdata;
911 int error = 0;
912
913 if (hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) {
914 return (EALREADY);
915 }
916 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
917 if (suser(cred, NULL) &&
918 kauth_cred_getuid(cred) != vfsp->f_owner) {
919 return (EACCES); /* must be owner of file system */
920 }
921 bsdata = (struct hfs_backingstoreinfo *)ap->a_data;
922 if (bsdata == NULL) {
923 return (EINVAL);
924 }
925 if ((error = file_vnode(bsdata->backingfd, &di_vp))) {
926 return (error);
927 }
928 if ((error = vnode_getwithref(di_vp))) {
929 file_drop(bsdata->backingfd);
930 return(error);
931 }
932
933 if (vnode_mount(vp) == vnode_mount(di_vp)) {
934 (void)vnode_put(di_vp);
935 file_drop(bsdata->backingfd);
936 return (EINVAL);
937 }
938
939 /*
940 * Obtain the backing fs root vnode and keep a reference
941 * on it. This reference will be dropped in hfs_unmount.
942 */
943 error = VFS_ROOT(vnode_mount(di_vp), &bsfs_rootvp, NULL); /* XXX use context! */
944 if (error) {
945 (void)vnode_put(di_vp);
946 file_drop(bsdata->backingfd);
947 return (error);
948 }
949 vnode_ref(bsfs_rootvp);
950 vnode_put(bsfs_rootvp);
951
952 hfsmp->hfs_backingfs_rootvp = bsfs_rootvp;
953 hfsmp->hfs_flags |= HFS_HAS_SPARSE_DEVICE;
954 hfsmp->hfs_sparsebandblks = bsdata->bandsize / HFSTOVCB(hfsmp)->blockSize;
955 hfsmp->hfs_sparsebandblks *= 4;
956
957 (void)vnode_put(di_vp);
958 file_drop(bsdata->backingfd);
959 return (0);
960 }
961 case HFS_CLRBACKINGSTOREINFO: {
962 struct vnode * tmpvp;
963
964 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
965 if (suser(cred, NULL) &&
966 kauth_cred_getuid(cred) != vfsp->f_owner) {
967 return (EACCES); /* must be owner of file system */
968 }
969 if ((hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) &&
970 hfsmp->hfs_backingfs_rootvp) {
971
972 hfsmp->hfs_flags &= ~HFS_HAS_SPARSE_DEVICE;
973 tmpvp = hfsmp->hfs_backingfs_rootvp;
974 hfsmp->hfs_backingfs_rootvp = NULLVP;
975 hfsmp->hfs_sparsebandblks = 0;
976 vnode_rele(tmpvp);
977 }
978 return (0);
979 }
980#endif /* HFS_SPARSE_DEV */
981
982 case F_FREEZE_FS: {
983 struct mount *mp;
984 task_t task;
985
986 if (!is_suser())
987 return (EACCES);
988
989 mp = vnode_mount(vp);
990 hfsmp = VFSTOHFS(mp);
991
992 if (!(hfsmp->jnl))
993 return (ENOTSUP);
994
995 lck_rw_lock_exclusive(&hfsmp->hfs_insync);
996
997 task = current_task();
998 task_working_set_disable(task);
999
1000 // flush things before we get started to try and prevent
1001 // dirty data from being paged out while we're frozen.
1002 // note: can't do this after taking the lock as it will
1003 // deadlock against ourselves.
1004 vnode_iterate(mp, 0, hfs_freezewrite_callback, NULL);
1005 hfs_global_exclusive_lock_acquire(hfsmp);
1006 journal_flush(hfsmp->jnl);
1007
1008 // don't need to iterate on all vnodes, we just need to
1009 // wait for writes to the system files and the device vnode
1010 if (HFSTOVCB(hfsmp)->extentsRefNum)
1011 vnode_waitforwrites(HFSTOVCB(hfsmp)->extentsRefNum, 0, 0, 0, "hfs freeze");
1012 if (HFSTOVCB(hfsmp)->catalogRefNum)
1013 vnode_waitforwrites(HFSTOVCB(hfsmp)->catalogRefNum, 0, 0, 0, "hfs freeze");
1014 if (HFSTOVCB(hfsmp)->allocationsRefNum)
1015 vnode_waitforwrites(HFSTOVCB(hfsmp)->allocationsRefNum, 0, 0, 0, "hfs freeze");
1016 if (hfsmp->hfs_attribute_vp)
1017 vnode_waitforwrites(hfsmp->hfs_attribute_vp, 0, 0, 0, "hfs freeze");
1018 vnode_waitforwrites(hfsmp->hfs_devvp, 0, 0, 0, "hfs freeze");
1019
1020 hfsmp->hfs_freezing_proc = current_proc();
1021
1022 return (0);
1023 }
1024
1025 case F_THAW_FS: {
1026 if (!is_suser())
1027 return (EACCES);
1028
1029 // if we're not the one who froze the fs then we
1030 // can't thaw it.
1031 if (hfsmp->hfs_freezing_proc != current_proc()) {
1032 return EPERM;
1033 }
1034
1035 // NOTE: if you add code here, also go check the
1036 // code that "thaws" the fs in hfs_vnop_close()
1037 //
1038 hfsmp->hfs_freezing_proc = NULL;
1039 hfs_global_exclusive_lock_release(hfsmp);
1040 lck_rw_unlock_exclusive(&hfsmp->hfs_insync);
1041
1042 return (0);
1043 }
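/*
 * Typical use (sketch, from userspace): a backup or snapshot tool running as
 * root opens a file on the journaled volume and issues
 * fcntl(fd, F_FREEZE_FS, 0), does its work, then issues
 * fcntl(fd, F_THAW_FS, 0) from the same process -- the F_THAW_FS handler
 * above rejects any other caller.
 */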
1044
1045#define HFSIOC_BULKACCESS _IOW('h', 9, struct access_t)
1046#define HFS_BULKACCESS_FSCTL IOCBASECMD(HFSIOC_BULKACCESS)
1047
1048 case HFS_BULKACCESS_FSCTL:
1049 case HFS_BULKACCESS: {
1050 /*
 1051 * NOTE: on entry, the vnode is locked. In case this vnode
 1052 * happens to be in our list of file_ids, we note it and
 1053 * avoid calling hfs_chashget_nowait() on that id, as that
 1054 * would cause a "locking against myself" panic.
1055 */
1056 Boolean check_leaf = true;
1057
1058 struct user_access_t *user_access_structp;
1059 struct user_access_t tmp_user_access_t;
1060 struct access_cache cache;
1061
1062 int error = 0, i;
1063
1064 dev_t dev = VTOC(vp)->c_dev;
1065
1066 short flags;
1067 struct ucred myucred; /* XXX ILLEGAL */
1068 int num_files;
1069 int *file_ids = NULL;
1070 short *access = NULL;
1071
1072 cnid_t cnid;
1073 cnid_t prevParent_cnid = 0;
1074 unsigned long myPerms;
1075 short myaccess = 0;
1076 struct cat_attr cnattr;
1077 CatalogKey catkey;
1078 struct cnode *skip_cp = VTOC(vp);
1079 struct vfs_context my_context;
1080
1081 /* first, return error if not run as root */
1082 if (cred->cr_ruid != 0) {
1083 return EPERM;
1084 }
1085
1086 /* initialize the local cache and buffers */
1087 cache.numcached = 0;
1088 cache.cachehits = 0;
1089 cache.lookups = 0;
1090
1091 file_ids = (int *) get_pathbuff();
1092 access = (short *) get_pathbuff();
1093 cache.acache = (int *) get_pathbuff();
1094 cache.haveaccess = (Boolean *) get_pathbuff();
1095
1096 if (file_ids == NULL || access == NULL || cache.acache == NULL || cache.haveaccess == NULL) {
1097 release_pathbuff((char *) file_ids);
1098 release_pathbuff((char *) access);
1099 release_pathbuff((char *) cache.acache);
1100 release_pathbuff((char *) cache.haveaccess);
1101
1102 return ENOMEM;
1103 }
1104
1105 /* struct copyin done during dispatch... need to copy file_id array separately */
1106 if (ap->a_data == NULL) {
1107 error = EINVAL;
1108 goto err_exit_bulk_access;
1109 }
1110
1111 if (is64bit) {
1112 user_access_structp = (struct user_access_t *)ap->a_data;
1113 }
1114 else {
1115 struct access_t * accessp = (struct access_t *)ap->a_data;
1116 tmp_user_access_t.uid = accessp->uid;
1117 tmp_user_access_t.flags = accessp->flags;
1118 tmp_user_access_t.num_groups = accessp->num_groups;
1119 tmp_user_access_t.num_files = accessp->num_files;
1120 tmp_user_access_t.file_ids = CAST_USER_ADDR_T(accessp->file_ids);
1121 tmp_user_access_t.groups = CAST_USER_ADDR_T(accessp->groups);
1122 tmp_user_access_t.access = CAST_USER_ADDR_T(accessp->access);
1123 user_access_structp = &tmp_user_access_t;
1124 }
1125
1126 num_files = user_access_structp->num_files;
1127 if (num_files < 1) {
1128 goto err_exit_bulk_access;
1129 }
1130 if (num_files > 256) {
1131 error = EINVAL;
1132 goto err_exit_bulk_access;
1133 }
1134
1135 if ((error = copyin(user_access_structp->file_ids, (caddr_t)file_ids,
1136 num_files * sizeof(int)))) {
1137 goto err_exit_bulk_access;
1138 }
1139
1140 /* fill in the ucred structure */
1141 flags = user_access_structp->flags;
1142 if ((flags & (F_OK | R_OK | W_OK | X_OK)) == 0) {
1143 flags = R_OK;
1144 }
1145
1146 /* check if we've been passed leaf node ids or parent ids */
1147 if (flags & PARENT_IDS_FLAG) {
1148 check_leaf = false;
1149 }
1150
1151 memset(&myucred, 0, sizeof(myucred));
1152 myucred.cr_ref = 1;
1153 myucred.cr_uid = myucred.cr_ruid = myucred.cr_svuid = user_access_structp->uid;
1154 myucred.cr_ngroups = user_access_structp->num_groups;
1155 if (myucred.cr_ngroups < 1 || myucred.cr_ngroups > 16) {
1156 myucred.cr_ngroups = 0;
1157 } else if ((error = copyin(user_access_structp->groups, (caddr_t)myucred.cr_groups,
1158 myucred.cr_ngroups * sizeof(gid_t)))) {
1159 goto err_exit_bulk_access;
1160 }
1161 myucred.cr_rgid = myucred.cr_svgid = myucred.cr_groups[0];
1162
1163 my_context.vc_proc = p;
1164 my_context.vc_ucred = &myucred;
1165
1166 /* Check access to each file_id passed in */
1167 for (i = 0; i < num_files; i++) {
1168#if 0
1169 cnid = (cnid_t) file_ids[i];
1170
1171 /* root always has access */
1172 if (!suser(&myucred, NULL)) {
1173 access[i] = 0;
1174 continue;
1175 }
1176
1177 if (check_leaf) {
1178
1179 /* do the lookup (checks the cnode hash, then the catalog) */
1180 error = do_attr_lookup(hfsmp, &cache, dev, cnid, skip_cp, &catkey, &cnattr, p);
1181 if (error) {
1182 access[i] = (short) error;
1183 continue;
1184 }
1185
1186 /* before calling CheckAccess(), check the target file for read access */
1187 myPerms = DerivePermissionSummary(cnattr.ca_uid, cnattr.ca_gid,
1188 cnattr.ca_mode, hfsmp->hfs_mp, &myucred, p );
1189
1190
1191 /* fail fast if no access */
1192 if ((myPerms & flags) == 0) {
1193 access[i] = EACCES;
1194 continue;
1195 }
1196 } else {
1197 /* we were passed an array of parent ids */
1198 catkey.hfsPlus.parentID = cnid;
1199 }
1200
1201 /* if the last guy had the same parent and had access, we're done */
1202 if (i > 0 && catkey.hfsPlus.parentID == prevParent_cnid && access[i-1] == 0) {
1203 cache.cachehits++;
1204 access[i] = 0;
1205 continue;
1206 }
1207
1208 myaccess = do_access_check(hfsmp, &error, &cache, catkey.hfsPlus.parentID,
1209 skip_cp, p, &myucred, dev);
1210
1211 if ( myaccess ) {
1212 access[i] = 0; // have access.. no errors to report
1213 } else {
1214 access[i] = (error != 0 ? (short) error : EACCES);
1215 }
1216
1217 prevParent_cnid = catkey.hfsPlus.parentID;
1218#else
1219 int myErr;
1220
1221 cnid = (cnid_t)file_ids[i];
1222
1223 while (cnid >= kRootDirID) {
1224 /* get the vnode for this cnid */
1225 myErr = hfs_vget(hfsmp, cnid, &vp, 0);
1226 if ( myErr ) {
1227 access[i] = EACCES;
1228 break;
1229 }
1230
1231 cnid = VTOC(vp)->c_parentcnid;
1232
1233 hfs_unlock(VTOC(vp));
1234 if (vnode_vtype(vp) == VDIR) {
1235 myErr = vnode_authorize(vp, NULL, (KAUTH_VNODE_SEARCH | KAUTH_VNODE_LIST_DIRECTORY), &my_context);
1236 } else {
1237 myErr = vnode_authorize(vp, NULL, KAUTH_VNODE_READ_DATA, &my_context);
1238 }
1239 vnode_put(vp);
1240 access[i] = myErr;
1241 if (myErr) {
1242 break;
1243 }
1244 }
1245#endif
1246 }
1247
1248 /* copyout the access array */
1249 if ((error = copyout((caddr_t)access, user_access_structp->access,
1250 num_files * sizeof (short)))) {
1251 goto err_exit_bulk_access;
1252 }
1253
1254 err_exit_bulk_access:
1255
1256 //printf("on exit (err %d), numfiles/numcached/cachehits/lookups is %d/%d/%d/%d\n", error, num_files, cache.numcached, cache.cachehits, cache.lookups);
1257
1258 release_pathbuff((char *) cache.acache);
1259 release_pathbuff((char *) cache.haveaccess);
1260 release_pathbuff((char *) file_ids);
1261 release_pathbuff((char *) access);
1262
1263 return (error);
1264 } /* HFS_BULKACCESS */
1265
1266 case HFS_SETACLSTATE: {
1267 int state;
1268
1269 if (ap->a_data == NULL) {
1270 return (EINVAL);
1271 }
1272
1273 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
1274 state = *(int *)ap->a_data;
1275
1276 // super-user can enable or disable acl's on a volume.
1277 // the volume owner can only enable acl's
1278 if (!is_suser() && (state == 0 || kauth_cred_getuid(cred) != vfsp->f_owner)) {
1279 return (EPERM);
1280 }
1281 if (state == 0 || state == 1)
1282 return hfs_setextendedsecurity(hfsmp, state);
1283 else
1284 return (EINVAL);
1285 }
1286
1287 case F_FULLFSYNC: {
1288 int error;
1289
1290 error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK);
1291 if (error == 0) {
1292 error = hfs_fsync(vp, MNT_NOWAIT, TRUE, p);
1293 hfs_unlock(VTOC(vp));
1294 }
1295
1296 return error;
1297 }
1298
1299 case F_CHKCLEAN: {
1300 register struct cnode *cp;
1301 int error;
1302
1303 if (!vnode_isreg(vp))
1304 return EINVAL;
1305
1306 error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK);
1307 if (error == 0) {
1308 cp = VTOC(vp);
1309 /*
1310 * used by regression test to determine if
1311 * all the dirty pages (via write) have been cleaned
 1312 * after a call to 'fsync'.
1313 */
1314 error = is_file_clean(vp, VTOF(vp)->ff_size);
1315 hfs_unlock(cp);
1316 }
1317 return (error);
1318 }
1319
1320 case F_RDADVISE: {
1321 register struct radvisory *ra;
1322 struct filefork *fp;
1323 int error;
1324
1325 if (!vnode_isreg(vp))
1326 return EINVAL;
1327
1328 ra = (struct radvisory *)(ap->a_data);
1329 fp = VTOF(vp);
1330
1331 /* Protect against a size change. */
1332 hfs_lock_truncate(VTOC(vp), TRUE);
1333
1334 if (ra->ra_offset >= fp->ff_size) {
1335 error = EFBIG;
1336 } else {
1337 error = advisory_read(vp, fp->ff_size, ra->ra_offset, ra->ra_count);
1338 }
1339
1340 hfs_unlock_truncate(VTOC(vp));
1341 return (error);
1342 }
1343
1344 case F_READBOOTSTRAP:
1345 case F_WRITEBOOTSTRAP:
1346 {
1347 struct vnode *devvp = NULL;
1348 user_fbootstraptransfer_t *user_bootstrapp;
1349 int devBlockSize;
1350 int error;
1351 uio_t auio;
1352 daddr64_t blockNumber;
1353 u_long blockOffset;
1354 u_long xfersize;
1355 struct buf *bp;
1356 user_fbootstraptransfer_t user_bootstrap;
1357
1358 if (!vnode_isvroot(vp))
1359 return (EINVAL);
1360 /* LP64 - when caller is a 64 bit process then we are passed a pointer
1361 * to a user_fbootstraptransfer_t else we get a pointer to a
1362 * fbootstraptransfer_t which we munge into a user_fbootstraptransfer_t
1363 */
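/*
 * The bootstrap area is only the first 1024 bytes of the volume (two
 * 512-byte boot blocks); the range check below rejects any transfer that
 * would run past that.
 */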
1364 if (is64bit) {
1365 user_bootstrapp = (user_fbootstraptransfer_t *)ap->a_data;
1366 }
1367 else {
1368 fbootstraptransfer_t *bootstrapp = (fbootstraptransfer_t *)ap->a_data;
1369 user_bootstrapp = &user_bootstrap;
1370 user_bootstrap.fbt_offset = bootstrapp->fbt_offset;
1371 user_bootstrap.fbt_length = bootstrapp->fbt_length;
1372 user_bootstrap.fbt_buffer = CAST_USER_ADDR_T(bootstrapp->fbt_buffer);
1373 }
1374 if (user_bootstrapp->fbt_offset + user_bootstrapp->fbt_length > 1024)
1375 return EINVAL;
1376
1377 devvp = VTOHFS(vp)->hfs_devvp;
1378 auio = uio_create(1, user_bootstrapp->fbt_offset,
1379 is64bit ? UIO_USERSPACE64 : UIO_USERSPACE32,
1380 (ap->a_command == F_WRITEBOOTSTRAP) ? UIO_WRITE : UIO_READ);
1381 uio_addiov(auio, user_bootstrapp->fbt_buffer, user_bootstrapp->fbt_length);
1382
1383 devBlockSize = vfs_devblocksize(vnode_mount(vp));
1384
1385 while (uio_resid(auio) > 0) {
1386 blockNumber = uio_offset(auio) / devBlockSize;
1387 error = (int)buf_bread(devvp, blockNumber, devBlockSize, cred, &bp);
1388 if (error) {
1389 if (bp) buf_brelse(bp);
1390 uio_free(auio);
1391 return error;
1392 };
1393
1394 blockOffset = uio_offset(auio) % devBlockSize;
1395 xfersize = devBlockSize - blockOffset;
1396 error = uiomove((caddr_t)buf_dataptr(bp) + blockOffset, (int)xfersize, auio);
1397 if (error) {
1398 buf_brelse(bp);
1399 uio_free(auio);
1400 return error;
1401 };
1402 if (uio_rw(auio) == UIO_WRITE) {
1403 error = VNOP_BWRITE(bp);
1404 if (error) {
1405 uio_free(auio);
1406 return error;
1407 }
1408 } else {
1409 buf_brelse(bp);
1410 };
1411 };
1412 uio_free(auio);
1413 };
1414 return 0;
1415
1416 case _IOC(IOC_OUT,'h', 4, 0): /* Create date in local time */
1417 {
1418 if (is64bit) {
1419 *(user_time_t *)(ap->a_data) = (user_time_t) (to_bsd_time(VTOVCB(vp)->localCreateDate));
1420 }
1421 else {
1422 *(time_t *)(ap->a_data) = to_bsd_time(VTOVCB(vp)->localCreateDate);
1423 }
1424 return 0;
1425 }
1426
1427 case HFS_GET_MOUNT_TIME:
1428 return copyout(&hfsmp->hfs_mount_time, CAST_USER_ADDR_T(ap->a_data), sizeof(hfsmp->hfs_mount_time));
1429 break;
1430
1431 case HFS_GET_LAST_MTIME:
1432 return copyout(&hfsmp->hfs_last_mounted_mtime, CAST_USER_ADDR_T(ap->a_data), sizeof(hfsmp->hfs_last_mounted_mtime));
1433 break;
1434
1435 case HFS_SET_BOOT_INFO:
1436 if (!vnode_isvroot(vp))
1437 return(EINVAL);
1438 if (!kauth_cred_issuser(cred) && (kauth_cred_getuid(cred) != vfs_statfs(HFSTOVFS(hfsmp))->f_owner))
1439 return(EACCES); /* must be superuser or owner of filesystem */
1440 HFS_MOUNT_LOCK(hfsmp, TRUE);
1441 bcopy(ap->a_data, &hfsmp->vcbFndrInfo, sizeof(hfsmp->vcbFndrInfo));
1442 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
1443 (void) hfs_flushvolumeheader(hfsmp, MNT_WAIT, 0);
1444 break;
1445
1446 case HFS_GET_BOOT_INFO:
1447 if (!vnode_isvroot(vp))
1448 return(EINVAL);
1449 HFS_MOUNT_LOCK(hfsmp, TRUE);
1450 bcopy(&hfsmp->vcbFndrInfo, ap->a_data, sizeof(hfsmp->vcbFndrInfo));
1451 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
1452 break;
1453
1454 default:
1455 return (ENOTTY);
1456 }
1457
1458 /* Should never get here */
1459 return 0;
1460}
1461
1462/*
1463 * select
1464 */
1465int
1466hfs_vnop_select(__unused struct vnop_select_args *ap)
1467/*
1468 struct vnop_select_args {
1469 vnode_t a_vp;
1470 int a_which;
1471 int a_fflags;
1472 void *a_wql;
1473 vfs_context_t a_context;
1474 };
1475*/
1476{
1477 /*
1478 * We should really check to see if I/O is possible.
1479 */
1480 return (1);
1481}
1482
1483/*
1484 * Converts a logical block number to a physical block, and optionally returns
1485 * the amount of remaining blocks in a run. The logical block is based on hfsNode.logBlockSize.
1486 * The physical block number is based on the device block size, currently it's 512.
1487 * The block run is returned in logical blocks, and is the REMAINING amount of blocks.
1488 */
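/*
 * For example, with a 4 KB logical block size and MapFileBlockC() reporting
 * 32 KB contiguously available, *runp is set to (32768 / 4096) - 1 = 7:
 * seven more logical blocks follow the one being mapped.
 */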
1489int
1490hfs_bmap(struct vnode *vp, daddr_t bn, struct vnode **vpp, daddr64_t *bnp, int *runp)
1491{
1492 struct cnode *cp = VTOC(vp);
1493 struct filefork *fp = VTOF(vp);
1494 struct hfsmount *hfsmp = VTOHFS(vp);
1495 int retval = E_NONE;
1496 daddr_t logBlockSize;
1497 size_t bytesContAvail = 0;
1498 off_t blockposition;
1499 int lockExtBtree;
1500 int lockflags = 0;
1501
1502 /*
1503 * Check for underlying vnode requests and ensure that logical
1504 * to physical mapping is requested.
1505 */
1506 if (vpp != NULL)
1507 *vpp = cp->c_devvp;
1508 if (bnp == NULL)
1509 return (0);
1510
1511 logBlockSize = GetLogicalBlockSize(vp);
1512 blockposition = (off_t)bn * (off_t)logBlockSize;
1513
1514 lockExtBtree = overflow_extents(fp);
1515
1516 if (lockExtBtree)
1517 lockflags = hfs_systemfile_lock(hfsmp, SFL_EXTENTS, HFS_SHARED_LOCK);
1518
1519 retval = MacToVFSError(
1520 MapFileBlockC (HFSTOVCB(hfsmp),
1521 (FCB*)fp,
1522 MAXPHYSIO,
1523 blockposition,
1524 bnp,
1525 &bytesContAvail));
1526
1527 if (lockExtBtree)
1528 hfs_systemfile_unlock(hfsmp, lockflags);
1529
1530 if (retval == E_NONE) {
1531 /* Figure out how many read ahead blocks there are */
1532 if (runp != NULL) {
1533 if (can_cluster(logBlockSize)) {
1534 /* Make sure this result never goes negative: */
1535 *runp = (bytesContAvail < logBlockSize) ? 0 : (bytesContAvail / logBlockSize) - 1;
1536 } else {
1537 *runp = 0;
1538 }
1539 }
1540 }
1541 return (retval);
1542}
1543
1544/*
1545 * Convert logical block number to file offset.
1546 */
1547int
1548hfs_vnop_blktooff(struct vnop_blktooff_args *ap)
1549/*
1550 struct vnop_blktooff_args {
1551 vnode_t a_vp;
1552 daddr64_t a_lblkno;
1553 off_t *a_offset;
1554 };
1555*/
1556{
1557 if (ap->a_vp == NULL)
1558 return (EINVAL);
1559 *ap->a_offset = (off_t)ap->a_lblkno * (off_t)GetLogicalBlockSize(ap->a_vp);
1560
1561 return(0);
1562}
1563
1564/*
1565 * Convert file offset to logical block number.
1566 */
1567int
1568hfs_vnop_offtoblk(struct vnop_offtoblk_args *ap)
1569/*
1570 struct vnop_offtoblk_args {
1571 vnode_t a_vp;
1572 off_t a_offset;
1573 daddr64_t *a_lblkno;
1574 };
1575*/
1576{
1577 if (ap->a_vp == NULL)
1578 return (EINVAL);
1579 *ap->a_lblkno = (daddr64_t)(ap->a_offset / (off_t)GetLogicalBlockSize(ap->a_vp));
1580
1581 return(0);
1582}
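/*
 * The two conversions above are exact inverses only on block boundaries:
 * blktooff(offtoblk(off)) rounds off down to a logical-block boundary, e.g.
 * with a 4 KB logical block size an offset of 10000 maps to block 2, which
 * maps back to offset 8192.
 */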
1583
1584/*
1585 * Map file offset to physical block number.
1586 *
1587 * System file cnodes are expected to be locked (shared or exclusive).
1588 */
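/*
 * When the requested offset falls inside an invalid (not-yet-zeroed) range,
 * *a_bpn is returned as -1 so the caller treats the span as a hole to be
 * zero-filled; *a_run is trimmed to the invalid-range boundary where that
 * still matters (see the rl_scan() handling below).
 */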
1589int
1590hfs_vnop_blockmap(struct vnop_blockmap_args *ap)
1591/*
1592 struct vnop_blockmap_args {
1593 vnode_t a_vp;
1594 off_t a_foffset;
1595 size_t a_size;
1596 daddr64_t *a_bpn;
1597 size_t *a_run;
1598 void *a_poff;
1599 int a_flags;
1600 vfs_context_t a_context;
1601 };
1602*/
1603{
1604 struct vnode *vp = ap->a_vp;
1605 struct cnode *cp;
1606 struct filefork *fp;
1607 struct hfsmount *hfsmp;
1608 size_t bytesContAvail = 0;
1609 int retval = E_NONE;
1610 int syslocks = 0;
1611 int lockflags = 0;
1612 struct rl_entry *invalid_range;
1613 enum rl_overlaptype overlaptype;
1614 int started_tr = 0;
1615 int tooklock = 0;
1616
1617 /* Do not allow blockmap operation on a directory */
1618 if (vnode_isdir(vp)) {
1619 return (ENOTSUP);
1620 }
1621
1622 /*
1623 * Check for underlying vnode requests and ensure that logical
1624 * to physical mapping is requested.
1625 */
1626 if (ap->a_bpn == NULL)
1627 return (0);
1628
1629 if ( !vnode_issystem(vp) && !vnode_islnk(vp)) {
1630 if (VTOC(vp)->c_lockowner != current_thread()) {
1631 hfs_lock(VTOC(vp), HFS_FORCE_LOCK);
1632 tooklock = 1;
1633 } else {
1634 cp = VTOC(vp);
1635 panic("blockmap: %s cnode lock already held!\n",
1636 cp->c_desc.cd_nameptr ? cp->c_desc.cd_nameptr : "");
1637 }
1638 }
1639 hfsmp = VTOHFS(vp);
1640 cp = VTOC(vp);
1641 fp = VTOF(vp);
1642
1643retry:
1644 if (fp->ff_unallocblocks) {
1645 if (hfs_start_transaction(hfsmp) != 0) {
1646 retval = EINVAL;
1647 goto exit;
1648 } else {
1649 started_tr = 1;
1650 }
1651 syslocks = SFL_EXTENTS | SFL_BITMAP;
1652
1653 } else if (overflow_extents(fp)) {
1654 syslocks = SFL_EXTENTS;
1655 }
1656
1657 if (syslocks)
1658 lockflags = hfs_systemfile_lock(hfsmp, syslocks, HFS_EXCLUSIVE_LOCK);
1659
1660 /*
1661 * Check for any delayed allocations.
1662 */
1663 if (fp->ff_unallocblocks) {
1664 SInt64 actbytes;
1665 u_int32_t loanedBlocks;
1666
1667 //
1668 // Make sure we have a transaction. It's possible
1669 // that we came in and fp->ff_unallocblocks was zero
1670 // but during the time we blocked acquiring the extents
1671 // btree, ff_unallocblocks became non-zero and so we
1672 // will need to start a transaction.
1673 //
1674 if (started_tr == 0) {
1675 if (syslocks) {
1676 hfs_systemfile_unlock(hfsmp, lockflags);
1677 syslocks = 0;
1678 }
1679 goto retry;
1680 }
1681
1682 /*
 1683 * Note: ExtendFileC will release any blocks on loan and
 1684 * acquire real blocks. So we ask to extend by zero bytes
1685 * since ExtendFileC will account for the virtual blocks.
1686 */
1687
1688 loanedBlocks = fp->ff_unallocblocks;
1689 retval = ExtendFileC(hfsmp, (FCB*)fp, 0, 0,
1690 kEFAllMask | kEFNoClumpMask, &actbytes);
1691
1692 if (retval) {
1693 fp->ff_unallocblocks = loanedBlocks;
1694 cp->c_blocks += loanedBlocks;
1695 fp->ff_blocks += loanedBlocks;
1696
1697 HFS_MOUNT_LOCK(hfsmp, TRUE);
1698 hfsmp->loanedBlocks += loanedBlocks;
1699 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
1700 }
1701
1702 if (retval) {
1703 hfs_systemfile_unlock(hfsmp, lockflags);
1704 cp->c_flag |= C_MODIFIED;
1705 if (started_tr) {
1706 (void) hfs_update(vp, TRUE);
1707 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
1708
1709 hfs_end_transaction(hfsmp);
1710 }
1711 goto exit;
1712 }
1713 }
1714
1715 retval = MapFileBlockC(hfsmp, (FCB *)fp, ap->a_size, ap->a_foffset,
1716 ap->a_bpn, &bytesContAvail);
1717 if (syslocks) {
1718 hfs_systemfile_unlock(hfsmp, lockflags);
1719 syslocks = 0;
1720 }
1721
1722 if (started_tr) {
1723 (void) hfs_update(vp, TRUE);
1724 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
1725 hfs_end_transaction(hfsmp);
1726 started_tr = 0;
1727 }
1728 if (retval) {
1729 goto exit;
1730 }
1731
1732 /* Adjust the mapping information for invalid file ranges: */
1733 overlaptype = rl_scan(&fp->ff_invalidranges, ap->a_foffset,
1734 ap->a_foffset + (off_t)bytesContAvail - 1,
1735 &invalid_range);
1736 if (overlaptype != RL_NOOVERLAP) {
1737 switch(overlaptype) {
1738 case RL_MATCHINGOVERLAP:
1739 case RL_OVERLAPCONTAINSRANGE:
1740 case RL_OVERLAPSTARTSBEFORE:
1741 /* There's no valid block for this byte offset: */
1742 *ap->a_bpn = (daddr64_t)-1;
1743 /* There's no point limiting the amount to be returned
1744 * if the invalid range that was hit extends all the way
1745 * to the EOF (i.e. there's no valid bytes between the
1746 * end of this range and the file's EOF):
1747 */
1748 if (((off_t)fp->ff_size > (invalid_range->rl_end + 1)) &&
1749 (invalid_range->rl_end + 1 - ap->a_foffset < bytesContAvail)) {
1750 bytesContAvail = invalid_range->rl_end + 1 - ap->a_foffset;
1751 }
1752 break;
1753
1754 case RL_OVERLAPISCONTAINED:
1755 case RL_OVERLAPENDSAFTER:
1756 /* The range of interest hits an invalid block before the end: */
1757 if (invalid_range->rl_start == ap->a_foffset) {
1758 /* There's actually no valid information to be had starting here: */
1759 *ap->a_bpn = (daddr64_t)-1;
1760 if (((off_t)fp->ff_size > (invalid_range->rl_end + 1)) &&
1761 (invalid_range->rl_end + 1 - ap->a_foffset < bytesContAvail)) {
1762 bytesContAvail = invalid_range->rl_end + 1 - ap->a_foffset;
1763 }
1764 } else {
1765 bytesContAvail = invalid_range->rl_start - ap->a_foffset;
1766 }
1767 break;
1768
1769 case RL_NOOVERLAP:
1770 break;
1771 } /* end switch */
1772 if (bytesContAvail > ap->a_size)
1773 bytesContAvail = ap->a_size;
1774 }
1775 if (ap->a_run)
1776 *ap->a_run = bytesContAvail;
1777
1778 if (ap->a_poff)
1779 *(int *)ap->a_poff = 0;
1780exit:
1781 if (tooklock)
1782 hfs_unlock(cp);
1783
1784 return (MacToVFSError(retval));
1785}
1786
1787
1788/*
1789 * prepare and issue the I/O
1790 * buf_strategy knows how to deal
1791 * with requests that require
1792 * fragmented I/Os
1793 */
1794int
1795hfs_vnop_strategy(struct vnop_strategy_args *ap)
1796{
1797 buf_t bp = ap->a_bp;
1798 vnode_t vp = buf_vnode(bp);
1799 struct cnode *cp = VTOC(vp);
1800
1801 return (buf_strategy(cp->c_devvp, ap));
1802}
1803
1804
1805static int
1806do_hfs_truncate(struct vnode *vp, off_t length, int flags, int skipsetsize, vfs_context_t context)
1807{
1808 register struct cnode *cp = VTOC(vp);
1809 struct filefork *fp = VTOF(vp);
 1810 struct proc *p = vfs_context_proc(context);
1811 kauth_cred_t cred = vfs_context_ucred(context);
1812 int retval;
1813 off_t bytesToAdd;
1814 off_t actualBytesAdded;
1815 off_t filebytes;
1816 u_int64_t old_filesize;
1817 u_long fileblocks;
1818 int blksize;
1819 struct hfsmount *hfsmp;
1820 int lockflags;
1821
1822 blksize = VTOVCB(vp)->blockSize;
1823 fileblocks = fp->ff_blocks;
1824 filebytes = (off_t)fileblocks * (off_t)blksize;
1825 old_filesize = fp->ff_size;
1826
1827 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_START,
1828 (int)length, (int)fp->ff_size, (int)filebytes, 0, 0);
1829
1830 if (length < 0)
1831 return (EINVAL);
1832
1833 if ((!ISHFSPLUS(VTOVCB(vp))) && (length > (off_t)MAXHFSFILESIZE))
1834 return (EFBIG);
1835
1836 hfsmp = VTOHFS(vp);
1837
1838 retval = E_NONE;
1839
1840 /* Files that are changing size are not hot file candidates. */
1841 if (hfsmp->hfc_stage == HFC_RECORDING) {
1842 fp->ff_bytesread = 0;
1843 }
1844
1845 /*
1846 * We cannot just check if fp->ff_size == length (as an optimization)
1847 * since there may be extra physical blocks that also need truncation.
1848 */
1849#if QUOTA
1850 if ((retval = hfs_getinoquota(cp)))
1851 return(retval);
1852#endif /* QUOTA */
1853
1854 /*
1855 * Lengthen the size of the file. We must ensure that the
1856 * last byte of the file is allocated. Since the smallest
1857 * value of ff_size is 0, length will be at least 1.
1858 */
1859 if (length > (off_t)fp->ff_size) {
1860#if QUOTA
1861 retval = hfs_chkdq(cp, (int64_t)(roundup(length - filebytes, blksize)),
1862 cred, 0);
1863 if (retval)
1864 goto Err_Exit;
1865#endif /* QUOTA */
1866 /*
1867 * If we don't have enough physical space then
1868 * we need to extend the physical size.
1869 */
1870 if (length > filebytes) {
1871 int eflags;
1872 u_long blockHint = 0;
1873
1874 /* All or nothing and don't round up to clumpsize. */
1875 eflags = kEFAllMask | kEFNoClumpMask;
1876
1877 if (cred && suser(cred, NULL) != 0)
1878 eflags |= kEFReserveMask; /* keep a reserve */
1879
1880 /*
1881 * Allocate Journal and Quota files in metadata zone.
1882 */
1883 if (filebytes == 0 &&
1884 hfsmp->hfs_flags & HFS_METADATA_ZONE &&
1885 hfs_virtualmetafile(cp)) {
1886 eflags |= kEFMetadataMask;
1887 blockHint = hfsmp->hfs_metazone_start;
1888 }
1889 if (hfs_start_transaction(hfsmp) != 0) {
1890 retval = EINVAL;
1891 goto Err_Exit;
1892 }
1893
1894 /* Protect extents b-tree and allocation bitmap */
1895 lockflags = SFL_BITMAP;
1896 if (overflow_extents(fp))
1897 lockflags |= SFL_EXTENTS;
1898 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
1899
1900 while ((length > filebytes) && (retval == E_NONE)) {
1901 bytesToAdd = length - filebytes;
1902 retval = MacToVFSError(ExtendFileC(VTOVCB(vp),
1903 (FCB*)fp,
1904 bytesToAdd,
1905 blockHint,
1906 eflags,
1907 &actualBytesAdded));
1908
1909 filebytes = (off_t)fp->ff_blocks * (off_t)blksize;
1910 if (actualBytesAdded == 0 && retval == E_NONE) {
1911 if (length > filebytes)
1912 length = filebytes;
1913 break;
1914 }
1915 } /* endwhile */
1916
1917 hfs_systemfile_unlock(hfsmp, lockflags);
1918
1919 if (hfsmp->jnl) {
1920 (void) hfs_update(vp, TRUE);
1921 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
1922 }
1923
1924 hfs_end_transaction(hfsmp);
1925
1926 if (retval)
1927 goto Err_Exit;
1928
1929 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_NONE,
1930 (int)length, (int)fp->ff_size, (int)filebytes, 0, 0);
1931 }
1932
1933 if (!(flags & IO_NOZEROFILL)) {
1934 if (UBCINFOEXISTS(vp) && retval == E_NONE) {
1935 struct rl_entry *invalid_range;
1936 off_t zero_limit;
1937
1938 zero_limit = (fp->ff_size + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64;
1939 if (length < zero_limit) zero_limit = length;
1940
1941 if (length > (off_t)fp->ff_size) {
1942 struct timeval tv;
1943
 1944 /* Extending the file: time to fill out the current last page with zeroes? */
1945 if ((fp->ff_size & PAGE_MASK_64) &&
1946 (rl_scan(&fp->ff_invalidranges, fp->ff_size & ~PAGE_MASK_64,
1947 fp->ff_size - 1, &invalid_range) == RL_NOOVERLAP)) {
1948
1949 /* There's some valid data at the start of the (current) last page
1950 of the file, so zero out the remainder of that page to ensure the
1951 entire page contains valid data. Since there is no invalid range
1952 possible past the (current) eof, there's no need to remove anything
1953 from the invalid range list before calling cluster_write(): */
1954 hfs_unlock(cp);
1955 retval = cluster_write(vp, (struct uio *) 0, fp->ff_size, zero_limit,
1956 fp->ff_size, (off_t)0,
1957 (flags & IO_SYNC) | IO_HEADZEROFILL | IO_NOZERODIRTY);
1958 hfs_lock(cp, HFS_FORCE_LOCK);
1959 if (retval) goto Err_Exit;
1960
1961 /* Merely invalidate the remaining area, if necessary: */
1962 if (length > zero_limit) {
1963 microuptime(&tv);
1964 rl_add(zero_limit, length - 1, &fp->ff_invalidranges);
1965 cp->c_zftimeout = tv.tv_sec + ZFTIMELIMIT;
1966 }
1967 } else {
1968 /* The page containing the (current) eof is invalid: just add the
1969 remainder of the page to the invalid list, along with the area
1970 being newly allocated:
1971 */
1972 microuptime(&tv);
1973 rl_add(fp->ff_size, length - 1, &fp->ff_invalidranges);
1974 cp->c_zftimeout = tv.tv_sec + ZFTIMELIMIT;
1975 };
1976 }
1977 } else {
1978 panic("hfs_truncate: invoked on non-UBC object?!");
1979 };
1980 }
1981 cp->c_touch_modtime = TRUE;
1982 fp->ff_size = length;
1983
1984 /* Nested transactions will do their own ubc_setsize. */
1985 if (!skipsetsize) {
1986 /*
1987 * ubc_setsize can cause a pagein here
1988 * so we need to drop cnode lock.
1989 */
1990 hfs_unlock(cp);
1991 ubc_setsize(vp, length);
1992 hfs_lock(cp, HFS_FORCE_LOCK);
1993 }
1994
1995 } else { /* Shorten the size of the file */
1996
1997 if ((off_t)fp->ff_size > length) {
1998 /*
1999 * Any buffers that are past the truncation point need to be
2000 * invalidated (to maintain buffer cache consistency).
2001 */
2002
2003 /* Nested transactions will do their own ubc_setsize. */
2004 if (!skipsetsize) {
2005 /*
2006 * ubc_setsize can cause a pageout here
2007 * so we need to drop cnode lock.
2008 */
2009 hfs_unlock(cp);
2010 ubc_setsize(vp, length);
2011 hfs_lock(cp, HFS_FORCE_LOCK);
2012 }
2013
2014 /* Any space previously marked as invalid is now irrelevant: */
2015 rl_remove(length, fp->ff_size - 1, &fp->ff_invalidranges);
2016 }
2017
2018 /*
2019 * Account for any unmapped blocks. Note that the new
2020 * file length can still end up with unmapped blocks.
2021 */
2022 if (fp->ff_unallocblocks > 0) {
2023 u_int32_t finalblks;
2024 u_int32_t loanedBlocks;
2025
2026 HFS_MOUNT_LOCK(hfsmp, TRUE);
2027
2028 loanedBlocks = fp->ff_unallocblocks;
2029 cp->c_blocks -= loanedBlocks;
2030 fp->ff_blocks -= loanedBlocks;
2031 fp->ff_unallocblocks = 0;
2032
2033 hfsmp->loanedBlocks -= loanedBlocks;
2034
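			/*
			 * Round the new length up to whole allocation blocks.  If the
			 * shortened file still extends past its mapped blocks, loan
			 * back the difference as unallocated blocks.
			 */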
2035 finalblks = (length + blksize - 1) / blksize;
2036 if (finalblks > fp->ff_blocks) {
2037 /* calculate required unmapped blocks */
2038 loanedBlocks = finalblks - fp->ff_blocks;
2039 hfsmp->loanedBlocks += loanedBlocks;
2040
2041 fp->ff_unallocblocks = loanedBlocks;
2042 cp->c_blocks += loanedBlocks;
2043 fp->ff_blocks += loanedBlocks;
2044 }
2045 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
2046 }
2047
2048 /*
2049 * For a TBE process, deallocation of the file blocks is delayed
2050 * until the file is closed, and hfs_close calls truncate with the
2051 * IO_NDELAY flag set. So when IO_NDELAY isn't set, we make sure
2052 * this isn't a TBE process.
2053 */
2054 if ((flags & IO_NDELAY) || (proc_tbe(p) == 0)) {
2055#if QUOTA
2056 off_t savedbytes = ((off_t)fp->ff_blocks * (off_t)blksize);
2057#endif /* QUOTA */
2058 if (hfs_start_transaction(hfsmp) != 0) {
2059 retval = EINVAL;
2060 goto Err_Exit;
2061 }
2062
2063 if (fp->ff_unallocblocks == 0) {
2064 /* Protect extents b-tree and allocation bitmap */
2065 lockflags = SFL_BITMAP;
2066 if (overflow_extents(fp))
2067 lockflags |= SFL_EXTENTS;
2068 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
2069
2070 retval = MacToVFSError(TruncateFileC(VTOVCB(vp),
2071 (FCB*)fp, length, false));
2072
2073 hfs_systemfile_unlock(hfsmp, lockflags);
2074 }
2075 if (hfsmp->jnl) {
2076 if (retval == 0) {
2077 fp->ff_size = length;
2078 }
2079 (void) hfs_update(vp, TRUE);
2080 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
2081 }
2082
2083 hfs_end_transaction(hfsmp);
2084
2085 filebytes = (off_t)fp->ff_blocks * (off_t)blksize;
2086 if (retval)
2087 goto Err_Exit;
2088#if QUOTA
2089 /* These are bytesreleased */
2090 (void) hfs_chkdq(cp, (int64_t)-(savedbytes - filebytes), NOCRED, 0);
2091#endif /* QUOTA */
2092 }
2093 /* Only set update flag if the logical length changes */
2094 if (old_filesize != length)
2095 cp->c_touch_modtime = TRUE;
2096 fp->ff_size = length;
2097 }
2098 cp->c_touch_chgtime = TRUE;
2099 retval = hfs_update(vp, MNT_WAIT);
2100 if (retval) {
2101 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_NONE,
2102 -1, -1, -1, retval, 0);
2103 }
2104
2105Err_Exit:
2106
2107 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_END,
2108 (int)length, (int)fp->ff_size, (int)filebytes, retval, 0);
2109
2110 return (retval);
2111}
2112
2113
2114
2115/*
2116 * Truncate (or extend) a cnode to the given length, freeing or adding
2117 * disk blocks as needed.
2118 */
2119__private_extern__
2120int
2121hfs_truncate(struct vnode *vp, off_t length, int flags, int skipsetsize,
2122 vfs_context_t context)
2123{
2124 struct filefork *fp = VTOF(vp);
2125 off_t filebytes;
2126 u_long fileblocks;
2127 int blksize, error = 0;
2128 struct cnode *cp = VTOC(vp);
2129
2130 if (vnode_isdir(vp))
2131 return (EISDIR); /* cannot truncate an HFS directory! */
2132
2133 blksize = VTOVCB(vp)->blockSize;
2134 fileblocks = fp->ff_blocks;
2135 filebytes = (off_t)fileblocks * (off_t)blksize;
2136
2137 // have to loop truncating or growing files that are
2138 // really big because otherwise transactions can get
2139 // enormous and consume too many kernel resources.
2140
2141 if (length < filebytes) {
2142 while (filebytes > length) {
2143 if ((filebytes - length) > HFS_BIGFILE_SIZE) {
2144 filebytes -= HFS_BIGFILE_SIZE;
2145 } else {
2146 filebytes = length;
2147 }
2148 cp->c_flag |= C_FORCEUPDATE;
2149 error = do_hfs_truncate(vp, filebytes, flags, skipsetsize, context);
2150 if (error)
2151 break;
2152 }
2153 } else if (length > filebytes) {
2154 while (filebytes < length) {
2155 if ((length - filebytes) > HFS_BIGFILE_SIZE) {
2156 filebytes += HFS_BIGFILE_SIZE;
2157 } else {
2158 filebytes = length;
2159 }
2160 cp->c_flag |= C_FORCEUPDATE;
2161 error = do_hfs_truncate(vp, filebytes, flags, skipsetsize, context);
2162 if (error)
2163 break;
2164 }
2165 } else /* Same logical size */ {
2166
2167 error = do_hfs_truncate(vp, length, flags, skipsetsize, context);
2168 }
2169 /* Files that are changing size are not hot file candidates. */
2170 if (VTOHFS(vp)->hfc_stage == HFC_RECORDING) {
2171 fp->ff_bytesread = 0;
2172 }
2173
2174 return (error);
2175}
2176
2177
2178
2179/*
2180 * Preallocate file storage space.
2181 */
2182int
2183hfs_vnop_allocate(struct vnop_allocate_args /* {
2184 vnode_t a_vp;
2185 off_t a_length;
2186 u_int32_t a_flags;
2187 off_t *a_bytesallocated;
2188 off_t a_offset;
2189 vfs_context_t a_context;
2190 } */ *ap)
2191{
2192 struct vnode *vp = ap->a_vp;
2193 struct cnode *cp;
2194 struct filefork *fp;
2195 ExtendedVCB *vcb;
2196 off_t length = ap->a_length;
2197 off_t startingPEOF;
2198 off_t moreBytesRequested;
2199 off_t actualBytesAdded;
2200 off_t filebytes;
2201 u_long fileblocks;
2202 int retval, retval2;
2203 UInt32 blockHint;
2204 UInt32 extendFlags; /* For call to ExtendFileC */
2205 struct hfsmount *hfsmp;
2206 kauth_cred_t cred = vfs_context_ucred(ap->a_context);
2207 int lockflags;
2208
2209 *(ap->a_bytesallocated) = 0;
2210
2211 if (!vnode_isreg(vp))
2212 return (EISDIR);
2213 if (length < (off_t)0)
2214 return (EINVAL);
2215
2216 if ((retval = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK)))
2217 return (retval);
2218 cp = VTOC(vp);
2219 fp = VTOF(vp);
2220 hfsmp = VTOHFS(vp);
2221 vcb = VTOVCB(vp);
2222
2223 fileblocks = fp->ff_blocks;
2224 filebytes = (off_t)fileblocks * (off_t)vcb->blockSize;
2225
2226 if ((ap->a_flags & ALLOCATEFROMVOL) && (length < filebytes)) {
2227 retval = EINVAL;
2228 goto Err_Exit;
2229 }
2230
2231 /* Fill in the flags word for the call to Extend the file */
2232
2233 extendFlags = kEFNoClumpMask;
2234 if (ap->a_flags & ALLOCATECONTIG)
2235 extendFlags |= kEFContigMask;
2236 if (ap->a_flags & ALLOCATEALL)
2237 extendFlags |= kEFAllMask;
2238 if (cred && suser(cred, NULL) != 0)
2239 extendFlags |= kEFReserveMask;
2240
2241 retval = E_NONE;
2242 blockHint = 0;
2243 startingPEOF = filebytes;
2244
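	/*
	 * ALLOCATEFROMPEOF treats a_length as bytes to add beyond the current
	 * physical EOF, while ALLOCATEFROMVOL uses a_offset as a volume-relative
	 * allocation hint.
	 */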
2245 if (ap->a_flags & ALLOCATEFROMPEOF)
2246 length += filebytes;
2247 else if (ap->a_flags & ALLOCATEFROMVOL)
2248 blockHint = ap->a_offset / VTOVCB(vp)->blockSize;
2249
2250 /* If no changes are necessary, then we're done */
2251 if (filebytes == length)
2252 goto Std_Exit;
2253
2254 /*
2255 * Lengthen the size of the file. We must ensure that the
2256 * last byte of the file is allocated. Since the smallest
2257 * value of filebytes is 0, length will be at least 1.
2258 */
2259 if (length > filebytes) {
2260 moreBytesRequested = length - filebytes;
2261
2262#if QUOTA
2263 retval = hfs_chkdq(cp,
2264 (int64_t)(roundup(moreBytesRequested, vcb->blockSize)),
2265 cred, 0);
2266 if (retval)
2267 goto Err_Exit;
2268
2269#endif /* QUOTA */
2270 /*
2271 * Metadata zone checks.
2272 */
2273 if (hfsmp->hfs_flags & HFS_METADATA_ZONE) {
2274 /*
2275 * Allocate Journal and Quota files in metadata zone.
2276 */
2277 if (hfs_virtualmetafile(cp)) {
2278 extendFlags |= kEFMetadataMask;
2279 blockHint = hfsmp->hfs_metazone_start;
2280 } else if ((blockHint >= hfsmp->hfs_metazone_start) &&
2281 (blockHint <= hfsmp->hfs_metazone_end)) {
2282 /*
2283 * Move blockHint outside metadata zone.
2284 */
2285 blockHint = hfsmp->hfs_metazone_end + 1;
2286 }
2287 }
2288
2289 if (hfs_start_transaction(hfsmp) != 0) {
2290 retval = EINVAL;
2291 goto Err_Exit;
2292 }
2293
2294 /* Protect extents b-tree and allocation bitmap */
2295 lockflags = SFL_BITMAP;
2296 if (overflow_extents(fp))
2297 lockflags |= SFL_EXTENTS;
2298 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
2299
2300 retval = MacToVFSError(ExtendFileC(vcb,
2301 (FCB*)fp,
2302 moreBytesRequested,
2303 blockHint,
2304 extendFlags,
2305 &actualBytesAdded));
2306
2307 *(ap->a_bytesallocated) = actualBytesAdded;
2308 filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize;
2309
2310 hfs_systemfile_unlock(hfsmp, lockflags);
2311
2312 if (hfsmp->jnl) {
2313 (void) hfs_update(vp, TRUE);
2314 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
2315 }
2316
2317 hfs_end_transaction(hfsmp);
2318
2319 /*
2320 * If we get an error and no changes were made then exit;
2321 * otherwise we must do the hfs_update to reflect the changes.
2322 */
2323 if (retval && (startingPEOF == filebytes))
2324 goto Err_Exit;
2325
2326 /*
2327 * Adjust actualBytesAdded to be allocation block aligned, not
2328 * clump size aligned.
2329 * NOTE: What we report here does not affect reality until the
2330 * file is closed, when we truncate the file to allocation
2331 * block size.
2332 */
2333 if ((actualBytesAdded != 0) && (moreBytesRequested < actualBytesAdded))
2334 *(ap->a_bytesallocated) =
2335 roundup(moreBytesRequested, (off_t)vcb->blockSize);
2336
2337 } else { /* Shorten the size of the file */
2338
2339 if (fp->ff_size > length) {
2340 /*
2341 * Any buffers that are past the truncation point need to be
2342 * invalidated (to maintain buffer cache consistency).
2343 */
2344 }
2345
2346 if (hfs_start_transaction(hfsmp) != 0) {
2347 retval = EINVAL;
2348 goto Err_Exit;
2349 }
2350
2351 /* Protect extents b-tree and allocation bitmap */
2352 lockflags = SFL_BITMAP;
2353 if (overflow_extents(fp))
2354 lockflags |= SFL_EXTENTS;
2355 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
2356
2357 retval = MacToVFSError(TruncateFileC(vcb, (FCB*)fp, length, false));
2358
2359 hfs_systemfile_unlock(hfsmp, lockflags);
2360
2361 filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize;
2362
2363 if (hfsmp->jnl) {
2364 (void) hfs_update(vp, TRUE);
2365 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
2366 }
2367
2368 hfs_end_transaction(hfsmp);
2369
2370
2371 /*
2372 * If we get an error and no changes were made then exit;
2373 * otherwise we must do the hfs_update to reflect the changes.
2374 */
2375 if (retval && (startingPEOF == filebytes)) goto Err_Exit;
2376#if QUOTA
2377 /* These are bytesreleased */
2378 (void) hfs_chkdq(cp, (int64_t)-((startingPEOF - filebytes)), NOCRED,0);
2379#endif /* QUOTA */
2380
2381 if (fp->ff_size > filebytes) {
2382 fp->ff_size = filebytes;
2383
2384 hfs_unlock(cp);
2385 ubc_setsize(vp, fp->ff_size);
2386 hfs_lock(cp, HFS_FORCE_LOCK);
2387 }
2388 }
2389
2390Std_Exit:
2391 cp->c_touch_chgtime = TRUE;
2392 cp->c_touch_modtime = TRUE;
2393 retval2 = hfs_update(vp, MNT_WAIT);
2394
2395 if (retval == 0)
2396 retval = retval2;
2397Err_Exit:
2398 hfs_unlock(cp);
2399 return (retval);
2400}
2401
2402
2403/*
2404 * Pagein for HFS filesystem
2405 */
2406int
2407hfs_vnop_pagein(struct vnop_pagein_args *ap)
2408/*
2409 struct vnop_pagein_args {
2410 vnode_t a_vp,
2411 upl_t a_pl,
2412 vm_offset_t a_pl_offset,
2413 off_t a_f_offset,
2414 size_t a_size,
2415 int a_flags
2416 vfs_context_t a_context;
2417 };
2418*/
2419{
2420 vnode_t vp = ap->a_vp;
2421 int error;
2422
2423 error = cluster_pagein(vp, ap->a_pl, ap->a_pl_offset, ap->a_f_offset,
2424 ap->a_size, (off_t)VTOF(vp)->ff_size, ap->a_flags);
2425 /*
2426 * Keep track of blocks read.
2427 */
2428 if (VTOHFS(vp)->hfc_stage == HFC_RECORDING && error == 0) {
2429 struct cnode *cp;
2430 struct filefork *fp;
2431 int bytesread;
2432 int took_cnode_lock = 0;
2433
2434 cp = VTOC(vp);
2435 fp = VTOF(vp);
2436
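		/*
		 * For a small file whose first pagein covers the entire fork,
		 * count only ff_size bytes as read; otherwise count the full
		 * request.
		 */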
2437 if (ap->a_f_offset == 0 && fp->ff_size < PAGE_SIZE)
2438 bytesread = fp->ff_size;
2439 else
2440 bytesread = ap->a_size;
2441
2442 /* When ff_bytesread exceeds 32-bits, update it behind the cnode lock. */
2443 if ((fp->ff_bytesread + bytesread) > 0x00000000ffffffff) {
2444 hfs_lock(cp, HFS_FORCE_LOCK);
2445 took_cnode_lock = 1;
2446 }
2447 /*
2448 * If this file hasn't been seen since the start of
2449 * the current sampling period then start over.
2450 */
2451 if (cp->c_atime < VTOHFS(vp)->hfc_timebase) {
2452 struct timeval tv;
2453
2454 fp->ff_bytesread = bytesread;
2455 microtime(&tv);
2456 cp->c_atime = tv.tv_sec;
2457 } else {
2458 fp->ff_bytesread += bytesread;
2459 }
2460 cp->c_touch_acctime = TRUE;
2461 if (took_cnode_lock)
2462 hfs_unlock(cp);
2463 }
2464 return (error);
2465}
2466
2467/*
2468 * Pageout for HFS filesystem.
2469 */
2470int
2471hfs_vnop_pageout(struct vnop_pageout_args *ap)
2472/*
2473 struct vnop_pageout_args {
2474 vnode_t a_vp,
2475 upl_t a_pl,
2476 vm_offset_t a_pl_offset,
2477 off_t a_f_offset,
2478 size_t a_size,
2479 int a_flags
2480 vfs_context_t a_context;
2481 };
2482*/
2483{
2484 vnode_t vp = ap->a_vp;
2485 struct cnode *cp;
2486 struct filefork *fp;
2487 int retval;
2488 off_t end_of_range;
2489 off_t filesize;
2490
2491 cp = VTOC(vp);
2492 if (cp->c_lockowner == current_thread()) {
2493 panic("pageout: %s cnode lock already held!\n",
2494 cp->c_desc.cd_nameptr ? cp->c_desc.cd_nameptr : "");
2495 }
2496 if ( (retval = hfs_lock(cp, HFS_EXCLUSIVE_LOCK))) {
2497 if (!(ap->a_flags & UPL_NOCOMMIT)) {
2498 ubc_upl_abort_range(ap->a_pl,
2499 ap->a_pl_offset,
2500 ap->a_size,
2501 UPL_ABORT_FREE_ON_EMPTY);
2502 }
2503 return (retval);
2504 }
2505 fp = VTOF(vp);
2506
2507 filesize = fp->ff_size;
2508 end_of_range = ap->a_f_offset + ap->a_size - 1;
2509
2510 if (end_of_range >= filesize) {
2511 end_of_range = (off_t)(filesize - 1);
2512 }
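	/*
	 * The pages being written out now contain valid data, so drop them
	 * from the invalid-range list before handing the range to
	 * cluster_pageout.
	 */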
2513 if (ap->a_f_offset < filesize) {
2514 rl_remove(ap->a_f_offset, end_of_range, &fp->ff_invalidranges);
2515 cp->c_flag |= C_MODIFIED; /* leof is dirty */
2516 }
2517 hfs_unlock(cp);
2518
2519 retval = cluster_pageout(vp, ap->a_pl, ap->a_pl_offset, ap->a_f_offset,
2520 ap->a_size, filesize, ap->a_flags);
2521
2522 /*
2523 * If data was written, and setuid or setgid bits are set and
2524 * this process is not the superuser then clear the setuid and
2525 * setgid bits as a precaution against tampering.
2526 */
2527 if ((retval == 0) &&
2528 (cp->c_mode & (S_ISUID | S_ISGID)) &&
2529 (vfs_context_suser(ap->a_context) != 0)) {
2530 hfs_lock(cp, HFS_FORCE_LOCK);
2531 cp->c_mode &= ~(S_ISUID | S_ISGID);
2532 cp->c_touch_chgtime = TRUE;
2533 hfs_unlock(cp);
2534 }
2535 return (retval);
2536}
2537
2538/*
2539 * Intercept B-Tree node writes to unswap them if necessary.
2540 */
2541int
2542hfs_vnop_bwrite(struct vnop_bwrite_args *ap)
2543{
2544 int retval = 0;
2545 register struct buf *bp = ap->a_bp;
2546 register struct vnode *vp = buf_vnode(bp);
2547 BlockDescriptor block;
2548
2549 /* Trap B-Tree writes */
2550 if ((VTOC(vp)->c_fileid == kHFSExtentsFileID) ||
2551 (VTOC(vp)->c_fileid == kHFSCatalogFileID) ||
2552 (VTOC(vp)->c_fileid == kHFSAttributesFileID)) {
2553
2554 /*
2555 * Swap and validate the node if it is in native byte order.
2556 * This is always true on big endian, so we always validate
2557 * before writing here. On little endian, the node typically has
2558 * been swapped and validated when it was written to the journal,
2559 * so we won't do anything here.
2560 */
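		/*
		 * The last two bytes of a B-tree node hold the offset of its first
		 * record, which is always 0x000E (just past the node descriptor);
		 * reading that value in host byte order means the node has not
		 * been swapped yet.
		 */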
2561 if (((UInt16 *)((char *)buf_dataptr(bp) + buf_count(bp) - 2))[0] == 0x000e) {
2562 /* Prepare the block pointer */
2563 block.blockHeader = bp;
2564 block.buffer = (char *)buf_dataptr(bp);
2565 block.blockNum = buf_lblkno(bp);
2566 /* not found in cache ==> came from disk */
2567 block.blockReadFromDisk = (buf_fromcache(bp) == 0);
2568 block.blockSize = buf_count(bp);
2569
2570 /* Endian un-swap B-Tree node */
2571 retval = hfs_swap_BTNode (&block, vp, kSwapBTNodeHostToBig);
2572 if (retval)
2573 panic("hfs_vnop_bwrite: about to write corrupt node!\n");
2574 }
2575 }
2576
2577 /* This buffer shouldn't be locked anymore but if it is clear it */
2578 if ((buf_flags(bp) & B_LOCKED)) {
2579 // XXXdbg
2580 if (VTOHFS(vp)->jnl) {
2581 panic("hfs: CLEARING the lock bit on bp %p\n", bp);
2582 }
2583 buf_clearflags(bp, B_LOCKED);
2584 }
2585 retval = vn_bwrite (ap);
2586
2587 return (retval);
2588}
2589
2590/*
2591 * Relocate a file to a new location on disk
2592 * cnode must be locked on entry
2593 *
2594 * Relocation occurs by cloning the file's data from its
2595 * current set of blocks to a new set of blocks. During
2596 * the relocation all of the blocks (old and new) are
2597 * owned by the file.
2598 *
2599 * -----------------
2600 * |///////////////|
2601 * -----------------
2602 * 0 N (file offset)
2603 *
2604 * ----------------- -----------------
2605 * |///////////////| | | STEP 1 (acquire new blocks)
2606 * ----------------- -----------------
2607 * 0 N N+1 2N
2608 *
2609 * ----------------- -----------------
2610 * |///////////////| |///////////////| STEP 2 (clone data)
2611 * ----------------- -----------------
2612 * 0 N N+1 2N
2613 *
2614 * -----------------
2615 * |///////////////| STEP 3 (head truncate blocks)
2616 * -----------------
2617 * 0 N
2618 *
2619 * During steps 2 and 3 page-outs to file offsets less
2620 * than or equal to N are suspended.
2621 *
2622 * During step 3 page-ins to the file get suspended.
2623 */
2624__private_extern__
2625int
2626hfs_relocate(struct vnode *vp, u_int32_t blockHint, kauth_cred_t cred,
2627 struct proc *p)
2628{
2629 struct cnode *cp;
2630 struct filefork *fp;
2631 struct hfsmount *hfsmp;
2632 u_int32_t headblks;
2633 u_int32_t datablks;
2634 u_int32_t blksize;
2635 u_int32_t growsize;
2636 u_int32_t nextallocsave;
2637 daddr64_t sector_a, sector_b;
2638 int disabled_caching = 0;
2639 int eflags;
2640 off_t newbytes;
2641 int retval;
2642 int lockflags = 0;
2643 int took_trunc_lock = 0;
2644 int started_tr = 0;
2645 enum vtype vnodetype;
2646
2647 vnodetype = vnode_vtype(vp);
2648 if (vnodetype != VREG && vnodetype != VLNK) {
2649 return (EPERM);
2650 }
2651
2652 hfsmp = VTOHFS(vp);
2653 if (hfsmp->hfs_flags & HFS_FRAGMENTED_FREESPACE) {
2654 return (ENOSPC);
2655 }
2656
2657 cp = VTOC(vp);
2658 fp = VTOF(vp);
2659 if (fp->ff_unallocblocks)
2660 return (EINVAL);
2661 blksize = hfsmp->blockSize;
2662 if (blockHint == 0)
2663 blockHint = hfsmp->nextAllocation;
2664
2665 if ((fp->ff_size > (u_int64_t)0x7fffffff) ||
2666 ((fp->ff_size > blksize) && vnodetype == VLNK)) {
2667 return (EFBIG);
2668 }
2669
2670 //
2671 // We do not believe that this call to hfs_fsync() is
2672 // necessary and it causes a journal transaction
2673 // deadlock so we are removing it.
2674 //
2675 //if (vnodetype == VREG && !vnode_issystem(vp)) {
2676 // retval = hfs_fsync(vp, MNT_WAIT, 0, p);
2677 // if (retval)
2678 // return (retval);
2679 //}
2680
2681 if (!vnode_issystem(vp) && (vnodetype != VLNK)) {
2682 hfs_unlock(cp);
2683 hfs_lock_truncate(cp, TRUE);
2684 if ((retval = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK))) {
2685 hfs_unlock_truncate(cp);
2686 return (retval);
2687 }
2688 took_trunc_lock = 1;
2689 }
2690 headblks = fp->ff_blocks;
2691 datablks = howmany(fp->ff_size, blksize);
2692 growsize = datablks * blksize;
2693 eflags = kEFContigMask | kEFAllMask | kEFNoClumpMask;
2694 if (blockHint >= hfsmp->hfs_metazone_start &&
2695 blockHint <= hfsmp->hfs_metazone_end)
2696 eflags |= kEFMetadataMask;
2697
2698 if (hfs_start_transaction(hfsmp) != 0) {
2699 if (took_trunc_lock)
2700 hfs_unlock_truncate(cp);
2701 return (EINVAL);
2702 }
2703 started_tr = 1;
2704 /*
2705 * Protect the extents b-tree and the allocation bitmap
2706 * during MapFileBlockC and ExtendFileC operations.
2707 */
2708 lockflags = SFL_BITMAP;
2709 if (overflow_extents(fp))
2710 lockflags |= SFL_EXTENTS;
2711 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
2712
2713 retval = MapFileBlockC(hfsmp, (FCB *)fp, 1, growsize - 1, &sector_a, NULL);
2714 if (retval) {
2715 retval = MacToVFSError(retval);
2716 goto out;
2717 }
2718
2719 /*
2720 * STEP 1 - acquire new allocation blocks.
2721 */
2722 if (!vnode_isnocache(vp)) {
2723 vnode_setnocache(vp);
2724 disabled_caching = 1;
2725
2726 }
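	/*
	 * nextAllocation is saved here and restored after a metadata-zone
	 * allocation so that ordinary allocations do not continue from inside
	 * the metadata zone.
	 */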
2727 nextallocsave = hfsmp->nextAllocation;
2728 retval = ExtendFileC(hfsmp, (FCB*)fp, growsize, blockHint, eflags, &newbytes);
2729 if (eflags & kEFMetadataMask) {
2730 HFS_MOUNT_LOCK(hfsmp, TRUE);
2731 hfsmp->nextAllocation = nextallocsave;
2732 hfsmp->vcbFlags |= 0xFF00;
2733 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
2734 }
2735
2736 retval = MacToVFSError(retval);
2737 if (retval == 0) {
2738 cp->c_flag |= C_MODIFIED;
2739 if (newbytes < growsize) {
2740 retval = ENOSPC;
2741 goto restore;
2742 } else if (fp->ff_blocks < (headblks + datablks)) {
2743 printf("hfs_relocate: allocation failed\n");
2744 retval = ENOSPC;
2745 goto restore;
2746 }
2747
2748 retval = MapFileBlockC(hfsmp, (FCB *)fp, 1, growsize, &sector_b, NULL);
2749 if (retval) {
2750 retval = MacToVFSError(retval);
2751 } else if ((sector_a + 1) == sector_b) {
2752 retval = ENOSPC;
2753 goto restore;
2754 } else if ((eflags & kEFMetadataMask) &&
2755 ((((u_int64_t)sector_b * hfsmp->hfs_phys_block_size) / blksize) >
2756 hfsmp->hfs_metazone_end)) {
2757 printf("hfs_relocate: didn't move into metadata zone\n");
2758 retval = ENOSPC;
2759 goto restore;
2760 }
2761 }
2762 /* Done with system locks and journal for now. */
2763 hfs_systemfile_unlock(hfsmp, lockflags);
2764 lockflags = 0;
2765 hfs_end_transaction(hfsmp);
2766 started_tr = 0;
2767
2768 if (retval) {
2769 /*
2770 * Check to see if failure is due to excessive fragmentation.
2771 */
2772 if ((retval == ENOSPC) &&
2773 (hfs_freeblks(hfsmp, 0) > (datablks * 2))) {
2774 hfsmp->hfs_flags |= HFS_FRAGMENTED_FREESPACE;
2775 }
2776 goto out;
2777 }
2778 /*
2779 * STEP 2 - clone file data into the new allocation blocks.
2780 */
2781
2782 if (vnodetype == VLNK)
2783 retval = hfs_clonelink(vp, blksize, cred, p);
2784 else if (vnode_issystem(vp))
2785 retval = hfs_clonesysfile(vp, headblks, datablks, blksize, cred, p);
2786 else
2787 retval = hfs_clonefile(vp, headblks, datablks, blksize);
2788
2789 /* Start transaction for step 3 or for a restore. */
2790 if (hfs_start_transaction(hfsmp) != 0) {
2791 retval = EINVAL;
2792 goto out;
2793 }
2794 started_tr = 1;
2795 if (retval)
2796 goto restore;
2797
2798 /*
2799 * STEP 3 - switch to cloned data and remove old blocks.
2800 */
2801 lockflags = SFL_BITMAP;
2802 if (overflow_extents(fp))
2803 lockflags |= SFL_EXTENTS;
2804 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
2805
2806 retval = HeadTruncateFile(hfsmp, (FCB*)fp, headblks);
2807
2808 hfs_systemfile_unlock(hfsmp, lockflags);
2809 lockflags = 0;
2810 if (retval)
2811 goto restore;
2812out:
2813 if (took_trunc_lock)
2814 hfs_unlock_truncate(cp);
2815
2816 if (lockflags) {
2817 hfs_systemfile_unlock(hfsmp, lockflags);
2818 lockflags = 0;
2819 }
2820
2821 // See comment up above about calls to hfs_fsync()
2822 //
2823 //if (retval == 0)
2824 // retval = hfs_fsync(vp, MNT_WAIT, 0, p);
2825
2826 if (hfsmp->jnl) {
2827 if (cp->c_cnid < kHFSFirstUserCatalogNodeID)
2828 (void) hfs_flushvolumeheader(hfsmp, MNT_WAIT, HFS_ALTFLUSH);
2829 else
2830 (void) hfs_flushvolumeheader(hfsmp, MNT_NOWAIT, 0);
2831 }
2832exit:
2833 if (disabled_caching) {
2834 vnode_clearnocache(vp);
2835 }
2836 if (started_tr)
2837 hfs_end_transaction(hfsmp);
2838
2839 return (retval);
2840
2841restore:
2842 if (fp->ff_blocks == headblks)
2843 goto exit;
2844 /*
2845 * Give back any newly allocated space.
2846 */
2847 if (lockflags == 0) {
2848 lockflags = SFL_BITMAP;
2849 if (overflow_extents(fp))
2850 lockflags |= SFL_EXTENTS;
2851 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
2852 }
2853
2854 (void) TruncateFileC(hfsmp, (FCB*)fp, fp->ff_size, false);
2855
2856 hfs_systemfile_unlock(hfsmp, lockflags);
2857 lockflags = 0;
2858
2859 if (took_trunc_lock)
2860 hfs_unlock_truncate(cp);
2861 goto exit;
2862}
2863
2864
2865/*
2866 * Clone a symlink.
2867 *
2868 */
2869static int
2870hfs_clonelink(struct vnode *vp, int blksize, kauth_cred_t cred, struct proc *p)
2871{
2872 struct buf *head_bp = NULL;
2873 struct buf *tail_bp = NULL;
2874 int error;
2875
2876
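	/*
	 * A relocated symlink's data fits in a single allocation block:
	 * read logical block 0 (the original copy) and write it into
	 * logical block 1, the block newly appended by hfs_relocate.
	 */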
2877 error = (int)buf_meta_bread(vp, (daddr64_t)0, blksize, cred, &head_bp);
2878 if (error)
2879 goto out;
2880
2881 tail_bp = buf_getblk(vp, (daddr64_t)1, blksize, 0, 0, BLK_META);
2882 if (tail_bp == NULL) {
2883 error = EIO;
2884 goto out;
2885 }
2886 bcopy((char *)buf_dataptr(head_bp), (char *)buf_dataptr(tail_bp), blksize);
2887 error = (int)buf_bwrite(tail_bp);
2888out:
2889 if (head_bp) {
2890 buf_markinvalid(head_bp);
2891 buf_brelse(head_bp);
2892 }
2893 (void) buf_invalidateblks(vp, BUF_WRITE_DATA, 0, 0);
2894
2895 return (error);
2896}
2897
2898/*
2899 * Clone a file's data within the file.
2900 *
2901 */
2902static int
2903hfs_clonefile(struct vnode *vp, int blkstart, int blkcnt, int blksize)
2904{
2905 caddr_t bufp;
2906 size_t writebase;
2907 size_t bufsize;
2908 size_t copysize;
2909 size_t iosize;
2910 off_t filesize;
2911 size_t offset;
2912 uio_t auio;
2913 int error = 0;
2914
2915 filesize = VTOF(vp)->ff_blocks * blksize; /* virtual file size */
2916 writebase = blkstart * blksize;
2917 copysize = blkcnt * blksize;
2918 iosize = bufsize = MIN(copysize, 4096 * 16);
2919 offset = 0;
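	/*
	 * The source data lives at file offsets [0, copysize); rewrite it
	 * starting at writebase (the first newly allocated block) in
	 * bufsize-sized chunks, bypassing the cache with IO_NOCACHE.
	 */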
2920
2921 if (kmem_alloc(kernel_map, (vm_offset_t *)&bufp, bufsize)) {
2922 return (ENOMEM);
2923 }
2924 hfs_unlock(VTOC(vp));
2925
2926 auio = uio_create(1, 0, UIO_SYSSPACE32, UIO_READ);
2927
2928 while (offset < copysize) {
2929 iosize = MIN(copysize - offset, iosize);
2930
2931 uio_reset(auio, offset, UIO_SYSSPACE32, UIO_READ);
2932 uio_addiov(auio, (uintptr_t)bufp, iosize);
2933
2934 error = cluster_read(vp, auio, copysize, 0);
2935 if (error) {
2936 printf("hfs_clonefile: cluster_read failed - %d\n", error);
2937 break;
2938 }
2939 if (uio_resid(auio) != 0) {
2940 printf("hfs_clonefile: cluster_read: uio_resid = %lld\n", uio_resid(auio));
2941 error = EIO;
2942 break;
2943 }
2944
2945 uio_reset(auio, writebase + offset, UIO_SYSSPACE32, UIO_WRITE);
2946 uio_addiov(auio, (uintptr_t)bufp, iosize);
2947
2948 error = cluster_write(vp, auio, filesize + offset,
2949 filesize + offset + iosize,
2950 uio_offset(auio), 0, IO_NOCACHE | IO_SYNC);
2951 if (error) {
2952 printf("hfs_clonefile: cluster_write failed - %d\n", error);
2953 break;
2954 }
2955 if (uio_resid(auio) != 0) {
2956 printf("hfs_clonefile: cluster_write failed - uio_resid not zero\n");
2957 error = EIO;
2958 break;
2959 }
2960 offset += iosize;
2961 }
2962 uio_free(auio);
2963
2964 /*
2965 * No need to call ubc_sync_range or hfs_invalbuf
2966 * since the file was copied using IO_NOCACHE.
2967 */
2968
2969 kmem_free(kernel_map, (vm_offset_t)bufp, bufsize);
2970
2971 hfs_lock(VTOC(vp), HFS_FORCE_LOCK);
2972 return (error);
2973}
2974
2975/*
2976 * Clone a system (metadata) file.
2977 *
2978 */
2979static int
2980hfs_clonesysfile(struct vnode *vp, int blkstart, int blkcnt, int blksize,
2981 kauth_cred_t cred, struct proc *p)
2982{
2983 caddr_t bufp;
2984 char * offset;
2985 size_t bufsize;
2986 size_t iosize;
2987 struct buf *bp = NULL;
2988 daddr64_t blkno;
2989 daddr64_t blk;
2990 daddr64_t start_blk;
2991 daddr64_t last_blk;
2992 int breadcnt;
2993 int i;
2994 int error = 0;
2995
2996
2997 iosize = GetLogicalBlockSize(vp);
2998 bufsize = MIN(blkcnt * blksize, 1024 * 1024) & ~(iosize - 1);
2999 breadcnt = bufsize / iosize;
3000
3001 if (kmem_alloc(kernel_map, (vm_offset_t *)&bufp, bufsize)) {
3002 return (ENOMEM);
3003 }
3004 start_blk = ((daddr64_t)blkstart * blksize) / iosize;
3005 last_blk = ((daddr64_t)blkcnt * blksize) / iosize;
3006 blkno = 0;
3007
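	/*
	 * Copy the fork in bufsize-sized passes: read up to breadcnt logical
	 * blocks starting at blkno into bufp, then write them back out
	 * starting at start_blk + blkno, just past the original allocation.
	 */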
3008 while (blkno < last_blk) {
3009 /*
3010 * Read up to a megabyte
3011 */
3012 offset = bufp;
3013 for (i = 0, blk = blkno; (i < breadcnt) && (blk < last_blk); ++i, ++blk) {
3014 error = (int)buf_meta_bread(vp, blk, iosize, cred, &bp);
3015 if (error) {
3016 printf("hfs_clonesysfile: meta_bread error %d\n", error);
3017 goto out;
3018 }
3019 if (buf_count(bp) != iosize) {
3020 printf("hfs_clonesysfile: b_bcount is only %d\n", buf_count(bp));
3021 goto out;
3022 }
3023 bcopy((char *)buf_dataptr(bp), offset, iosize);
3024
3025 buf_markinvalid(bp);
3026 buf_brelse(bp);
3027 bp = NULL;
3028
3029 offset += iosize;
3030 }
3031
3032 /*
3033 * Write up to a megabyte
3034 */
3035 offset = bufp;
3036 for (i = 0; (i < breadcnt) && (blkno < last_blk); ++i, ++blkno) {
3037 bp = buf_getblk(vp, start_blk + blkno, iosize, 0, 0, BLK_META);
3038 if (bp == NULL) {
3039 printf("hfs_clonesysfile: getblk failed on blk %qd\n", start_blk + blkno);
3040 error = EIO;
3041 goto out;
3042 }
3043 bcopy(offset, (char *)buf_dataptr(bp), iosize);
3044 error = (int)buf_bwrite(bp);
3045 bp = NULL;
3046 if (error)
3047 goto out;
3048 offset += iosize;
3049 }
3050 }
3051out:
3052 if (bp) {
3053 buf_brelse(bp);
3054 }
3055
3056 kmem_free(kernel_map, (vm_offset_t)bufp, bufsize);
3057
3058 error = hfs_fsync(vp, MNT_WAIT, 0, p);
3059
3060 return (error);
3061}