git.saurik.com Git - apple/xnu.git/blame_incremental

... / ...

Commit	Line	Data
	1	/*
	2	* Copyright (c) 2000-2003 Apple Computer, Inc. All rights reserved.
	3	*
	4	* @APPLE_LICENSE_HEADER_START@
	5	*
	6	* The contents of this file constitute Original Code as defined in and
	7	* are subject to the Apple Public Source License Version 1.1 (the
	8	* "License"). You may not use this file except in compliance with the
	9	* License. Please obtain a copy of the License at
	10	* http://www.apple.com/publicsource and read it before using this file.
	11	*
	12	* This Original Code and all software distributed under the License are
	13	* distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
	14	* EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
	15	* INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
	16	* FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
	17	* License for the specific language governing rights and limitations
	18	* under the License.
	19	*
	20	* @APPLE_LICENSE_HEADER_END@
	21	*/
	22	/* @(#)hfs_readwrite.c 1.0
	23	*
	24	* (c) 1998-2001 Apple Computer, Inc. All Rights Reserved
	25	*
	26	* hfs_readwrite.c -- vnode operations to deal with reading and writing files.
	27	*
	28	*/
	29
	30	#include <sys/param.h>
	31	#include <sys/systm.h>
	32	#include <sys/resourcevar.h>
	33	#include <sys/kernel.h>
	34	#include <sys/fcntl.h>
	35	#include <sys/filedesc.h>
	36	#include <sys/stat.h>
	37	#include <sys/buf.h>
	38	#include <sys/proc.h>
	39	#include <sys/vnode.h>
	40	#include <sys/uio.h>
	41
	42	#include <miscfs/specfs/specdev.h>
	43
	44	#include <sys/ubc.h>
	45	#include <vm/vm_pageout.h>
	46
	47	#include <sys/kdebug.h>
	48
	49	#include "hfs.h"
	50	#include "hfs_endian.h"
	51	#include "hfs_quota.h"
	52	#include "hfscommon/headers/FileMgrInternal.h"
	53	#include "hfscommon/headers/BTreesInternal.h"
	54	#include "hfs_cnode.h"
	55	#include "hfs_dbg.h"
	56
	57	extern int overflow_extents(struct filefork *fp);
	58
	59	#define can_cluster(size) ((((size & (4096-1))) == 0) && (size <= (MAXPHYSIO/2)))
	60
	61	enum {
	62	MAXHFSFILESIZE = 0x7FFFFFFF /* this needs to go in the mount structure */
	63	};
	64
	65	extern u_int32_t GetLogicalBlockSize(struct vnode *vp);
	66
	67	static int hfs_clonelink(struct vnode , int, struct ucred , struct proc *);
	68	static int hfs_clonefile(struct vnode , int, int, int, struct ucred , struct proc *);
	69	static int hfs_clonesysfile(struct vnode , int, int, int, struct ucred , struct proc *);
	70
	71
	72	/*****************************************************************************
	73	*
	74	* Operations on vnodes
	75	*
	76	*****************************************************************************/
	77
	78	/*
	79	#% read vp L L L
	80	#
	81	vop_read {
	82	IN struct vnode *vp;
	83	INOUT struct uio *uio;
	84	IN int ioflag;
	85	IN struct ucred *cred;
	86
	87	*/
	88
	89	int
	90	hfs_read(ap)
	91	struct vop_read_args /* {
	92	struct vnode *a_vp;
	93	struct uio *a_uio;
	94	int a_ioflag;
	95	struct ucred *a_cred;
	96	} / ap;
	97	{
	98	register struct uio *uio = ap->a_uio;
	99	register struct vnode *vp = ap->a_vp;
	100	struct cnode *cp;
	101	struct filefork *fp;
	102	int devBlockSize = 0;
	103	int retval = 0;
	104	off_t filesize;
	105	off_t filebytes;
	106	off_t start_resid = uio->uio_resid;
	107
	108
	109	/* Preflight checks */
	110	if ((vp->v_type != VREG) \|\| !UBCINFOEXISTS(vp))
	111	return (EPERM); /* can only read regular files */
	112	if (uio->uio_resid == 0)
	113	return (0); /* Nothing left to do */
	114	if (uio->uio_offset < 0)
	115	return (EINVAL); /* cant read from a negative offset */
	116
	117	cp = VTOC(vp);
	118	fp = VTOF(vp);
	119	filesize = fp->ff_size;
	120	filebytes = (off_t)fp->ff_blocks * (off_t)VTOVCB(vp)->blockSize;
	121	if (uio->uio_offset > filesize) {
	122	if ((!ISHFSPLUS(VTOVCB(vp))) && (uio->uio_offset > (off_t)MAXHFSFILESIZE))
	123	return (EFBIG);
	124	else
	125	return (0);
	126	}
	127
	128	VOP_DEVBLOCKSIZE(cp->c_devvp, &devBlockSize);
	129
	130	KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 12)) \| DBG_FUNC_START,
	131	(int)uio->uio_offset, uio->uio_resid, (int)filesize, (int)filebytes, 0);
	132
	133	retval = cluster_read(vp, uio, filesize, devBlockSize, 0);
	134
	135	cp->c_flag \|= C_ACCESS;
	136
	137	KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 12)) \| DBG_FUNC_END,
	138	(int)uio->uio_offset, uio->uio_resid, (int)filesize, (int)filebytes, 0);
	139
	140	/*
	141	* Keep track blocks read
	142	*/
	143	if (VTOHFS(vp)->hfc_stage == HFC_RECORDING && retval == 0) {
	144	/*
	145	* If this file hasn't been seen since the start of
	146	* the current sampling period then start over.
	147	*/
	148	if (cp->c_atime < VTOHFS(vp)->hfc_timebase) {
	149	fp->ff_bytesread = start_resid - uio->uio_resid;
	150	cp->c_atime = time.tv_sec;
	151	} else {
	152	fp->ff_bytesread += start_resid - uio->uio_resid;
	153	}
	154	}
	155
	156	return (retval);
	157	}
	158
	159	/*
	160	* Write data to a file or directory.
	161	#% write vp L L L
	162	#
	163	vop_write {
	164	IN struct vnode *vp;
	165	INOUT struct uio *uio;
	166	IN int ioflag;
	167	IN struct ucred *cred;
	168
	169	*/
	170	int
	171	hfs_write(ap)
	172	struct vop_write_args /* {
	173	struct vnode *a_vp;
	174	struct uio *a_uio;
	175	int a_ioflag;
	176	struct ucred *a_cred;
	177	} / ap;
	178	{
	179	struct vnode *vp = ap->a_vp;
	180	struct uio *uio = ap->a_uio;
	181	struct cnode *cp;
	182	struct filefork *fp;
	183	struct proc *p;
	184	struct timeval tv;
	185	ExtendedVCB *vcb;
	186	int devBlockSize = 0;
	187	off_t origFileSize, writelimit, bytesToAdd;
	188	off_t actualBytesAdded;
	189	u_long resid;
	190	int eflags, ioflag;
	191	int retval;
	192	off_t filebytes;
	193	struct hfsmount *hfsmp;
	194	int started_tr = 0, grabbed_lock = 0;
	195
	196
	197	if (uio->uio_offset < 0)
	198	return (EINVAL);
	199	if (uio->uio_resid == 0)
	200	return (E_NONE);
	201	if ((vp->v_type != VREG) \|\| !UBCINFOEXISTS(vp))
	202	return (EPERM); /* Can only write regular files */
	203
	204	ioflag = ap->a_ioflag;
	205	cp = VTOC(vp);
	206	fp = VTOF(vp);
	207	vcb = VTOVCB(vp);
	208	filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize;
	209
	210	if (ioflag & IO_APPEND)
	211	uio->uio_offset = fp->ff_size;
	212	if ((cp->c_flags & APPEND) && uio->uio_offset != fp->ff_size)
	213	return (EPERM);
	214
	215	// XXXdbg - don't allow modification of the journal or journal_info_block
	216	if (VTOHFS(vp)->jnl && cp->c_datafork) {
	217	struct HFSPlusExtentDescriptor *extd;
	218
	219	extd = &cp->c_datafork->ff_extents[0];
	220	if (extd->startBlock == VTOVCB(vp)->vcbJinfoBlock \|\| extd->startBlock == VTOHFS(vp)->jnl_start) {
	221	return EPERM;
	222	}
	223	}
	224
	225	writelimit = uio->uio_offset + uio->uio_resid;
	226
	227	/*
	228	* Maybe this should be above the vnode op call, but so long as
	229	* file servers have no limits, I don't think it matters.
	230	*/
	231	p = uio->uio_procp;
	232	if (vp->v_type == VREG && p &&
	233	writelimit > p->p_rlimit[RLIMIT_FSIZE].rlim_cur) {
	234	psignal(p, SIGXFSZ);
	235	return (EFBIG);
	236	}
	237	p = current_proc();
	238
	239	VOP_DEVBLOCKSIZE(cp->c_devvp, &devBlockSize);
	240
	241	resid = uio->uio_resid;
	242	origFileSize = fp->ff_size;
	243	eflags = kEFDeferMask; /* defer file block allocations */
	244	filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize;
	245
	246	KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) \| DBG_FUNC_START,
	247	(int)uio->uio_offset, uio->uio_resid, (int)fp->ff_size, (int)filebytes, 0);
	248	retval = 0;
	249
	250	/* Now test if we need to extend the file */
	251	/* Doing so will adjust the filebytes for us */
	252
	253	#if QUOTA
	254	if(writelimit > filebytes) {
	255	bytesToAdd = writelimit - filebytes;
	256
	257	retval = hfs_chkdq(cp, (int64_t)(roundup(bytesToAdd, vcb->blockSize)),
	258	ap->a_cred, 0);
	259	if (retval)
	260	return (retval);
	261	}
	262	#endif /* QUOTA */
	263
	264	hfsmp = VTOHFS(vp);
	265
	266	#ifdef HFS_SPARSE_DEV
	267	/*
	268	* When the underlying device is sparse and space
	269	* is low (< 8MB), stop doing delayed allocations
	270	* and begin doing synchronous I/O.
	271	*/
	272	if ((hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) &&
	273	(hfs_freeblks(hfsmp, 0) < 2048)) {
	274	eflags &= ~kEFDeferMask;
	275	ioflag \|= IO_SYNC;
	276	}
	277	#endif /* HFS_SPARSE_DEV */
	278
	279	if (writelimit > filebytes) {
	280	hfs_global_shared_lock_acquire(hfsmp);
	281	grabbed_lock = 1;
	282	}
	283	if (hfsmp->jnl && (writelimit > filebytes)) {
	284	if (journal_start_transaction(hfsmp->jnl) != 0) {
	285	hfs_global_shared_lock_release(hfsmp);
	286	return EINVAL;
	287	}
	288	started_tr = 1;
	289	}
	290
	291	while (writelimit > filebytes) {
	292	bytesToAdd = writelimit - filebytes;
	293	if (ap->a_cred && suser(ap->a_cred, NULL) != 0)
	294	eflags \|= kEFReserveMask;
	295
	296	/* lock extents b-tree (also protects volume bitmap) */
	297	retval = hfs_metafilelocking(VTOHFS(vp), kHFSExtentsFileID, LK_EXCLUSIVE, current_proc());
	298	if (retval != E_NONE)
	299	break;
	300
	301	/* Files that are changing size are not hot file candidates. */
	302	if (hfsmp->hfc_stage == HFC_RECORDING) {
	303	fp->ff_bytesread = 0;
	304	}
	305	retval = MacToVFSError(ExtendFileC (vcb, (FCB*)fp, bytesToAdd,
	306	0, eflags, &actualBytesAdded));
	307
	308	(void) hfs_metafilelocking(VTOHFS(vp), kHFSExtentsFileID, LK_RELEASE, p);
	309	if ((actualBytesAdded == 0) && (retval == E_NONE))
	310	retval = ENOSPC;
	311	if (retval != E_NONE)
	312	break;
	313	filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize;
	314	KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) \| DBG_FUNC_NONE,
	315	(int)uio->uio_offset, uio->uio_resid, (int)fp->ff_size, (int)filebytes, 0);
	316	}
	317
	318	// XXXdbg
	319	if (started_tr) {
	320	tv = time;
	321	VOP_UPDATE(vp, &tv, &tv, 1);
	322
	323	hfs_flushvolumeheader(hfsmp, MNT_NOWAIT, 0);
	324	journal_end_transaction(hfsmp->jnl);
	325	started_tr = 0;
	326	}
	327	if (grabbed_lock) {
	328	hfs_global_shared_lock_release(hfsmp);
	329	grabbed_lock = 0;
	330	}
	331
	332	if (retval == E_NONE) {
	333	off_t filesize;
	334	off_t zero_off;
	335	off_t tail_off;
	336	off_t inval_start;
	337	off_t inval_end;
	338	off_t io_start, io_end;
	339	int lflag;
	340	struct rl_entry *invalid_range;
	341
	342	if (writelimit > fp->ff_size)
	343	filesize = writelimit;
	344	else
	345	filesize = fp->ff_size;
	346
	347	lflag = (ioflag & IO_SYNC);
	348
	349	if (uio->uio_offset <= fp->ff_size) {
	350	zero_off = uio->uio_offset & ~PAGE_MASK_64;
	351
	352	/* Check to see whether the area between the zero_offset and the start
	353	of the transfer to see whether is invalid and should be zero-filled
	354	as part of the transfer:
	355	*/
	356	if (uio->uio_offset > zero_off) {
	357	if (rl_scan(&fp->ff_invalidranges, zero_off, uio->uio_offset - 1, &invalid_range) != RL_NOOVERLAP)
	358	lflag \|= IO_HEADZEROFILL;
	359	}
	360	} else {
	361	off_t eof_page_base = fp->ff_size & ~PAGE_MASK_64;
	362
	363	/* The bytes between fp->ff_size and uio->uio_offset must never be
	364	read without being zeroed. The current last block is filled with zeroes
	365	if it holds valid data but in all cases merely do a little bookkeeping
	366	to track the area from the end of the current last page to the start of
	367	the area actually written. For the same reason only the bytes up to the
	368	start of the page where this write will start is invalidated; any remainder
	369	before uio->uio_offset is explicitly zeroed as part of the cluster_write.
	370
	371	Note that inval_start, the start of the page after the current EOF,
	372	may be past the start of the write, in which case the zeroing
	373	will be handled by the cluser_write of the actual data.
	374	*/
	375	inval_start = (fp->ff_size + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64;
	376	inval_end = uio->uio_offset & ~PAGE_MASK_64;
	377	zero_off = fp->ff_size;
	378
	379	if ((fp->ff_size & PAGE_MASK_64) &&
	380	(rl_scan(&fp->ff_invalidranges,
	381	eof_page_base,
	382	fp->ff_size - 1,
	383	&invalid_range) != RL_NOOVERLAP)) {
	384	/* The page containing the EOF is not valid, so the
	385	entire page must be made inaccessible now. If the write
	386	starts on a page beyond the page containing the eof
	387	(inval_end > eof_page_base), add the
	388	whole page to the range to be invalidated. Otherwise
	389	(i.e. if the write starts on the same page), zero-fill
	390	the entire page explicitly now:
	391	*/
	392	if (inval_end > eof_page_base) {
	393	inval_start = eof_page_base;
	394	} else {
	395	zero_off = eof_page_base;
	396	};
	397	};
	398
	399	if (inval_start < inval_end) {
	400	/* There's some range of data that's going to be marked invalid */
	401
	402	if (zero_off < inval_start) {
	403	/* The pages between inval_start and inval_end are going to be invalidated,
	404	and the actual write will start on a page past inval_end. Now's the last
	405	chance to zero-fill the page containing the EOF:
	406	*/
	407	retval = cluster_write(vp, (struct uio *) 0,
	408	fp->ff_size, inval_start,
	409	zero_off, (off_t)0, devBlockSize,
	410	lflag \| IO_HEADZEROFILL \| IO_NOZERODIRTY);
	411	if (retval) goto ioerr_exit;
	412	};
	413
	414	/* Mark the remaining area of the newly allocated space as invalid: */
	415	rl_add(inval_start, inval_end - 1 , &fp->ff_invalidranges);
	416	cp->c_zftimeout = time.tv_sec + ZFTIMELIMIT;
	417	zero_off = fp->ff_size = inval_end;
	418	};
	419
	420	if (uio->uio_offset > zero_off) lflag \|= IO_HEADZEROFILL;
	421	};
	422
	423	/* Check to see whether the area between the end of the write and the end of
	424	the page it falls in is invalid and should be zero-filled as part of the transfer:
	425	*/
	426	tail_off = (writelimit + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64;
	427	if (tail_off > filesize) tail_off = filesize;
	428	if (tail_off > writelimit) {
	429	if (rl_scan(&fp->ff_invalidranges, writelimit, tail_off - 1, &invalid_range) != RL_NOOVERLAP) {
	430	lflag \|= IO_TAILZEROFILL;
	431	};
	432	};
	433
	434	/*
	435	* if the write starts beyond the current EOF (possibly advanced in the
	436	* zeroing of the last block, above), then we'll zero fill from the current EOF
	437	* to where the write begins:
	438	*
	439	* NOTE: If (and ONLY if) the portion of the file about to be written is
	440	* before the current EOF it might be marked as invalid now and must be
	441	* made readable (removed from the invalid ranges) before cluster_write
	442	* tries to write it:
	443	*/
	444	io_start = (lflag & IO_HEADZEROFILL) ? zero_off : uio->uio_offset;
	445	io_end = (lflag & IO_TAILZEROFILL) ? tail_off : writelimit;
	446	if (io_start < fp->ff_size) {
	447	rl_remove(io_start, io_end - 1, &fp->ff_invalidranges);
	448	};
	449	retval = cluster_write(vp, uio, fp->ff_size, filesize, zero_off,
	450	tail_off, devBlockSize, lflag \| IO_NOZERODIRTY);
	451
	452	if (uio->uio_offset > fp->ff_size) {
	453	fp->ff_size = uio->uio_offset;
	454
	455	ubc_setsize(vp, fp->ff_size); /* XXX check errors */
	456	}
	457	if (resid > uio->uio_resid)
	458	cp->c_flag \|= C_CHANGE \| C_UPDATE;
	459	}
	460
	461	HFS_KNOTE(vp, NOTE_WRITE);
	462
	463	ioerr_exit:
	464	/*
	465	* If we successfully wrote any data, and we are not the superuser
	466	* we clear the setuid and setgid bits as a precaution against
	467	* tampering.
	468	*/
	469	if (resid > uio->uio_resid && ap->a_cred && ap->a_cred->cr_uid != 0)
	470	cp->c_mode &= ~(S_ISUID \| S_ISGID);
	471
	472	if (retval) {
	473	if (ioflag & IO_UNIT) {
	474	(void)VOP_TRUNCATE(vp, origFileSize,
	475	ioflag & IO_SYNC, ap->a_cred, uio->uio_procp);
	476	uio->uio_offset -= resid - uio->uio_resid;
	477	uio->uio_resid = resid;
	478	filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize;
	479	}
	480	} else if (resid > uio->uio_resid && (ioflag & IO_SYNC)) {
	481	tv = time;
	482	retval = VOP_UPDATE(vp, &tv, &tv, 1);
	483	}
	484	vcb->vcbWrCnt++;
	485
	486	KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) \| DBG_FUNC_END,
	487	(int)uio->uio_offset, uio->uio_resid, (int)fp->ff_size, (int)filebytes, 0);
	488
	489	return (retval);
	490	}
	491
	492
	493	#ifdef HFS_SPARSE_DEV
	494	struct hfs_backingstoreinfo {
	495	int signature; /* == 3419115 */
	496	int version; /* version of this struct (1) */
	497	int backingfd; /* disk image file (on backing fs) */
	498	int bandsize; /* sparse disk image band size */
	499	};
	500
	501	#define HFSIOC_SETBACKINGSTOREINFO _IOW('h', 7, struct hfs_backingstoreinfo)
	502	#define HFSIOC_CLRBACKINGSTOREINFO _IO('h', 8)
	503
	504	#define HFS_SETBACKINGSTOREINFO IOCBASECMD(HFSIOC_SETBACKINGSTOREINFO)
	505	#define HFS_CLRBACKINGSTOREINFO IOCBASECMD(HFSIOC_CLRBACKINGSTOREINFO)
	506
	507	#endif /* HFS_SPARSE_DEV */
	508
	509	/*
	510
	511	#% ioctl vp U U U
	512	#
	513	vop_ioctl {
	514	IN struct vnode *vp;
	515	IN u_long command;
	516	IN caddr_t data;
	517	IN int fflag;
	518	IN struct ucred *cred;
	519	IN struct proc *p;
	520
	521	*/
	522
	523
	524	/* ARGSUSED */
	525	int
	526	hfs_ioctl(ap)
	527	struct vop_ioctl_args /* {
	528	struct vnode *a_vp;
	529	int a_command;
	530	caddr_t a_data;
	531	int a_fflag;
	532	struct ucred *a_cred;
	533	struct proc *a_p;
	534	} / ap;
	535	{
	536	switch (ap->a_command) {
	537
	538	#ifdef HFS_SPARSE_DEV
	539	case HFS_SETBACKINGSTOREINFO: {
	540	struct hfsmount * hfsmp;
	541	struct vnode * bsfs_rootvp;
	542	struct vnode * di_vp;
	543	struct file * di_fp;
	544	struct hfs_backingstoreinfo *bsdata;
	545	int error = 0;
	546
	547	hfsmp = VTOHFS(ap->a_vp);
	548	if (hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) {
	549	return (EALREADY);
	550	}
	551	if (ap->a_p->p_ucred->cr_uid != 0 &&
	552	ap->a_p->p_ucred->cr_uid != (HFSTOVFS(hfsmp))->mnt_stat.f_owner) {
	553	return (EACCES); /* must be owner of file system */
	554	}
	555	bsdata = (struct hfs_backingstoreinfo *)ap->a_data;
	556	if (bsdata == NULL) {
	557	return (EINVAL);
	558	}
	559	if (error = fdgetf(ap->a_p, bsdata->backingfd, &di_fp)) {
	560	return (error);
	561	}
	562	if (fref(di_fp) == -1) {
	563	return (EBADF);
	564	}
	565	if (di_fp->f_type != DTYPE_VNODE) {
	566	frele(di_fp);
	567	return (EINVAL);
	568	}
	569	di_vp = (struct vnode *)di_fp->f_data;
	570	if (ap->a_vp->v_mount == di_vp->v_mount) {
	571	frele(di_fp);
	572	return (EINVAL);
	573	}
	574
	575	/*
	576	* Obtain the backing fs root vnode and keep a reference
	577	* on it. This reference will be dropped in hfs_unmount.
	578	*/
	579	error = VFS_ROOT(di_vp->v_mount, &bsfs_rootvp);
	580	if (error) {
	581	frele(di_fp);
	582	return (error);
	583	}
	584	VOP_UNLOCK(bsfs_rootvp, 0, ap->a_p); /* Hold on to the reference */
	585
	586	hfsmp->hfs_backingfs_rootvp = bsfs_rootvp;
	587	hfsmp->hfs_flags \|= HFS_HAS_SPARSE_DEVICE;
	588	hfsmp->hfs_sparsebandblks = bsdata->bandsize / HFSTOVCB(hfsmp)->blockSize;
	589	hfsmp->hfs_sparsebandblks *= 4;
	590
	591	frele(di_fp);
	592	return (0);
	593	}
	594	case HFS_CLRBACKINGSTOREINFO: {
	595	struct hfsmount * hfsmp;
	596	struct vnode * tmpvp;
	597
	598	hfsmp = VTOHFS(ap->a_vp);
	599	if (ap->a_p->p_ucred->cr_uid != 0 &&
	600	ap->a_p->p_ucred->cr_uid != (HFSTOVFS(hfsmp))->mnt_stat.f_owner) {
	601	return (EACCES); /* must be owner of file system */
	602	}
	603	if ((hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) &&
	604	hfsmp->hfs_backingfs_rootvp) {
	605
	606	hfsmp->hfs_flags &= ~HFS_HAS_SPARSE_DEVICE;
	607	tmpvp = hfsmp->hfs_backingfs_rootvp;
	608	hfsmp->hfs_backingfs_rootvp = NULLVP;
	609	hfsmp->hfs_sparsebandblks = 0;
	610	vrele(tmpvp);
	611	}
	612	return (0);
	613	}
	614	#endif /* HFS_SPARSE_DEV */
	615
	616	case 6: {
	617	int error;
	618
	619	ap->a_vp->v_flag \|= VFULLFSYNC;
	620	error = VOP_FSYNC(ap->a_vp, ap->a_cred, MNT_NOWAIT, ap->a_p);
	621	ap->a_vp->v_flag &= ~VFULLFSYNC;
	622
	623	return error;
	624	}
	625	case 5: {
	626	register struct vnode *vp;
	627	register struct cnode *cp;
	628	struct filefork *fp;
	629	int error;
	630
	631	vp = ap->a_vp;
	632	cp = VTOC(vp);
	633	fp = VTOF(vp);
	634
	635	if (vp->v_type != VREG)
	636	return EINVAL;
	637
	638	VOP_LEASE(vp, ap->a_p, ap->a_cred, LEASE_READ);
	639	error = vn_lock(vp, LK_EXCLUSIVE \| LK_RETRY, ap->a_p);
	640	if (error)
	641	return (error);
	642
	643	/*
	644	* used by regression test to determine if
	645	* all the dirty pages (via write) have been cleaned
	646	* after a call to 'fsysnc'.
	647	*/
	648	error = is_file_clean(vp, fp->ff_size);
	649	VOP_UNLOCK(vp, 0, ap->a_p);
	650
	651	return (error);
	652	}
	653
	654	case 1: {
	655	register struct vnode *vp;
	656	register struct radvisory *ra;
	657	register struct cnode *cp;
	658	struct filefork *fp;
	659	int devBlockSize = 0;
	660	int error;
	661
	662	vp = ap->a_vp;
	663
	664	if (vp->v_type != VREG)
	665	return EINVAL;
	666
	667	VOP_LEASE(vp, ap->a_p, ap->a_cred, LEASE_READ);
	668	error = vn_lock(vp, LK_EXCLUSIVE \| LK_RETRY, ap->a_p);
	669	if (error)
	670	return (error);
	671
	672	ra = (struct radvisory *)(ap->a_data);
	673	cp = VTOC(vp);
	674	fp = VTOF(vp);
	675
	676	if (ra->ra_offset >= fp->ff_size) {
	677	VOP_UNLOCK(vp, 0, ap->a_p);
	678	return (EFBIG);
	679	}
	680	VOP_DEVBLOCKSIZE(cp->c_devvp, &devBlockSize);
	681
	682	error = advisory_read(vp, fp->ff_size, ra->ra_offset, ra->ra_count, devBlockSize);
	683	VOP_UNLOCK(vp, 0, ap->a_p);
	684
	685	return (error);
	686	}
	687
	688	case 2: /* F_READBOOTBLOCKS */
	689	case 3: /* F_WRITEBOOTBLOCKS */
	690	{
	691	struct vnode *vp = ap->a_vp;
	692	struct vnode *devvp = NULL;
	693	struct fbootstraptransfer btd = (struct fbootstraptransfer )ap->a_data;
	694	int devBlockSize;
	695	int error;
	696	struct iovec aiov;
	697	struct uio auio;
	698	u_long blockNumber;
	699	u_long blockOffset;
	700	u_long xfersize;
	701	struct buf *bp;
	702
	703	if ((vp->v_flag & VROOT) == 0) return EINVAL;
	704	if (btd->fbt_offset + btd->fbt_length > 1024) return EINVAL;
	705
	706	devvp = VTOHFS(vp)->hfs_devvp;
	707	aiov.iov_base = btd->fbt_buffer;
	708	aiov.iov_len = btd->fbt_length;
	709
	710	auio.uio_iov = &aiov;
	711	auio.uio_iovcnt = 1;
	712	auio.uio_offset = btd->fbt_offset;
	713	auio.uio_resid = btd->fbt_length;
	714	auio.uio_segflg = UIO_USERSPACE;
	715	auio.uio_rw = (ap->a_command == 3) ? UIO_WRITE : UIO_READ; /* F_WRITEBOOTSTRAP / F_READBOOTSTRAP */
	716	auio.uio_procp = ap->a_p;
	717
	718	VOP_DEVBLOCKSIZE(devvp, &devBlockSize);
	719
	720	while (auio.uio_resid > 0) {
	721	blockNumber = auio.uio_offset / devBlockSize;
	722	error = bread(devvp, blockNumber, devBlockSize, ap->a_cred, &bp);
	723	if (error) {
	724	if (bp) brelse(bp);
	725	return error;
	726	};
	727
	728	blockOffset = auio.uio_offset % devBlockSize;
	729	xfersize = devBlockSize - blockOffset;
	730	error = uiomove((caddr_t)bp->b_data + blockOffset, (int)xfersize, &auio);
	731	if (error) {
	732	brelse(bp);
	733	return error;
	734	};
	735	if (auio.uio_rw == UIO_WRITE) {
	736	error = VOP_BWRITE(bp);
	737	if (error) return error;
	738	} else {
	739	brelse(bp);
	740	};
	741	};
	742	};
	743	return 0;
	744
	745	case _IOC(IOC_OUT,'h', 4, 0): /* Create date in local time */
	746	{
	747	(time_t )(ap->a_data) = to_bsd_time(VTOVCB(ap->a_vp)->localCreateDate);
	748	return 0;
	749	}
	750
	751	default:
	752	return (ENOTTY);
	753	}
	754
	755	/* Should never get here */
	756	return 0;
	757	}
	758
	759	/* ARGSUSED */
	760	int
	761	hfs_select(ap)
	762	struct vop_select_args /* {
	763	struct vnode *a_vp;
	764	int a_which;
	765	int a_fflags;
	766	struct ucred *a_cred;
	767	void *a_wql;
	768	struct proc *a_p;
	769	} / ap;
	770	{
	771	/*
	772	* We should really check to see if I/O is possible.
	773	*/
	774	return (1);
	775	}
	776
	777	/*
	778	* Bmap converts a the logical block number of a file to its physical block
	779	* number on the disk.
	780	*/
	781
	782	/*
	783	* vp - address of vnode file the file
	784	* bn - which logical block to convert to a physical block number.
	785	* vpp - returns the vnode for the block special file holding the filesystem
	786	* containing the file of interest
	787	* bnp - address of where to return the filesystem physical block number
	788	#% bmap vp L L L
	789	#% bmap vpp - U -
	790	#
	791	vop_bmap {
	792	IN struct vnode *vp;
	793	IN daddr_t bn;
	794	OUT struct vnode **vpp;
	795	IN daddr_t *bnp;
	796	OUT int *runp;
	797	*/
	798	/*
	799	* Converts a logical block number to a physical block, and optionally returns
	800	* the amount of remaining blocks in a run. The logical block is based on hfsNode.logBlockSize.
	801	* The physical block number is based on the device block size, currently its 512.
	802	* The block run is returned in logical blocks, and is the REMAINING amount of blocks
	803	*/
	804
	805	int
	806	hfs_bmap(ap)
	807	struct vop_bmap_args /* {
	808	struct vnode *a_vp;
	809	daddr_t a_bn;
	810	struct vnode **a_vpp;
	811	daddr_t *a_bnp;
	812	int *a_runp;
	813	} / ap;
	814	{
	815	struct vnode *vp = ap->a_vp;
	816	struct cnode *cp = VTOC(vp);
	817	struct filefork *fp = VTOF(vp);
	818	struct hfsmount *hfsmp = VTOHFS(vp);
	819	int retval = E_NONE;
	820	daddr_t logBlockSize;
	821	size_t bytesContAvail = 0;
	822	off_t blockposition;
	823	struct proc *p = NULL;
	824	int lockExtBtree;
	825	struct rl_entry *invalid_range;
	826	enum rl_overlaptype overlaptype;
	827
	828	/*
	829	* Check for underlying vnode requests and ensure that logical
	830	* to physical mapping is requested.
	831	*/
	832	if (ap->a_vpp != NULL)
	833	*ap->a_vpp = cp->c_devvp;
	834	if (ap->a_bnp == NULL)
	835	return (0);
	836
	837	/* Only clustered I/O should have delayed allocations. */
	838	DBG_ASSERT(fp->ff_unallocblocks == 0);
	839
	840	logBlockSize = GetLogicalBlockSize(vp);
	841	blockposition = (off_t)ap->a_bn * (off_t)logBlockSize;
	842
	843	lockExtBtree = overflow_extents(fp);
	844	if (lockExtBtree) {
	845	p = current_proc();
	846	retval = hfs_metafilelocking(hfsmp, kHFSExtentsFileID,
	847	LK_EXCLUSIVE \| LK_CANRECURSE, p);
	848	if (retval)
	849	return (retval);
	850	}
	851
	852	retval = MacToVFSError(
	853	MapFileBlockC (HFSTOVCB(hfsmp),
	854	(FCB*)fp,
	855	MAXPHYSIO,
	856	blockposition,
	857	ap->a_bnp,
	858	&bytesContAvail));
	859
	860	if (lockExtBtree) (void) hfs_metafilelocking(hfsmp, kHFSExtentsFileID, LK_RELEASE, p);
	861
	862	if (retval == E_NONE) {
	863	/* Adjust the mapping information for invalid file ranges: */
	864	overlaptype = rl_scan(&fp->ff_invalidranges,
	865	blockposition,
	866	blockposition + MAXPHYSIO - 1,
	867	&invalid_range);
	868	if (overlaptype != RL_NOOVERLAP) {
	869	switch(overlaptype) {
	870	case RL_MATCHINGOVERLAP:
	871	case RL_OVERLAPCONTAINSRANGE:
	872	case RL_OVERLAPSTARTSBEFORE:
	873	/* There's no valid block for this byte offset: */
	874	*ap->a_bnp = (daddr_t)-1;
	875	bytesContAvail = invalid_range->rl_end + 1 - blockposition;
	876	break;
	877
	878	case RL_OVERLAPISCONTAINED:
	879	case RL_OVERLAPENDSAFTER:
	880	/* The range of interest hits an invalid block before the end: */
	881	if (invalid_range->rl_start == blockposition) {
	882	/* There's actually no valid information to be had starting here: */
	883	*ap->a_bnp = (daddr_t)-1;
	884	if ((fp->ff_size > (invalid_range->rl_end + 1)) &&
	885	(invalid_range->rl_end + 1 - blockposition < bytesContAvail)) {
	886	bytesContAvail = invalid_range->rl_end + 1 - blockposition;
	887	};
	888	} else {
	889	bytesContAvail = invalid_range->rl_start - blockposition;
	890	};
	891	break;
	892	};
	893	if (bytesContAvail > MAXPHYSIO) bytesContAvail = MAXPHYSIO;
	894	};
	895
	896	/* Figure out how many read ahead blocks there are */
	897	if (ap->a_runp != NULL) {
	898	if (can_cluster(logBlockSize)) {
	899	/* Make sure this result never goes negative: */
	900	*ap->a_runp = (bytesContAvail < logBlockSize) ? 0 : (bytesContAvail / logBlockSize) - 1;
	901	} else {
	902	*ap->a_runp = 0;
	903	};
	904	};
	905	};
	906
	907	return (retval);
	908	}
	909
	910	/* blktooff converts logical block number to file offset */
	911
	912	int
	913	hfs_blktooff(ap)
	914	struct vop_blktooff_args /* {
	915	struct vnode *a_vp;
	916	daddr_t a_lblkno;
	917	off_t *a_offset;
	918	} / ap;
	919	{
	920	if (ap->a_vp == NULL)
	921	return (EINVAL);
	922	ap->a_offset = (off_t)ap->a_lblkno PAGE_SIZE_64;
	923
	924	return(0);
	925	}
	926
	927	int
	928	hfs_offtoblk(ap)
	929	struct vop_offtoblk_args /* {
	930	struct vnode *a_vp;
	931	off_t a_offset;
	932	daddr_t *a_lblkno;
	933	} / ap;
	934	{
	935	if (ap->a_vp == NULL)
	936	return (EINVAL);
	937	*ap->a_lblkno = ap->a_offset / PAGE_SIZE_64;
	938
	939	return(0);
	940	}
	941
	942	int
	943	hfs_cmap(ap)
	944	struct vop_cmap_args /* {
	945	struct vnode *a_vp;
	946	off_t a_foffset;
	947	size_t a_size;
	948	daddr_t *a_bpn;
	949	size_t *a_run;
	950	void *a_poff;
	951	} / ap;
	952	{
	953	struct hfsmount *hfsmp = VTOHFS(ap->a_vp);
	954	struct filefork *fp = VTOF(ap->a_vp);
	955	size_t bytesContAvail = 0;
	956	int retval = E_NONE;
	957	int lockExtBtree = 0;
	958	struct proc *p = NULL;
	959	struct rl_entry *invalid_range;
	960	enum rl_overlaptype overlaptype;
	961	int started_tr = 0, grabbed_lock = 0;
	962	struct timeval tv;
	963
	964	/*
	965	* Check for underlying vnode requests and ensure that logical
	966	* to physical mapping is requested.
	967	*/
	968	if (ap->a_bpn == NULL)
	969	return (0);
	970
	971	p = current_proc();
	972
	973	if (ISSET(VTOC(ap->a_vp)->c_flag, C_NOBLKMAP)) {
	974	/*
	975	* File blocks are getting remapped. Wait until its finished.
	976	*/
	977	SET(VTOC(ap->a_vp)->c_flag, C_WBLKMAP);
	978	(void) tsleep((caddr_t)VTOC(ap->a_vp), PINOD, "hfs_cmap", 0);
	979	if (ISSET(VTOC(ap->a_vp)->c_flag, C_NOBLKMAP))
	980	panic("hfs_cmap: no mappable blocks");
	981	}
	982
	983	retry:
	984	if (fp->ff_unallocblocks) {
	985	lockExtBtree = 1;
	986
	987	// XXXdbg
	988	hfs_global_shared_lock_acquire(hfsmp);
	989	grabbed_lock = 1;
	990
	991	if (hfsmp->jnl) {
	992	if (journal_start_transaction(hfsmp->jnl) != 0) {
	993	hfs_global_shared_lock_release(hfsmp);
	994	return EINVAL;
	995	} else {
	996	started_tr = 1;
	997	}
	998	}
	999
	1000	if (retval = hfs_metafilelocking(hfsmp, kHFSExtentsFileID, LK_EXCLUSIVE \| LK_CANRECURSE, p)) {
	1001	if (started_tr) {
	1002	journal_end_transaction(hfsmp->jnl);
	1003	}
	1004	if (grabbed_lock) {
	1005	hfs_global_shared_lock_release(hfsmp);
	1006	}
	1007	return (retval);
	1008	}
	1009	} else if (overflow_extents(fp)) {
	1010	lockExtBtree = 1;
	1011	if (retval = hfs_metafilelocking(hfsmp, kHFSExtentsFileID, LK_EXCLUSIVE \| LK_CANRECURSE, p)) {
	1012	return retval;
	1013	}
	1014	}
	1015
	1016	/*
	1017	* Check for any delayed allocations.
	1018	*/
	1019	if (fp->ff_unallocblocks) {
	1020	SInt64 reqbytes, actbytes;
	1021
	1022	//
	1023	// Make sure we have a transaction. It's possible
	1024	// that we came in and fp->ff_unallocblocks was zero
	1025	// but during the time we blocked acquiring the extents
	1026	// btree, ff_unallocblocks became non-zero and so we
	1027	// will need to start a transaction.
	1028	//
	1029	if (hfsmp->jnl && started_tr == 0) {
	1030	if (lockExtBtree) {
	1031	(void) hfs_metafilelocking(hfsmp, kHFSExtentsFileID, LK_RELEASE, p);
	1032	lockExtBtree = 0;
	1033	}
	1034
	1035	goto retry;
	1036	}
	1037
	1038	reqbytes = (SInt64)fp->ff_unallocblocks *
	1039	(SInt64)HFSTOVCB(hfsmp)->blockSize;
	1040	/*
	1041	* Release the blocks on loan and aquire some real ones.
	1042	* Note that we can race someone else for these blocks
	1043	* (and lose) so cmap needs to handle a failure here.
	1044	* Currently this race can't occur because all allocations
	1045	* are protected by an exclusive lock on the Extents
	1046	* Overflow file.
	1047	*/
	1048	HFSTOVCB(hfsmp)->loanedBlocks -= fp->ff_unallocblocks;
	1049	FTOC(fp)->c_blocks -= fp->ff_unallocblocks;
	1050	fp->ff_blocks -= fp->ff_unallocblocks;
	1051	fp->ff_unallocblocks = 0;
	1052
	1053	/* Files that are changing size are not hot file candidates. */
	1054	if (hfsmp->hfc_stage == HFC_RECORDING) {
	1055	fp->ff_bytesread = 0;
	1056	}
	1057	while (retval == 0 && reqbytes > 0) {
	1058	retval = MacToVFSError(ExtendFileC(HFSTOVCB(hfsmp),
	1059	(FCB*)fp, reqbytes, 0,
	1060	kEFAllMask \| kEFNoClumpMask, &actbytes));
	1061	if (retval == 0 && actbytes == 0)
	1062	retval = ENOSPC;
	1063
	1064	if (retval) {
	1065	fp->ff_unallocblocks =
	1066	reqbytes / HFSTOVCB(hfsmp)->blockSize;
	1067	HFSTOVCB(hfsmp)->loanedBlocks += fp->ff_unallocblocks;
	1068	FTOC(fp)->c_blocks += fp->ff_unallocblocks;
	1069	fp->ff_blocks += fp->ff_unallocblocks;
	1070	}
	1071	reqbytes -= actbytes;
	1072	}
	1073
	1074	if (retval) {
	1075	(void) hfs_metafilelocking(hfsmp, kHFSExtentsFileID, LK_RELEASE, p);
	1076	VTOC(ap->a_vp)->c_flag \|= C_MODIFIED;
	1077	if (started_tr) {
	1078	tv = time;
	1079	VOP_UPDATE(ap->a_vp, &tv, &tv, 1);
	1080
	1081	hfs_flushvolumeheader(hfsmp, MNT_NOWAIT, 0);
	1082	journal_end_transaction(hfsmp->jnl);
	1083	}
	1084	if (grabbed_lock) {
	1085	hfs_global_shared_lock_release(hfsmp);
	1086	}
	1087	return (retval);
	1088	}
	1089	}
	1090
	1091	retval = MacToVFSError(
	1092	MapFileBlockC (HFSTOVCB(hfsmp),
	1093	(FCB *)fp,
	1094	ap->a_size,
	1095	ap->a_foffset,
	1096	ap->a_bpn,
	1097	&bytesContAvail));
	1098
	1099	if (lockExtBtree)
	1100	(void) hfs_metafilelocking(hfsmp, kHFSExtentsFileID, LK_RELEASE, p);
	1101
	1102	// XXXdbg
	1103	if (started_tr) {
	1104	tv = time;
	1105	retval = VOP_UPDATE(ap->a_vp, &tv, &tv, 1);
	1106
	1107	hfs_flushvolumeheader(hfsmp, MNT_NOWAIT, 0);
	1108	journal_end_transaction(hfsmp->jnl);
	1109	started_tr = 0;
	1110	}
	1111	if (grabbed_lock) {
	1112	hfs_global_shared_lock_release(hfsmp);
	1113	grabbed_lock = 0;
	1114	}
	1115
	1116	if (retval == E_NONE) {
	1117	/* Adjust the mapping information for invalid file ranges: */
	1118	overlaptype = rl_scan(&fp->ff_invalidranges,
	1119	ap->a_foffset,
	1120	ap->a_foffset + (off_t)bytesContAvail - 1,
	1121	&invalid_range);
	1122	if (overlaptype != RL_NOOVERLAP) {
	1123	switch(overlaptype) {
	1124	case RL_MATCHINGOVERLAP:
	1125	case RL_OVERLAPCONTAINSRANGE:
	1126	case RL_OVERLAPSTARTSBEFORE:
	1127	/* There's no valid block for this byte offset: */
	1128	*ap->a_bpn = (daddr_t)-1;
	1129
	1130	/* There's no point limiting the amount to be returned if the
	1131	invalid range that was hit extends all the way to the EOF
	1132	(i.e. there's no valid bytes between the end of this range
	1133	and the file's EOF):
	1134	*/
	1135	if ((fp->ff_size > (invalid_range->rl_end + 1)) &&
	1136	(invalid_range->rl_end + 1 - ap->a_foffset < bytesContAvail)) {
	1137	bytesContAvail = invalid_range->rl_end + 1 - ap->a_foffset;
	1138	};
	1139	break;
	1140
	1141	case RL_OVERLAPISCONTAINED:
	1142	case RL_OVERLAPENDSAFTER:
	1143	/* The range of interest hits an invalid block before the end: */
	1144	if (invalid_range->rl_start == ap->a_foffset) {
	1145	/* There's actually no valid information to be had starting here: */
	1146	*ap->a_bpn = (daddr_t)-1;
	1147	if ((fp->ff_size > (invalid_range->rl_end + 1)) &&
	1148	(invalid_range->rl_end + 1 - ap->a_foffset < bytesContAvail)) {
	1149	bytesContAvail = invalid_range->rl_end + 1 - ap->a_foffset;
	1150	};
	1151	} else {
	1152	bytesContAvail = invalid_range->rl_start - ap->a_foffset;
	1153	};
	1154	break;
	1155	};
	1156	if (bytesContAvail > ap->a_size) bytesContAvail = ap->a_size;
	1157	};
	1158
	1159	if (ap->a_run) *ap->a_run = bytesContAvail;
	1160	};
	1161
	1162	if (ap->a_poff)
	1163	(int )ap->a_poff = 0;
	1164
	1165	return (retval);
	1166	}
	1167
	1168
	1169	/*
	1170	* Read or write a buffer that is not contiguous on disk. We loop over
	1171	* each device block, copying to or from caller's buffer.
	1172	*
	1173	* We could be a bit more efficient by transferring as much data as is
	1174	* contiguous. But since this routine should rarely be called, and that
	1175	* would be more complicated; best to keep it simple.
	1176	*/
	1177	static int
	1178	hfs_strategy_fragmented(struct buf *bp)
	1179	{
	1180	register struct vnode *vp = bp->b_vp;
	1181	register struct cnode *cp = VTOC(vp);
	1182	register struct vnode *devvp = cp->c_devvp;
	1183	caddr_t ioaddr; /* Address of fragment within bp */
	1184	struct buf frag = NULL; / For reading or writing a single block */
	1185	int retval = 0;
	1186	long remaining; /* Bytes (in bp) left to transfer */
	1187	off_t offset; /* Logical offset of current fragment in vp */
	1188	u_long block_size; /* Size of one device block (and one I/O) */
	1189
	1190	/* Make sure we redo this mapping for the next I/O */
	1191	bp->b_blkno = bp->b_lblkno;
	1192
	1193	/* Set up the logical position and number of bytes to read/write */
	1194	offset = (off_t) bp->b_lblkno * (off_t) GetLogicalBlockSize(vp);
	1195	block_size = VTOHFS(vp)->hfs_phys_block_size;
	1196
	1197	/* Get an empty buffer to do the deblocking */
	1198	frag = geteblk(block_size);
	1199	if (ISSET(bp->b_flags, B_READ))
	1200	SET(frag->b_flags, B_READ);
	1201
	1202	for (ioaddr = bp->b_data, remaining = bp->b_bcount; remaining != 0;
	1203	ioaddr += block_size, offset += block_size,
	1204	remaining -= block_size) {
	1205	frag->b_resid = frag->b_bcount;
	1206	CLR(frag->b_flags, B_DONE);
	1207
	1208	/* Map the current position to a physical block number */
	1209	retval = VOP_CMAP(vp, offset, block_size, &frag->b_lblkno,
	1210	NULL, NULL);
	1211	if (retval != 0)
	1212	break;
	1213
	1214	/*
	1215	* Did we try to read a hole?
	1216	* (Should never happen for metadata!)
	1217	*/
	1218	if ((long)frag->b_lblkno == -1) {
	1219	bzero(ioaddr, block_size);
	1220	continue;
	1221	}
	1222
	1223	/* If writing, copy before I/O */
	1224	if (!ISSET(bp->b_flags, B_READ))
	1225	bcopy(ioaddr, frag->b_data, block_size);
	1226
	1227	/* Call the device to do the I/O and wait for it */
	1228	frag->b_blkno = frag->b_lblkno;
	1229	frag->b_vp = devvp; /* Used to dispatch via VOP_STRATEGY */
	1230	frag->b_dev = devvp->v_rdev;
	1231	retval = VOP_STRATEGY(frag);
	1232	frag->b_vp = NULL;
	1233	if (retval != 0)
	1234	break;
	1235	retval = biowait(frag);
	1236	if (retval != 0)
	1237	break;
	1238
	1239	/* If reading, copy after the I/O */
	1240	if (ISSET(bp->b_flags, B_READ))
	1241	bcopy(frag->b_data, ioaddr, block_size);
	1242	}
	1243
	1244	frag->b_vp = NULL;
	1245	//
	1246	// XXXdbg - in the case that this is a meta-data block, it won't affect
	1247	// the journal because this bp is for a physical disk block,
	1248	// not a logical block that is part of the catalog or extents
	1249	// files.
	1250	SET(frag->b_flags, B_INVAL);
	1251	brelse(frag);
	1252
	1253	if ((bp->b_error = retval) != 0)
	1254	SET(bp->b_flags, B_ERROR);
	1255
	1256	biodone(bp); /* This I/O is now complete */
	1257	return retval;
	1258	}
	1259
	1260
	1261	/*
	1262	* Calculate the logical to physical mapping if not done already,
	1263	* then call the device strategy routine.
	1264	#
	1265	#vop_strategy {
	1266	# IN struct buf *bp;
	1267	*/
	1268	int
	1269	hfs_strategy(ap)
	1270	struct vop_strategy_args /* {
	1271	struct buf *a_bp;
	1272	} / ap;
	1273	{
	1274	register struct buf *bp = ap->a_bp;
	1275	register struct vnode *vp = bp->b_vp;
	1276	register struct cnode *cp = VTOC(vp);
	1277	int retval = 0;
	1278	off_t offset;
	1279	size_t bytes_contig;
	1280
	1281	if ( !(bp->b_flags & B_VECTORLIST)) {
	1282	if (vp->v_type == VBLK \|\| vp->v_type == VCHR)
	1283	panic("hfs_strategy: device vnode passed!");
	1284
	1285	if (bp->b_flags & B_PAGELIST) {
	1286	/*
	1287	* If we have a page list associated with this bp,
	1288	* then go through cluster_bp since it knows how to
	1289	* deal with a page request that might span non-
	1290	* contiguous physical blocks on the disk...
	1291	*/
	1292	retval = cluster_bp(bp);
	1293	vp = cp->c_devvp;
	1294	bp->b_dev = vp->v_rdev;
	1295
	1296	return (retval);
	1297	}
	1298
	1299	/*
	1300	* If we don't already know the filesystem relative block
	1301	* number then get it using VOP_BMAP(). If VOP_BMAP()
	1302	* returns the block number as -1 then we've got a hole in
	1303	* the file. Although HFS filesystems don't create files with
	1304	* holes, invalidating of subranges of the file (lazy zero
	1305	* filling) may create such a situation.
	1306	*/
	1307	if (bp->b_blkno == bp->b_lblkno) {
	1308	offset = (off_t) bp->b_lblkno *
	1309	(off_t) GetLogicalBlockSize(vp);
	1310
	1311	if ((retval = VOP_CMAP(vp, offset, bp->b_bcount,
	1312	&bp->b_blkno, &bytes_contig, NULL))) {
	1313	bp->b_error = retval;
	1314	bp->b_flags \|= B_ERROR;
	1315	biodone(bp);
	1316	return (retval);
	1317	}
	1318	if (bytes_contig < bp->b_bcount)
	1319	{
	1320	/*
	1321	* We were asked to read a block that wasn't
	1322	* contiguous, so we have to read each of the
	1323	* pieces and copy them into the buffer.
	1324	* Since ordinary file I/O goes through
	1325	* cluster_io (which won't ask us for
	1326	* discontiguous data), this is probably an
	1327	* attempt to read or write metadata.
	1328	*/
	1329	return hfs_strategy_fragmented(bp);
	1330	}
	1331	if ((long)bp->b_blkno == -1)
	1332	clrbuf(bp);
	1333	}
	1334	if ((long)bp->b_blkno == -1) {
	1335	biodone(bp);
	1336	return (0);
	1337	}
	1338	if (bp->b_validend == 0) {
	1339	/*
	1340	* Record the exact size of the I/O transfer about to
	1341	* be made:
	1342	*/
	1343	bp->b_validend = bp->b_bcount;
	1344	}
	1345	}
	1346	vp = cp->c_devvp;
	1347	bp->b_dev = vp->v_rdev;
	1348
	1349	return VOCALL (vp->v_op, VOFFSET(vop_strategy), ap);
	1350	}
	1351
	1352
	1353	static int do_hfs_truncate(ap)
	1354	struct vop_truncate_args /* {
	1355	struct vnode *a_vp;
	1356	off_t a_length;
	1357	int a_flags;
	1358	struct ucred *a_cred;
	1359	struct proc *a_p;
	1360	} / ap;
	1361	{
	1362	register struct vnode *vp = ap->a_vp;
	1363	register struct cnode *cp = VTOC(vp);
	1364	struct filefork *fp = VTOF(vp);
	1365	off_t length;
	1366	long vflags;
	1367	struct timeval tv;
	1368	int retval;
	1369	off_t bytesToAdd;
	1370	off_t actualBytesAdded;
	1371	off_t filebytes;
	1372	u_long fileblocks;
	1373	int blksize;
	1374	struct hfsmount *hfsmp;
	1375
	1376	if (vp->v_type != VREG && vp->v_type != VLNK)
	1377	return (EISDIR); /* cannot truncate an HFS directory! */
	1378
	1379	length = ap->a_length;
	1380	blksize = VTOVCB(vp)->blockSize;
	1381	fileblocks = fp->ff_blocks;
	1382	filebytes = (off_t)fileblocks * (off_t)blksize;
	1383
	1384	KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) \| DBG_FUNC_START,
	1385	(int)length, (int)fp->ff_size, (int)filebytes, 0, 0);
	1386
	1387	if (length < 0)
	1388	return (EINVAL);
	1389
	1390	if ((!ISHFSPLUS(VTOVCB(vp))) && (length > (off_t)MAXHFSFILESIZE))
	1391	return (EFBIG);
	1392
	1393	hfsmp = VTOHFS(vp);
	1394
	1395	tv = time;
	1396	retval = E_NONE;
	1397
	1398	/* Files that are changing size are not hot file candidates. */
	1399	if (hfsmp->hfc_stage == HFC_RECORDING) {
	1400	fp->ff_bytesread = 0;
	1401	}
	1402
	1403	/*
	1404	* We cannot just check if fp->ff_size == length (as an optimization)
	1405	* since there may be extra physical blocks that also need truncation.
	1406	*/
	1407	#if QUOTA
	1408	if (retval = hfs_getinoquota(cp))
	1409	return(retval);
	1410	#endif /* QUOTA */
	1411
	1412	/*
	1413	* Lengthen the size of the file. We must ensure that the
	1414	* last byte of the file is allocated. Since the smallest
	1415	* value of ff_size is 0, length will be at least 1.
	1416	*/
	1417	if (length > fp->ff_size) {
	1418	#if QUOTA
	1419	retval = hfs_chkdq(cp, (int64_t)(roundup(length - filebytes, blksize)),
	1420	ap->a_cred, 0);
	1421	if (retval)
	1422	goto Err_Exit;
	1423	#endif /* QUOTA */
	1424	/*
	1425	* If we don't have enough physical space then
	1426	* we need to extend the physical size.
	1427	*/
	1428	if (length > filebytes) {
	1429	int eflags;
	1430	u_long blockHint = 0;
	1431
	1432	/* All or nothing and don't round up to clumpsize. */
	1433	eflags = kEFAllMask \| kEFNoClumpMask;
	1434
	1435	if (ap->a_cred && suser(ap->a_cred, NULL) != 0)
	1436	eflags \|= kEFReserveMask; /* keep a reserve */
	1437
	1438	/*
	1439	* Allocate Journal and Quota files in metadata zone.
	1440	*/
	1441	if (filebytes == 0 &&
	1442	hfsmp->hfs_flags & HFS_METADATA_ZONE &&
	1443	hfs_virtualmetafile(cp)) {
	1444	eflags \|= kEFMetadataMask;
	1445	blockHint = hfsmp->hfs_metazone_start;
	1446	}
	1447	// XXXdbg
	1448	hfs_global_shared_lock_acquire(hfsmp);
	1449	if (hfsmp->jnl) {
	1450	if (journal_start_transaction(hfsmp->jnl) != 0) {
	1451	retval = EINVAL;
	1452	goto Err_Exit;
	1453	}
	1454	}
	1455
	1456	/* lock extents b-tree (also protects volume bitmap) */
	1457	retval = hfs_metafilelocking(VTOHFS(vp), kHFSExtentsFileID, LK_EXCLUSIVE, ap->a_p);
	1458	if (retval) {
	1459	if (hfsmp->jnl) {
	1460	journal_end_transaction(hfsmp->jnl);
	1461	}
	1462	hfs_global_shared_lock_release(hfsmp);
	1463
	1464	goto Err_Exit;
	1465	}
	1466
	1467	while ((length > filebytes) && (retval == E_NONE)) {
	1468	bytesToAdd = length - filebytes;
	1469	retval = MacToVFSError(ExtendFileC(VTOVCB(vp),
	1470	(FCB*)fp,
	1471	bytesToAdd,
	1472	blockHint,
	1473	eflags,
	1474	&actualBytesAdded));
	1475
	1476	filebytes = (off_t)fp->ff_blocks * (off_t)blksize;
	1477	if (actualBytesAdded == 0 && retval == E_NONE) {
	1478	if (length > filebytes)
	1479	length = filebytes;
	1480	break;
	1481	}
	1482	} /* endwhile */
	1483
	1484	(void) hfs_metafilelocking(VTOHFS(vp), kHFSExtentsFileID, LK_RELEASE, ap->a_p);
	1485
	1486	// XXXdbg
	1487	if (hfsmp->jnl) {
	1488	tv = time;
	1489	VOP_UPDATE(vp, &tv, &tv, 1);
	1490
	1491	hfs_flushvolumeheader(hfsmp, MNT_NOWAIT, 0);
	1492	journal_end_transaction(hfsmp->jnl);
	1493	}
	1494	hfs_global_shared_lock_release(hfsmp);
	1495
	1496	if (retval)
	1497	goto Err_Exit;
	1498
	1499	KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) \| DBG_FUNC_NONE,
	1500	(int)length, (int)fp->ff_size, (int)filebytes, 0, 0);
	1501	}
	1502
	1503	if (!(ap->a_flags & IO_NOZEROFILL)) {
	1504	if (UBCINFOEXISTS(vp) && retval == E_NONE) {
	1505	struct rl_entry *invalid_range;
	1506	int devBlockSize;
	1507	off_t zero_limit;
	1508
	1509	zero_limit = (fp->ff_size + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64;
	1510	if (length < zero_limit) zero_limit = length;
	1511
	1512	if (length > fp->ff_size) {
	1513	/* Extending the file: time to fill out the current last page w. zeroes? */
	1514	if ((fp->ff_size & PAGE_MASK_64) &&
	1515	(rl_scan(&fp->ff_invalidranges, fp->ff_size & ~PAGE_MASK_64,
	1516	fp->ff_size - 1, &invalid_range) == RL_NOOVERLAP)) {
	1517
	1518	/* There's some valid data at the start of the (current) last page
	1519	of the file, so zero out the remainder of that page to ensure the
	1520	entire page contains valid data. Since there is no invalid range
	1521	possible past the (current) eof, there's no need to remove anything
	1522	from the invalid range list before calling cluster_write(): */
	1523	VOP_DEVBLOCKSIZE(cp->c_devvp, &devBlockSize);
	1524	retval = cluster_write(vp, (struct uio *) 0, fp->ff_size, zero_limit,
	1525	fp->ff_size, (off_t)0, devBlockSize,
	1526	(ap->a_flags & IO_SYNC) \| IO_HEADZEROFILL \| IO_NOZERODIRTY);
	1527	if (retval) goto Err_Exit;
	1528
	1529	/* Merely invalidate the remaining area, if necessary: */
	1530	if (length > zero_limit) {
	1531	rl_add(zero_limit, length - 1, &fp->ff_invalidranges);
	1532	cp->c_zftimeout = time.tv_sec + ZFTIMELIMIT;
	1533	}
	1534	} else {
	1535	/* The page containing the (current) eof is invalid: just add the
	1536	remainder of the page to the invalid list, along with the area
	1537	being newly allocated:
	1538	*/
	1539	rl_add(fp->ff_size, length - 1, &fp->ff_invalidranges);
	1540	cp->c_zftimeout = time.tv_sec + ZFTIMELIMIT;
	1541	};
	1542	}
	1543	} else {
	1544	panic("hfs_truncate: invoked on non-UBC object?!");
	1545	};
	1546	}
	1547	cp->c_flag \|= C_UPDATE;
	1548	fp->ff_size = length;
	1549
	1550	if (UBCISVALID(vp))
	1551	ubc_setsize(vp, fp->ff_size); /* XXX check errors */
	1552
	1553	} else { /* Shorten the size of the file */
	1554
	1555	if (fp->ff_size > length) {
	1556	/*
	1557	* Any buffers that are past the truncation point need to be
	1558	* invalidated (to maintain buffer cache consistency). For
	1559	* simplicity, we invalidate all the buffers by calling vinvalbuf.
	1560	*/
	1561	if (UBCISVALID(vp))
	1562	ubc_setsize(vp, length); /* XXX check errors */
	1563
	1564	vflags = ((length > 0) ? V_SAVE : 0) \| V_SAVEMETA;
	1565	retval = vinvalbuf(vp, vflags, ap->a_cred, ap->a_p, 0, 0);
	1566
	1567	/* Any space previously marked as invalid is now irrelevant: */
	1568	rl_remove(length, fp->ff_size - 1, &fp->ff_invalidranges);
	1569	}
	1570
	1571	/*
	1572	* Account for any unmapped blocks. Note that the new
	1573	* file length can still end up with unmapped blocks.
	1574	*/
	1575	if (fp->ff_unallocblocks > 0) {
	1576	u_int32_t finalblks;
	1577
	1578	/* lock extents b-tree */
	1579	retval = hfs_metafilelocking(VTOHFS(vp), kHFSExtentsFileID,
	1580	LK_EXCLUSIVE, ap->a_p);
	1581	if (retval)
	1582	goto Err_Exit;
	1583
	1584	VTOVCB(vp)->loanedBlocks -= fp->ff_unallocblocks;
	1585	cp->c_blocks -= fp->ff_unallocblocks;
	1586	fp->ff_blocks -= fp->ff_unallocblocks;
	1587	fp->ff_unallocblocks = 0;
	1588
	1589	finalblks = (length + blksize - 1) / blksize;
	1590	if (finalblks > fp->ff_blocks) {
	1591	/* calculate required unmapped blocks */
	1592	fp->ff_unallocblocks = finalblks - fp->ff_blocks;
	1593	VTOVCB(vp)->loanedBlocks += fp->ff_unallocblocks;
	1594	cp->c_blocks += fp->ff_unallocblocks;
	1595	fp->ff_blocks += fp->ff_unallocblocks;
	1596	}
	1597	(void) hfs_metafilelocking(VTOHFS(vp), kHFSExtentsFileID,
	1598	LK_RELEASE, ap->a_p);
	1599	}
	1600
	1601	/*
	1602	* For a TBE process the deallocation of the file blocks is
	1603	* delayed until the file is closed. And hfs_close calls
	1604	* truncate with the IO_NDELAY flag set. So when IO_NDELAY
	1605	* isn't set, we make sure this isn't a TBE process.
	1606	*/
	1607	if ((ap->a_flags & IO_NDELAY) \|\| (!ISSET(ap->a_p->p_flag, P_TBE))) {
	1608	#if QUOTA
	1609	off_t savedbytes = ((off_t)fp->ff_blocks * (off_t)blksize);
	1610	#endif /* QUOTA */
	1611	// XXXdbg
	1612	hfs_global_shared_lock_acquire(hfsmp);
	1613	if (hfsmp->jnl) {
	1614	if (journal_start_transaction(hfsmp->jnl) != 0) {
	1615	retval = EINVAL;
	1616	goto Err_Exit;
	1617	}
	1618	}
	1619
	1620	/* lock extents b-tree (also protects volume bitmap) */
	1621	retval = hfs_metafilelocking(VTOHFS(vp), kHFSExtentsFileID, LK_EXCLUSIVE, ap->a_p);
	1622	if (retval) {
	1623	if (hfsmp->jnl) {
	1624	journal_end_transaction(hfsmp->jnl);
	1625	}
	1626	hfs_global_shared_lock_release(hfsmp);
	1627	goto Err_Exit;
	1628	}
	1629
	1630	if (fp->ff_unallocblocks == 0)
	1631	retval = MacToVFSError(TruncateFileC(VTOVCB(vp),
	1632	(FCB*)fp, length, false));
	1633
	1634	(void) hfs_metafilelocking(VTOHFS(vp), kHFSExtentsFileID, LK_RELEASE, ap->a_p);
	1635
	1636	// XXXdbg
	1637	if (hfsmp->jnl) {
	1638	tv = time;
	1639	VOP_UPDATE(vp, &tv, &tv, 1);
	1640
	1641	hfs_flushvolumeheader(hfsmp, MNT_NOWAIT, 0);
	1642	journal_end_transaction(hfsmp->jnl);
	1643	}
	1644	hfs_global_shared_lock_release(hfsmp);
	1645
	1646	filebytes = (off_t)fp->ff_blocks * (off_t)blksize;
	1647	if (retval)
	1648	goto Err_Exit;
	1649	#if QUOTA
	1650	/* These are bytesreleased */
	1651	(void) hfs_chkdq(cp, (int64_t)-(savedbytes - filebytes), NOCRED, 0);
	1652	#endif /* QUOTA */
	1653	}
	1654	/* Only set update flag if the logical length changes */
	1655	if (fp->ff_size != length)
	1656	cp->c_flag \|= C_UPDATE;
	1657	fp->ff_size = length;
	1658	}
	1659	cp->c_flag \|= C_CHANGE;
	1660	retval = VOP_UPDATE(vp, &tv, &tv, MNT_WAIT);
	1661	if (retval) {
	1662	KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) \| DBG_FUNC_NONE,
	1663	-1, -1, -1, retval, 0);
	1664	}
	1665
	1666	Err_Exit:
	1667
	1668	KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) \| DBG_FUNC_END,
	1669	(int)length, (int)fp->ff_size, (int)filebytes, retval, 0);
	1670
	1671	return (retval);
	1672	}
	1673
	1674
	1675	/*
	1676	#
	1677	#% truncate vp L L L
	1678	#
	1679	vop_truncate {
	1680	IN struct vnode *vp;
	1681	IN off_t length;
	1682	IN int flags; (IO_SYNC)
	1683	IN struct ucred *cred;
	1684	IN struct proc *p;
	1685	};
	1686	* Truncate a cnode to at most length size, freeing (or adding) the
	1687	* disk blocks.
	1688	*/
	1689	int hfs_truncate(ap)
	1690	struct vop_truncate_args /* {
	1691	struct vnode *a_vp;
	1692	off_t a_length;
	1693	int a_flags;
	1694	struct ucred *a_cred;
	1695	struct proc *a_p;
	1696	} / ap;
	1697	{
	1698	register struct vnode *vp = ap->a_vp;
	1699	register struct cnode *cp = VTOC(vp);
	1700	struct filefork *fp = VTOF(vp);
	1701	off_t length;
	1702	off_t filebytes;
	1703	u_long fileblocks;
	1704	int blksize, error;
	1705	u_int64_t nsize;
	1706
	1707	if (vp->v_type != VREG && vp->v_type != VLNK)
	1708	return (EISDIR); /* cannot truncate an HFS directory! */
	1709
	1710	length = ap->a_length;
	1711	blksize = VTOVCB(vp)->blockSize;
	1712	fileblocks = fp->ff_blocks;
	1713	filebytes = (off_t)fileblocks * (off_t)blksize;
	1714
	1715	// have to loop truncating or growing files that are
	1716	// really big because otherwise transactions can get
	1717	// enormous and consume too many kernel resources.
	1718	if (length < filebytes && (filebytes - length) > HFS_BIGFILE_SIZE) {
	1719	while (filebytes > length) {
	1720	if ((filebytes - length) > HFS_BIGFILE_SIZE) {
	1721	filebytes -= HFS_BIGFILE_SIZE;
	1722	} else {
	1723	filebytes = length;
	1724	}
	1725
	1726	ap->a_length = filebytes;
	1727	error = do_hfs_truncate(ap);
	1728	if (error)
	1729	break;
	1730	}
	1731	} else if (length > filebytes && (length - filebytes) > HFS_BIGFILE_SIZE) {
	1732	while (filebytes < length) {
	1733	if ((length - filebytes) > HFS_BIGFILE_SIZE) {
	1734	filebytes += HFS_BIGFILE_SIZE;
	1735	} else {
	1736	filebytes = (length - filebytes);
	1737	}
	1738
	1739	ap->a_length = filebytes;
	1740	error = do_hfs_truncate(ap);
	1741	if (error)
	1742	break;
	1743	}
	1744	} else {
	1745	error = do_hfs_truncate(ap);
	1746	}
	1747
	1748	return error;
	1749	}
	1750
	1751
	1752
	1753	/*
	1754	#
	1755	#% allocate vp L L L
	1756	#
	1757	vop_allocate {
	1758	IN struct vnode *vp;
	1759	IN off_t length;
	1760	IN int flags;
	1761	OUT off_t *bytesallocated;
	1762	IN off_t offset;
	1763	IN struct ucred *cred;
	1764	IN struct proc *p;
	1765	};
	1766	* allocate a cnode to at most length size
	1767	*/
	1768	int hfs_allocate(ap)
	1769	struct vop_allocate_args /* {
	1770	struct vnode *a_vp;
	1771	off_t a_length;
	1772	u_int32_t a_flags;
	1773	off_t *a_bytesallocated;
	1774	off_t a_offset;
	1775	struct ucred *a_cred;
	1776	struct proc *a_p;
	1777	} / ap;
	1778	{
	1779	struct vnode *vp = ap->a_vp;
	1780	struct cnode *cp = VTOC(vp);
	1781	struct filefork *fp = VTOF(vp);
	1782	ExtendedVCB *vcb = VTOVCB(vp);
	1783	off_t length = ap->a_length;
	1784	off_t startingPEOF;
	1785	off_t moreBytesRequested;
	1786	off_t actualBytesAdded;
	1787	off_t filebytes;
	1788	u_long fileblocks;
	1789	long vflags;
	1790	struct timeval tv;
	1791	int retval, retval2;
	1792	UInt32 blockHint;
	1793	UInt32 extendFlags; /* For call to ExtendFileC */
	1794	struct hfsmount *hfsmp;
	1795
	1796	hfsmp = VTOHFS(vp);
	1797
	1798	*(ap->a_bytesallocated) = 0;
	1799	fileblocks = fp->ff_blocks;
	1800	filebytes = (off_t)fileblocks * (off_t)vcb->blockSize;
	1801
	1802	if (length < (off_t)0)
	1803	return (EINVAL);
	1804	if (vp->v_type != VREG)
	1805	return (EISDIR);
	1806	if ((ap->a_flags & ALLOCATEFROMVOL) && (length < filebytes))
	1807	return (EINVAL);
	1808
	1809	/* Fill in the flags word for the call to Extend the file */
	1810
	1811	extendFlags = kEFNoClumpMask;
	1812	if (ap->a_flags & ALLOCATECONTIG)
	1813	extendFlags \|= kEFContigMask;
	1814	if (ap->a_flags & ALLOCATEALL)
	1815	extendFlags \|= kEFAllMask;
	1816	if (ap->a_cred && suser(ap->a_cred, NULL) != 0)
	1817	extendFlags \|= kEFReserveMask;
	1818
	1819	tv = time;
	1820	retval = E_NONE;
	1821	blockHint = 0;
	1822	startingPEOF = filebytes;
	1823
	1824	if (ap->a_flags & ALLOCATEFROMPEOF)
	1825	length += filebytes;
	1826	else if (ap->a_flags & ALLOCATEFROMVOL)
	1827	blockHint = ap->a_offset / VTOVCB(vp)->blockSize;
	1828
	1829	/* If no changes are necesary, then we're done */
	1830	if (filebytes == length)
	1831	goto Std_Exit;
	1832
	1833	/*
	1834	* Lengthen the size of the file. We must ensure that the
	1835	* last byte of the file is allocated. Since the smallest
	1836	* value of filebytes is 0, length will be at least 1.
	1837	*/
	1838	if (length > filebytes) {
	1839	moreBytesRequested = length - filebytes;
	1840
	1841	#if QUOTA
	1842	retval = hfs_chkdq(cp,
	1843	(int64_t)(roundup(moreBytesRequested, vcb->blockSize)),
	1844	ap->a_cred, 0);
	1845	if (retval)
	1846	return (retval);
	1847
	1848	#endif /* QUOTA */
	1849	/*
	1850	* Metadata zone checks.
	1851	*/
	1852	if (hfsmp->hfs_flags & HFS_METADATA_ZONE) {
	1853	/*
	1854	* Allocate Journal and Quota files in metadata zone.
	1855	*/
	1856	if (hfs_virtualmetafile(cp)) {
	1857	extendFlags \|= kEFMetadataMask;
	1858	blockHint = hfsmp->hfs_metazone_start;
	1859	} else if ((blockHint >= hfsmp->hfs_metazone_start) &&
	1860	(blockHint <= hfsmp->hfs_metazone_end)) {
	1861	/*
	1862	* Move blockHint outside metadata zone.
	1863	*/
	1864	blockHint = hfsmp->hfs_metazone_end + 1;
	1865	}
	1866	}
	1867
	1868	// XXXdbg
	1869	hfs_global_shared_lock_acquire(hfsmp);
	1870	if (hfsmp->jnl) {
	1871	if (journal_start_transaction(hfsmp->jnl) != 0) {
	1872	retval = EINVAL;
	1873	goto Err_Exit;
	1874	}
	1875	}
	1876
	1877	/* lock extents b-tree (also protects volume bitmap) */
	1878	retval = hfs_metafilelocking(VTOHFS(vp), kHFSExtentsFileID, LK_EXCLUSIVE, ap->a_p);
	1879	if (retval) {
	1880	if (hfsmp->jnl) {
	1881	journal_end_transaction(hfsmp->jnl);
	1882	}
	1883	hfs_global_shared_lock_release(hfsmp);
	1884	goto Err_Exit;
	1885	}
	1886
	1887	retval = MacToVFSError(ExtendFileC(vcb,
	1888	(FCB*)fp,
	1889	moreBytesRequested,
	1890	blockHint,
	1891	extendFlags,
	1892	&actualBytesAdded));
	1893
	1894	*(ap->a_bytesallocated) = actualBytesAdded;
	1895	filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize;
	1896
	1897	(void) hfs_metafilelocking(VTOHFS(vp), kHFSExtentsFileID, LK_RELEASE, ap->a_p);
	1898
	1899	// XXXdbg
	1900	if (hfsmp->jnl) {
	1901	tv = time;
	1902	VOP_UPDATE(vp, &tv, &tv, 1);
	1903
	1904	hfs_flushvolumeheader(hfsmp, MNT_NOWAIT, 0);
	1905	journal_end_transaction(hfsmp->jnl);
	1906	}
	1907	hfs_global_shared_lock_release(hfsmp);
	1908
	1909	/*
	1910	* if we get an error and no changes were made then exit
	1911	* otherwise we must do the VOP_UPDATE to reflect the changes
	1912	*/
	1913	if (retval && (startingPEOF == filebytes))
	1914	goto Err_Exit;
	1915
	1916	/*
	1917	* Adjust actualBytesAdded to be allocation block aligned, not
	1918	* clump size aligned.
	1919	* NOTE: So what we are reporting does not affect reality
	1920	* until the file is closed, when we truncate the file to allocation
	1921	* block size.
	1922	*/
	1923	if ((actualBytesAdded != 0) && (moreBytesRequested < actualBytesAdded))
	1924	*(ap->a_bytesallocated) =
	1925	roundup(moreBytesRequested, (off_t)vcb->blockSize);
	1926
	1927	} else { /* Shorten the size of the file */
	1928
	1929	if (fp->ff_size > length) {
	1930	/*
	1931	* Any buffers that are past the truncation point need to be
	1932	* invalidated (to maintain buffer cache consistency). For
	1933	* simplicity, we invalidate all the buffers by calling vinvalbuf.
	1934	*/
	1935	vflags = ((length > 0) ? V_SAVE : 0) \| V_SAVEMETA;
	1936	(void) vinvalbuf(vp, vflags, ap->a_cred, ap->a_p, 0, 0);
	1937	}
	1938
	1939	// XXXdbg
	1940	hfs_global_shared_lock_acquire(hfsmp);
	1941	if (hfsmp->jnl) {
	1942	if (journal_start_transaction(hfsmp->jnl) != 0) {
	1943	retval = EINVAL;
	1944	goto Err_Exit;
	1945	}
	1946	}
	1947
	1948	/* lock extents b-tree (also protects volume bitmap) */
	1949	retval = hfs_metafilelocking(VTOHFS(vp), kHFSExtentsFileID, LK_EXCLUSIVE, ap->a_p);
	1950	if (retval) {
	1951	if (hfsmp->jnl) {
	1952	journal_end_transaction(hfsmp->jnl);
	1953	}
	1954	hfs_global_shared_lock_release(hfsmp);
	1955
	1956	goto Err_Exit;
	1957	}
	1958
	1959	retval = MacToVFSError(
	1960	TruncateFileC(
	1961	vcb,
	1962	(FCB*)fp,
	1963	length,
	1964	false));
	1965	(void) hfs_metafilelocking(VTOHFS(vp), kHFSExtentsFileID, LK_RELEASE, ap->a_p);
	1966	filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize;
	1967
	1968	if (hfsmp->jnl) {
	1969	tv = time;
	1970	VOP_UPDATE(vp, &tv, &tv, 1);
	1971
	1972	hfs_flushvolumeheader(hfsmp, MNT_NOWAIT, 0);
	1973	journal_end_transaction(hfsmp->jnl);
	1974	}
	1975	hfs_global_shared_lock_release(hfsmp);
	1976
	1977
	1978	/*
	1979	* if we get an error and no changes were made then exit
	1980	* otherwise we must do the VOP_UPDATE to reflect the changes
	1981	*/
	1982	if (retval && (startingPEOF == filebytes)) goto Err_Exit;
	1983	#if QUOTA
	1984	/* These are bytesreleased */
	1985	(void) hfs_chkdq(cp, (int64_t)-((startingPEOF - filebytes)), NOCRED,0);
	1986	#endif /* QUOTA */
	1987
	1988	if (fp->ff_size > filebytes) {
	1989	fp->ff_size = filebytes;
	1990
	1991	if (UBCISVALID(vp))
	1992	ubc_setsize(vp, fp->ff_size); /* XXX check errors */
	1993	}
	1994	}
	1995
	1996	Std_Exit:
	1997	cp->c_flag \|= C_CHANGE \| C_UPDATE;
	1998	retval2 = VOP_UPDATE(vp, &tv, &tv, MNT_WAIT);
	1999
	2000	if (retval == 0)
	2001	retval = retval2;
	2002	Err_Exit:
	2003	return (retval);
	2004	}
	2005
	2006
	2007	/*
	2008	* pagein for HFS filesystem
	2009	*/
	2010	int
	2011	hfs_pagein(ap)
	2012	struct vop_pagein_args /* {
	2013	struct vnode *a_vp,
	2014	upl_t a_pl,
	2015	vm_offset_t a_pl_offset,
	2016	off_t a_f_offset,
	2017	size_t a_size,
	2018	struct ucred *a_cred,
	2019	int a_flags
	2020	} / ap;
	2021	{
	2022	register struct vnode *vp = ap->a_vp;
	2023	int devBlockSize = 0;
	2024	int error;
	2025
	2026	if (vp->v_type != VREG)
	2027	panic("hfs_pagein: vp not UBC type\n");
	2028
	2029	VOP_DEVBLOCKSIZE(VTOC(vp)->c_devvp, &devBlockSize);
	2030
	2031	error = cluster_pagein(vp, ap->a_pl, ap->a_pl_offset, ap->a_f_offset,
	2032	ap->a_size, (off_t)VTOF(vp)->ff_size, devBlockSize,
	2033	ap->a_flags);
	2034	/*
	2035	* Keep track blocks read
	2036	*/
	2037	if (VTOHFS(vp)->hfc_stage == HFC_RECORDING && error == 0) {
	2038	struct cnode *cp;
	2039
	2040	cp = VTOC(vp);
	2041	/*
	2042	* If this file hasn't been seen since the start of
	2043	* the current sampling period then start over.
	2044	*/
	2045	if (cp->c_atime < VTOHFS(vp)->hfc_timebase)
	2046	VTOF(vp)->ff_bytesread = ap->a_size;
	2047	else
	2048	VTOF(vp)->ff_bytesread += ap->a_size;
	2049
	2050	cp->c_flag \|= C_ACCESS;
	2051	}
	2052
	2053	return (error);
	2054	}
	2055
	2056	/*
	2057	* pageout for HFS filesystem.
	2058	*/
	2059	int
	2060	hfs_pageout(ap)
	2061	struct vop_pageout_args /* {
	2062	struct vnode *a_vp,
	2063	upl_t a_pl,
	2064	vm_offset_t a_pl_offset,
	2065	off_t a_f_offset,
	2066	size_t a_size,
	2067	struct ucred *a_cred,
	2068	int a_flags
	2069	} / ap;
	2070	{
	2071	struct vnode *vp = ap->a_vp;
	2072	struct cnode *cp = VTOC(vp);
	2073	struct filefork *fp = VTOF(vp);
	2074	int retval;
	2075	int devBlockSize = 0;
	2076	off_t end_of_range;
	2077	off_t filesize;
	2078
	2079	if (UBCINVALID(vp))
	2080	panic("hfs_pageout: Not a VREG: vp=%x", vp);
	2081
	2082	VOP_DEVBLOCKSIZE(cp->c_devvp, &devBlockSize);
	2083	filesize = fp->ff_size;
	2084	end_of_range = ap->a_f_offset + ap->a_size - 1;
	2085
	2086	if (cp->c_flag & C_RELOCATING) {
	2087	if (end_of_range < (filesize / 2)) {
	2088	return (EBUSY);
	2089	}
	2090	}
	2091
	2092	if (end_of_range >= filesize)
	2093	end_of_range = (off_t)(filesize - 1);
	2094	if (ap->a_f_offset < filesize) {
	2095	rl_remove(ap->a_f_offset, end_of_range, &fp->ff_invalidranges);
	2096	cp->c_flag \|= C_MODIFIED; /* leof is dirty */
	2097	}
	2098
	2099	retval = cluster_pageout(vp, ap->a_pl, ap->a_pl_offset, ap->a_f_offset, ap->a_size,
	2100	filesize, devBlockSize, ap->a_flags);
	2101
	2102	/*
	2103	* If we successfully wrote any data, and we are not the superuser
	2104	* we clear the setuid and setgid bits as a precaution against
	2105	* tampering.
	2106	*/
	2107	if (retval == 0 && ap->a_cred && ap->a_cred->cr_uid != 0)
	2108	cp->c_mode &= ~(S_ISUID \| S_ISGID);
	2109
	2110	return (retval);
	2111	}
	2112
	2113	/*
	2114	* Intercept B-Tree node writes to unswap them if necessary.
	2115	#
	2116	#vop_bwrite {
	2117	# IN struct buf *bp;
	2118	*/
	2119	int
	2120	hfs_bwrite(ap)
	2121	struct vop_bwrite_args /* {
	2122	struct buf *a_bp;
	2123	} / ap;
	2124	{
	2125	int retval = 0;
	2126	register struct buf *bp = ap->a_bp;
	2127	register struct vnode *vp = bp->b_vp;
	2128	#if BYTE_ORDER == LITTLE_ENDIAN
	2129	BlockDescriptor block;
	2130
	2131	/* Trap B-Tree writes */
	2132	if ((VTOC(vp)->c_fileid == kHFSExtentsFileID) \|\|
	2133	(VTOC(vp)->c_fileid == kHFSCatalogFileID)) {
	2134
	2135	/* Swap if the B-Tree node is in native byte order */
	2136	if (((UInt16 )((char )bp->b_data + bp->b_bcount - 2))[0] == 0x000e) {
	2137	/* Prepare the block pointer */
	2138	block.blockHeader = bp;
	2139	block.buffer = bp->b_data;
	2140	/* not found in cache ==> came from disk */
	2141	block.blockReadFromDisk = (bp->b_flags & B_CACHE) == 0;
	2142	block.blockSize = bp->b_bcount;
	2143
	2144	/* Endian un-swap B-Tree node */
	2145	SWAP_BT_NODE (&block, ISHFSPLUS (VTOVCB(vp)), VTOC(vp)->c_fileid, 1);
	2146	}
	2147
	2148	/* We don't check to make sure that it's 0x0e00 because it could be all zeros */
	2149	}
	2150	#endif
	2151	/* This buffer shouldn't be locked anymore but if it is clear it */
	2152	if (ISSET(bp->b_flags, B_LOCKED)) {
	2153	// XXXdbg
	2154	if (VTOHFS(vp)->jnl) {
	2155	panic("hfs: CLEARING the lock bit on bp 0x%x\n", bp);
	2156	}
	2157	CLR(bp->b_flags, B_LOCKED);
	2158	printf("hfs_bwrite: called with lock bit set\n");
	2159	}
	2160	retval = vn_bwrite (ap);
	2161
	2162	return (retval);
	2163	}
	2164
	2165	/*
	2166	* Relocate a file to a new location on disk
	2167	* cnode must be locked on entry
	2168	*
	2169	* Relocation occurs by cloning the file's data from its
	2170	* current set of blocks to a new set of blocks. During
	2171	* the relocation all of the blocks (old and new) are
	2172	* owned by the file.
	2173	*
	2174	* -----------------
	2175	* \|///////////////\|
	2176	* -----------------
	2177	* 0 N (file offset)
	2178	*
	2179	* ----------------- -----------------
	2180	* \|///////////////\| \| \| STEP 1 (aquire new blocks)
	2181	* ----------------- -----------------
	2182	* 0 N N+1 2N
	2183	*
	2184	* ----------------- -----------------
	2185	* \|///////////////\| \|///////////////\| STEP 2 (clone data)
	2186	* ----------------- -----------------
	2187	* 0 N N+1 2N
	2188	*
	2189	* -----------------
	2190	* \|///////////////\| STEP 3 (head truncate blocks)
	2191	* -----------------
	2192	* 0 N
	2193	*
	2194	* During steps 2 and 3 page-outs to file offsets less
	2195	* than or equal to N are suspended.
	2196	*
	2197	* During step 3 page-ins to the file get supended.
	2198	*/
	2199	__private_extern__
	2200	int
	2201	hfs_relocate(vp, blockHint, cred, p)
	2202	struct vnode *vp;
	2203	u_int32_t blockHint;
	2204	struct ucred *cred;
	2205	struct proc *p;
	2206	{
	2207	struct filefork *fp;
	2208	struct hfsmount *hfsmp;
	2209	ExtendedVCB *vcb;
	2210
	2211	u_int32_t headblks;
	2212	u_int32_t datablks;
	2213	u_int32_t blksize;
	2214	u_int32_t realsize;
	2215	u_int32_t growsize;
	2216	u_int32_t nextallocsave;
	2217	u_int32_t sector_a;
	2218	u_int32_t sector_b;
	2219	int eflags;
	2220	u_int32_t oldstart; /* debug only */
	2221	off_t newbytes;
	2222	int retval, need_vinval=0;
	2223
	2224	if (vp->v_type != VREG && vp->v_type != VLNK) {
	2225	return (EPERM);
	2226	}
	2227
	2228	hfsmp = VTOHFS(vp);
	2229	if (hfsmp->hfs_flags & HFS_FRAGMENTED_FREESPACE) {
	2230	return (ENOSPC);
	2231	}
	2232
	2233	fp = VTOF(vp);
	2234	if (fp->ff_unallocblocks)
	2235	return (EINVAL);
	2236	vcb = VTOVCB(vp);
	2237	blksize = vcb->blockSize;
	2238	if (blockHint == 0)
	2239	blockHint = vcb->nextAllocation;
	2240
	2241	if ((fp->ff_size > (u_int64_t)0x7fffffff) \|\|
	2242	(vp->v_type == VLNK && fp->ff_size > blksize)) {
	2243	return (EFBIG);
	2244	}
	2245
	2246	headblks = fp->ff_blocks;
	2247	datablks = howmany(fp->ff_size, blksize);
	2248	growsize = datablks * blksize;
	2249	realsize = fp->ff_size;
	2250	eflags = kEFContigMask \| kEFAllMask \| kEFNoClumpMask;
	2251	if (blockHint >= hfsmp->hfs_metazone_start &&
	2252	blockHint <= hfsmp->hfs_metazone_end)
	2253	eflags \|= kEFMetadataMask;
	2254
	2255	hfs_global_shared_lock_acquire(hfsmp);
	2256	if (hfsmp->jnl) {
	2257	if (journal_start_transaction(hfsmp->jnl) != 0) {
	2258	return (EINVAL);
	2259	}
	2260	}
	2261
	2262	/* Lock extents b-tree (also protects volume bitmap) */
	2263	retval = hfs_metafilelocking(hfsmp, kHFSExtentsFileID, LK_EXCLUSIVE, p);
	2264	if (retval)
	2265	goto out2;
	2266
	2267	retval = MapFileBlockC(vcb, (FCB *)fp, 1, growsize - 1, &sector_a, NULL);
	2268	if (retval) {
	2269	retval = MacToVFSError(retval);
	2270	goto out;
	2271	}
	2272
	2273	/*
	2274	* STEP 1 - aquire new allocation blocks.
	2275	*/
	2276	nextallocsave = vcb->nextAllocation;
	2277	retval = ExtendFileC(vcb, (FCB*)fp, growsize, blockHint, eflags, &newbytes);
	2278	if (eflags & kEFMetadataMask)
	2279	vcb->nextAllocation = nextallocsave;
	2280
	2281	retval = MacToVFSError(retval);
	2282	if (retval == 0) {
	2283	VTOC(vp)->c_flag \|= C_MODIFIED;
	2284	if (newbytes < growsize) {
	2285	retval = ENOSPC;
	2286	goto restore;
	2287	} else if (fp->ff_blocks < (headblks + datablks)) {
	2288	printf("hfs_relocate: allocation failed");
	2289	retval = ENOSPC;
	2290	goto restore;
	2291	}
	2292
	2293	retval = MapFileBlockC(vcb, (FCB *)fp, 1, growsize, &sector_b, NULL);
	2294	if (retval) {
	2295	retval = MacToVFSError(retval);
	2296	} else if ((sector_a + 1) == sector_b) {
	2297	retval = ENOSPC;
	2298	goto restore;
	2299	} else if ((eflags & kEFMetadataMask) &&
	2300	((((u_int64_t)sector_b * hfsmp->hfs_phys_block_size) / blksize) >
	2301	hfsmp->hfs_metazone_end)) {
	2302	printf("hfs_relocate: didn't move into metadata zone\n");
	2303	retval = ENOSPC;
	2304	goto restore;
	2305	}
	2306	}
	2307	if (retval) {
	2308	/*
	2309	* Check to see if failure is due to excessive fragmentation.
	2310	*/
	2311	if (retval == ENOSPC &&
	2312	hfs_freeblks(hfsmp, 0) > (datablks * 2)) {
	2313	hfsmp->hfs_flags \|= HFS_FRAGMENTED_FREESPACE;
	2314	}
	2315	goto out;
	2316	}
	2317
	2318	fp->ff_size = fp->ff_blocks * blksize;
	2319	if (UBCISVALID(vp))
	2320	(void) ubc_setsize(vp, fp->ff_size);
	2321
	2322	/*
	2323	* STEP 2 - clone data into the new allocation blocks.
	2324	*/
	2325
	2326	// XXXdbg - unlock the extents overflow file because hfs_clonefile()
	2327	// calls vinvalbuf() which calls hfs_fsync() which can
	2328	// call hfs_metasync() which may need to lock the catalog
	2329	// file -- but the catalog file may be locked and blocked
	2330	// waiting for the extents overflow file if we're unlucky.
	2331	// see radar 3742973 for more details.
	2332	(void) hfs_metafilelocking(VTOHFS(vp), kHFSExtentsFileID, LK_RELEASE, p);
	2333
	2334	if (vp->v_type == VLNK)
	2335	retval = hfs_clonelink(vp, blksize, cred, p);
	2336	else if (vp->v_flag & VSYSTEM)
	2337	retval = hfs_clonesysfile(vp, headblks, datablks, blksize, cred, p);
	2338	else
	2339	retval = hfs_clonefile(vp, headblks, datablks, blksize, cred, p);
	2340
	2341	// XXXdbg - relock the extents overflow file
	2342	(void)hfs_metafilelocking(hfsmp, kHFSExtentsFileID, LK_EXCLUSIVE, p);
	2343
	2344	if (retval)
	2345	goto restore;
	2346
	2347	oldstart = fp->ff_extents[0].startBlock;
	2348
	2349	/*
	2350	* STEP 3 - switch to clone and remove old blocks.
	2351	*/
	2352	SET(VTOC(vp)->c_flag, C_NOBLKMAP); /* suspend page-ins */
	2353
	2354	retval = HeadTruncateFile(vcb, (FCB*)fp, headblks);
	2355
	2356	CLR(VTOC(vp)->c_flag, C_NOBLKMAP); /* resume page-ins */
	2357	if (ISSET(VTOC(vp)->c_flag, C_WBLKMAP))
	2358	wakeup(VTOC(vp));
	2359	if (retval)
	2360	goto restore;
	2361
	2362	fp->ff_size = realsize;
	2363	if (UBCISVALID(vp)) {
	2364	(void) ubc_setsize(vp, realsize);
	2365	need_vinval = 1;
	2366	}
	2367
	2368	CLR(VTOC(vp)->c_flag, C_RELOCATING); /* Resume page-outs for this file. */
	2369	out:
	2370	(void) hfs_metafilelocking(VTOHFS(vp), kHFSExtentsFileID, LK_RELEASE, p);
	2371
	2372	// XXXdbg - do this after unlocking the extents-overflow
	2373	// file to avoid deadlocks (see comment above by STEP 2)
	2374	if (need_vinval) {
	2375	(void) vinvalbuf(vp, V_SAVE, cred, p, 0, 0);
	2376	}
	2377
	2378	retval = VOP_FSYNC(vp, cred, MNT_WAIT, p);
	2379	out2:
	2380	if (hfsmp->jnl) {
	2381	if (VTOC(vp)->c_cnid < kHFSFirstUserCatalogNodeID)
	2382	(void) hfs_flushvolumeheader(hfsmp, MNT_WAIT, HFS_ALTFLUSH);
	2383	else
	2384	(void) hfs_flushvolumeheader(hfsmp, MNT_NOWAIT, 0);
	2385	journal_end_transaction(hfsmp->jnl);
	2386	}
	2387	hfs_global_shared_lock_release(hfsmp);
	2388
	2389	return (retval);
	2390
	2391	restore:
	2392	/*
	2393	* Give back any newly allocated space.
	2394	*/
	2395	if (fp->ff_size != realsize)
	2396	fp->ff_size = realsize;
	2397	(void) TruncateFileC(vcb, (FCB*)fp, fp->ff_size, false);
	2398	if (UBCISVALID(vp))
	2399	(void) ubc_setsize(vp, fp->ff_size);
	2400	CLR(VTOC(vp)->c_flag, C_RELOCATING);
	2401	goto out;
	2402	}
	2403
	2404
	2405	/*
	2406	* Clone a symlink.
	2407	*
	2408	*/
	2409	static int
	2410	hfs_clonelink(struct vnode vp, int blksize, struct ucred cred, struct proc *p)
	2411	{
	2412	struct buf *head_bp = NULL;
	2413	struct buf *tail_bp = NULL;
	2414	int error;
	2415
	2416
	2417	error = meta_bread(vp, 0, blksize, cred, &head_bp);
	2418	if (error)
	2419	goto out;
	2420
	2421	tail_bp = getblk(vp, 1, blksize, 0, 0, BLK_META);
	2422	if (tail_bp == NULL) {
	2423	error = EIO;
	2424	goto out;
	2425	}
	2426	bcopy(head_bp->b_data, tail_bp->b_data, blksize);
	2427	error = bwrite(tail_bp);
	2428	out:
	2429	if (head_bp) {
	2430	head_bp->b_flags \|= B_INVAL;
	2431	brelse(head_bp);
	2432	}
	2433	(void) vinvalbuf(vp, V_SAVE, cred, p, 0, 0);
	2434
	2435	return (error);
	2436	}
	2437
	2438	/*
	2439	* Clone a file's data within the file.
	2440	*
	2441	*/
	2442	static int
	2443	hfs_clonefile(struct vnode *vp, int blkstart, int blkcnt, int blksize,
	2444	struct ucred cred, struct proc p)
	2445	{
	2446	caddr_t bufp;
	2447	size_t writebase;
	2448	size_t bufsize;
	2449	size_t copysize;
	2450	size_t iosize;
	2451	size_t filesize;
	2452	size_t offset;
	2453	struct uio auio;
	2454	struct iovec aiov;
	2455	int devblocksize;
	2456	int didhold;
	2457	int error;
	2458
	2459
	2460	if ((error = vinvalbuf(vp, V_SAVE, cred, p, 0, 0))) {
	2461	printf("hfs_clonefile: vinvalbuf failed - %d\n", error);
	2462	return (error);
	2463	}
	2464
	2465	if (!ubc_clean(vp, 1)) {
	2466	printf("hfs_clonefile: not ubc_clean\n");
	2467	return (EIO); /* XXX error code */
	2468	}
	2469
	2470	/*
	2471	* Suspend page-outs for this file.
	2472	*/
	2473	SET(VTOC(vp)->c_flag, C_RELOCATING);
	2474
	2475	filesize = VTOF(vp)->ff_size;
	2476	writebase = blkstart * blksize;
	2477	copysize = blkcnt * blksize;
	2478	iosize = bufsize = MIN(copysize, 4096 * 16);
	2479	offset = 0;
	2480
	2481	if (kmem_alloc(kernel_map, (vm_offset_t *)&bufp, bufsize)) {
	2482	return (ENOMEM);
	2483	}
	2484
	2485	VOP_DEVBLOCKSIZE(VTOC(vp)->c_devvp, &devblocksize);
	2486
	2487	auio.uio_iov = &aiov;
	2488	auio.uio_iovcnt = 1;
	2489	auio.uio_segflg = UIO_SYSSPACE;
	2490	auio.uio_procp = p;
	2491
	2492	while (offset < copysize) {
	2493	iosize = MIN(copysize - offset, iosize);
	2494
	2495	aiov.iov_base = bufp;
	2496	aiov.iov_len = iosize;
	2497	auio.uio_resid = iosize;
	2498	auio.uio_offset = offset;
	2499	auio.uio_rw = UIO_READ;
	2500
	2501	error = cluster_read(vp, &auio, copysize, devblocksize, 0);
	2502	if (error) {
	2503	printf("hfs_clonefile: cluster_read failed - %d\n", error);
	2504	break;
	2505	}
	2506	if (auio.uio_resid != 0) {
	2507	printf("clonedata: cluster_read: uio_resid = %d\n", (int)auio.uio_resid);
	2508	error = EIO;
	2509	break;
	2510	}
	2511
	2512
	2513	aiov.iov_base = bufp;
	2514	aiov.iov_len = iosize;
	2515	auio.uio_resid = iosize;
	2516	auio.uio_offset = writebase + offset;
	2517	auio.uio_rw = UIO_WRITE;
	2518
	2519	error = cluster_write(vp, &auio, filesize + offset,
	2520	filesize + offset + iosize,
	2521	auio.uio_offset, 0, devblocksize, 0);
	2522	if (error) {
	2523	printf("hfs_clonefile: cluster_write failed - %d\n", error);
	2524	break;
	2525	}
	2526	if (auio.uio_resid != 0) {
	2527	printf("hfs_clonefile: cluster_write failed - uio_resid not zero\n");
	2528	error = EIO;
	2529	break;
	2530	}
	2531	offset += iosize;
	2532	}
	2533	if (error == 0) {
	2534	/* Clean the pages in VM. */
	2535	didhold = ubc_hold(vp);
	2536	if (didhold)
	2537	(void) ubc_clean(vp, 1);
	2538
	2539	/*
	2540	* Clean out all associated buffers.
	2541	*/
	2542	(void) vinvalbuf(vp, V_SAVE, cred, p, 0, 0);
	2543
	2544	if (didhold)
	2545	ubc_rele(vp);
	2546	}
	2547	kmem_free(kernel_map, (vm_offset_t)bufp, bufsize);
	2548
	2549	return (error);
	2550	}
	2551
	2552	/*
	2553	* Clone a system (metadata) file.
	2554	*
	2555	*/
	2556	static int
	2557	hfs_clonesysfile(struct vnode *vp, int blkstart, int blkcnt, int blksize,
	2558	struct ucred cred, struct proc p)
	2559	{
	2560	caddr_t bufp;
	2561	char * offset;
	2562	size_t bufsize;
	2563	size_t iosize;
	2564	struct buf *bp = NULL;
	2565	daddr_t blkno;
	2566	daddr_t blk;
	2567	int breadcnt;
	2568	int i;
	2569	int error = 0;
	2570
	2571
	2572	iosize = GetLogicalBlockSize(vp);
	2573	bufsize = MIN(blkcnt * blksize, 1024 * 1024) & ~(iosize - 1);
	2574	breadcnt = bufsize / iosize;
	2575
	2576	if (kmem_alloc(kernel_map, (vm_offset_t *)&bufp, bufsize)) {
	2577	return (ENOMEM);
	2578	}
	2579	blkstart = (blkstart * blksize) / iosize;
	2580	blkcnt = (blkcnt * blksize) / iosize;
	2581	blkno = 0;
	2582
	2583	while (blkno < blkcnt) {
	2584	/*
	2585	* Read up to a megabyte
	2586	*/
	2587	offset = bufp;
	2588	for (i = 0, blk = blkno; (i < breadcnt) && (blk < blkcnt); ++i, ++blk) {
	2589	error = meta_bread(vp, blk, iosize, cred, &bp);
	2590	if (error) {
	2591	printf("hfs_clonesysfile: meta_bread error %d\n", error);
	2592	goto out;
	2593	}
	2594	if (bp->b_bcount != iosize) {
	2595	printf("hfs_clonesysfile: b_bcount is only %d\n", bp->b_bcount);
	2596	goto out;
	2597	}
	2598
	2599	bcopy(bp->b_data, offset, iosize);
	2600	bp->b_flags \|= B_INVAL;
	2601	brelse(bp);
	2602	bp = NULL;
	2603	offset += iosize;
	2604	}
	2605
	2606	/*
	2607	* Write up to a megabyte
	2608	*/
	2609	offset = bufp;
	2610	for (i = 0; (i < breadcnt) && (blkno < blkcnt); ++i, ++blkno) {
	2611	bp = getblk(vp, blkstart + blkno, iosize, 0, 0, BLK_META);
	2612	if (bp == NULL) {
	2613	printf("hfs_clonesysfile: getblk failed on blk %d\n", blkstart + blkno);
	2614	error = EIO;
	2615	goto out;
	2616	}
	2617	bcopy(offset, bp->b_data, iosize);
	2618	error = bwrite(bp);
	2619	bp = NULL;
	2620	if (error)
	2621	goto out;
	2622	offset += iosize;
	2623	}
	2624	}
	2625	out:
	2626	if (bp) {
	2627	brelse(bp);
	2628	}
	2629
	2630	kmem_free(kernel_map, (vm_offset_t)bufp, bufsize);
	2631
	2632	error = VOP_FSYNC(vp, cred, MNT_WAIT, p);
	2633
	2634	return (error);
	2635	}
	2636