X-Git-Url: https://git.saurik.com/apple/xnu.git/blobdiff_plain/9bccf70c0258c7cac2dcb80011b2a964d884c552..ab86ba338a07a58a89f50cf7066a0f0e487ac0cc:/bsd/hfs/hfs_readwrite.c diff --git a/bsd/hfs/hfs_readwrite.c b/bsd/hfs/hfs_readwrite.c index 4544a7685..10b3a271e 100644 --- a/bsd/hfs/hfs_readwrite.c +++ b/bsd/hfs/hfs_readwrite.c @@ -1,21 +1,24 @@ /* - * Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2003 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * - * The contents of this file constitute Original Code as defined in and - * are subject to the Apple Public Source License Version 1.1 (the - * "License"). You may not use this file except in compliance with the - * License. Please obtain a copy of the License at - * http://www.apple.com/publicsource and read it before using this file. + * Copyright (c) 1999-2003 Apple Computer, Inc. All Rights Reserved. * - * This Original Code and all software distributed under the License are - * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this + * file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the - * License for the specific language governing rights and limitations - * under the License. + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. 
+ * Please see the License for the specific language governing rights and + * limitations under the License. * * @APPLE_LICENSE_HEADER_END@ */ @@ -32,6 +35,7 @@ #include #include #include +#include #include #include #include @@ -63,6 +67,10 @@ enum { extern u_int32_t GetLogicalBlockSize(struct vnode *vp); +static int hfs_clonelink(struct vnode *, int, struct ucred *, struct proc *); +static int hfs_clonefile(struct vnode *, int, int, int, struct ucred *, struct proc *); +static int hfs_clonesysfile(struct vnode *, int, int, int, struct ucred *, struct proc *); + /***************************************************************************** * @@ -94,18 +102,16 @@ hfs_read(ap) register struct vnode *vp = ap->a_vp; struct cnode *cp; struct filefork *fp; - struct buf *bp; - daddr_t logBlockNo; - u_long fragSize, moveSize, startOffset, ioxfersize; int devBlockSize = 0; - off_t bytesRemaining; int retval = 0; off_t filesize; off_t filebytes; + off_t start_resid = uio->uio_resid; + /* Preflight checks */ - if (vp->v_type != VREG && vp->v_type != VLNK) - return (EISDIR); /* HFS can only read files */ + if ((vp->v_type != VREG) || !UBCINFOEXISTS(vp)) + return (EPERM); /* can only read regular files */ if (uio->uio_resid == 0) return (0); /* Nothing left to do */ if (uio->uio_offset < 0) @@ -127,105 +133,29 @@ hfs_read(ap) KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 12)) | DBG_FUNC_START, (int)uio->uio_offset, uio->uio_resid, (int)filesize, (int)filebytes, 0); - if (UBCISVALID(vp)) { - retval = cluster_read(vp, uio, filesize, devBlockSize, 0); - } else { - - for (retval = 0, bp = NULL; uio->uio_resid > 0; bp = NULL) { - - if ((bytesRemaining = (filesize - uio->uio_offset)) <= 0) - break; - - logBlockNo = (daddr_t)(uio->uio_offset / PAGE_SIZE_64); - startOffset = (u_long) (uio->uio_offset & PAGE_MASK_64); - fragSize = PAGE_SIZE; - - if (((logBlockNo * PAGE_SIZE) + fragSize) < filesize) - ioxfersize = fragSize; - else { - ioxfersize = filesize - (logBlockNo * PAGE_SIZE); - ioxfersize = 
(ioxfersize + (devBlockSize - 1)) & ~(devBlockSize - 1); - } - moveSize = ioxfersize; - moveSize -= startOffset; - - if (bytesRemaining < moveSize) - moveSize = bytesRemaining; - - if (uio->uio_resid < moveSize) { - moveSize = uio->uio_resid; - }; - if (moveSize == 0) { - break; - }; - - if (( uio->uio_offset + fragSize) >= filesize) { - retval = bread(vp, logBlockNo, ioxfersize, NOCRED, &bp); - - } else if (logBlockNo - 1 == vp->v_lastr && !(vp->v_flag & VRAOFF)) { - daddr_t nextLogBlockNo = logBlockNo + 1; - int nextsize; - - if (((nextLogBlockNo * PAGE_SIZE) + - (daddr_t)fragSize) < filesize) - nextsize = fragSize; - else { - nextsize = filesize - (nextLogBlockNo * PAGE_SIZE); - nextsize = (nextsize + (devBlockSize - 1)) & ~(devBlockSize - 1); - } - retval = breadn(vp, logBlockNo, ioxfersize, &nextLogBlockNo, &nextsize, 1, NOCRED, &bp); - } else { - retval = bread(vp, logBlockNo, ioxfersize, NOCRED, &bp); - }; - - if (retval != E_NONE) { - if (bp) { - brelse(bp); - bp = NULL; - } - break; - }; - vp->v_lastr = logBlockNo; - - /* - * We should only get non-zero b_resid when an I/O retval - * has occurred, which should cause us to break above. - * However, if the short read did not cause an retval, - * then we want to ensure that we do not uiomove bad - * or uninitialized data. - */ - ioxfersize -= bp->b_resid; - - if (ioxfersize < moveSize) { /* XXX PPD This should take the offset into account, too! */ - if (ioxfersize == 0) - break; - moveSize = ioxfersize; - } - if ((startOffset + moveSize) > bp->b_bcount) - panic("hfs_read: bad startOffset or moveSize\n"); - - if ((retval = uiomove((caddr_t)bp->b_data + startOffset, (int)moveSize, uio))) - break; - - if (S_ISREG(cp->c_mode) && - (((startOffset + moveSize) == fragSize) || (uio->uio_offset == filesize))) { - bp->b_flags |= B_AGE; - }; - - brelse(bp); - /* Start of loop resets bp to NULL before reaching outside this block... 
*/ - } - - if (bp != NULL) { - brelse(bp); - } - } + retval = cluster_read(vp, uio, filesize, devBlockSize, 0); cp->c_flag |= C_ACCESS; KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 12)) | DBG_FUNC_END, (int)uio->uio_offset, uio->uio_resid, (int)filesize, (int)filebytes, 0); + /* + * Keep track blocks read + */ + if (VTOHFS(vp)->hfc_stage == HFC_RECORDING && retval == 0) { + /* + * If this file hasn't been seen since the start of + * the current sampling period then start over. + */ + if (cp->c_atime < VTOHFS(vp)->hfc_timebase) { + fp->ff_bytesread = start_resid - uio->uio_resid; + cp->c_atime = time.tv_sec; + } else { + fp->ff_bytesread += start_resid - uio->uio_resid; + } + } + return (retval); } @@ -253,41 +183,48 @@ hfs_write(ap) struct uio *uio = ap->a_uio; struct cnode *cp; struct filefork *fp; - struct buf *bp; struct proc *p; struct timeval tv; ExtendedVCB *vcb; - int devBlockSize = 0; - daddr_t logBlockNo; - long fragSize; - off_t origFileSize, currOffset, writelimit, bytesToAdd; - off_t actualBytesAdded; - u_long blkoffset, resid, xfersize, clearSize; - int eflags, ioflag; - int retval; + int devBlockSize = 0; + off_t origFileSize, writelimit, bytesToAdd; + off_t actualBytesAdded; + u_long resid; + int eflags, ioflag; + int retval; off_t filebytes; - u_long fileblocks; + struct hfsmount *hfsmp; + int started_tr = 0, grabbed_lock = 0; - ioflag = ap->a_ioflag; if (uio->uio_offset < 0) return (EINVAL); if (uio->uio_resid == 0) return (E_NONE); - if (vp->v_type != VREG && vp->v_type != VLNK) - return (EISDIR); /* Can only write files */ + if ((vp->v_type != VREG) || !UBCINFOEXISTS(vp)) + return (EPERM); /* Can only write regular files */ + ioflag = ap->a_ioflag; cp = VTOC(vp); fp = VTOF(vp); vcb = VTOVCB(vp); - fileblocks = fp->ff_blocks; - filebytes = (off_t)fileblocks * (off_t)vcb->blockSize; + filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize; if (ioflag & IO_APPEND) uio->uio_offset = fp->ff_size; if ((cp->c_flags & APPEND) && uio->uio_offset != fp->ff_size) 
return (EPERM); + // XXXdbg - don't allow modification of the journal or journal_info_block + if (VTOHFS(vp)->jnl && cp->c_datafork) { + struct HFSPlusExtentDescriptor *extd; + + extd = &cp->c_datafork->ff_extents[0]; + if (extd->startBlock == VTOVCB(vp)->vcbJinfoBlock || extd->startBlock == VTOHFS(vp)->jnl_start) { + return EPERM; + } + } + writelimit = uio->uio_offset + uio->uio_resid; /* @@ -309,19 +246,6 @@ hfs_write(ap) eflags = kEFDeferMask; /* defer file block allocations */ filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize; - /* - * NOTE: In the following loop there are two positions tracked: - * currOffset is the current I/O starting offset. currOffset - * is never >LEOF; the LEOF is nudged along with currOffset as - * data is zeroed or written. uio->uio_offset is the start of - * the current I/O operation. It may be arbitrarily beyond - * currOffset. - * - * The following is true at all times: - * currOffset <= LEOF <= uio->uio_offset <= writelimit - */ - currOffset = MIN(uio->uio_offset, fp->ff_size); - KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_START, (int)uio->uio_offset, uio->uio_resid, (int)fp->ff_size, (int)filebytes, 0); retval = 0; @@ -333,24 +257,54 @@ hfs_write(ap) if(writelimit > filebytes) { bytesToAdd = writelimit - filebytes; - retval = hfs_chkdq(cp, (int64_t)(roundup(bytesToAdd, fp->ff_clumpsize)), + retval = hfs_chkdq(cp, (int64_t)(roundup(bytesToAdd, vcb->blockSize)), ap->a_cred, 0); if (retval) return (retval); } #endif /* QUOTA */ + hfsmp = VTOHFS(vp); + +#ifdef HFS_SPARSE_DEV + /* + * When the underlying device is sparse and space + * is low (< 8MB), stop doing delayed allocations + * and begin doing synchronous I/O. 
+ */ + if ((hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) && + (hfs_freeblks(hfsmp, 0) < 2048)) { + eflags &= ~kEFDeferMask; + ioflag |= IO_SYNC; + } +#endif /* HFS_SPARSE_DEV */ + + if (writelimit > filebytes) { + hfs_global_shared_lock_acquire(hfsmp); + grabbed_lock = 1; + } + if (hfsmp->jnl && (writelimit > filebytes)) { + if (journal_start_transaction(hfsmp->jnl) != 0) { + hfs_global_shared_lock_release(hfsmp); + return EINVAL; + } + started_tr = 1; + } + while (writelimit > filebytes) { - bytesToAdd = writelimit - filebytes; - if (suser(ap->a_cred, NULL) != 0) + if (ap->a_cred && suser(ap->a_cred, NULL) != 0) eflags |= kEFReserveMask; /* lock extents b-tree (also protects volume bitmap) */ retval = hfs_metafilelocking(VTOHFS(vp), kHFSExtentsFileID, LK_EXCLUSIVE, current_proc()); if (retval != E_NONE) break; - + + /* Files that are changing size are not hot file candidates. */ + if (hfsmp->hfc_stage == HFC_RECORDING) { + fp->ff_bytesread = 0; + } retval = MacToVFSError(ExtendFileC (vcb, (FCB*)fp, bytesToAdd, 0, eflags, &actualBytesAdded)); @@ -364,7 +318,21 @@ hfs_write(ap) (int)uio->uio_offset, uio->uio_resid, (int)fp->ff_size, (int)filebytes, 0); } - if (UBCISVALID(vp) && retval == E_NONE) { + // XXXdbg + if (started_tr) { + tv = time; + VOP_UPDATE(vp, &tv, &tv, 1); + + hfs_flushvolumeheader(hfsmp, MNT_NOWAIT, 0); + journal_end_transaction(hfsmp->jnl); + started_tr = 0; + } + if (grabbed_lock) { + hfs_global_shared_lock_release(hfsmp); + grabbed_lock = 0; + } + + if (retval == E_NONE) { off_t filesize; off_t zero_off; off_t tail_off; @@ -388,8 +356,10 @@ hfs_write(ap) of the transfer to see whether is invalid and should be zero-filled as part of the transfer: */ - if (rl_scan(&fp->ff_invalidranges, zero_off, uio->uio_offset - 1, &invalid_range) != RL_NOOVERLAP) - lflag |= IO_HEADZEROFILL; + if (uio->uio_offset > zero_off) { + if (rl_scan(&fp->ff_invalidranges, zero_off, uio->uio_offset - 1, &invalid_range) != RL_NOOVERLAP) + lflag |= IO_HEADZEROFILL; + } } else { 
off_t eof_page_base = fp->ff_size & ~PAGE_MASK_64; @@ -489,105 +459,10 @@ hfs_write(ap) } if (resid > uio->uio_resid) cp->c_flag |= C_CHANGE | C_UPDATE; - } else { - while (retval == E_NONE && uio->uio_resid > 0) { - logBlockNo = currOffset / PAGE_SIZE; - blkoffset = currOffset & PAGE_MASK; - - if ((filebytes - currOffset) < PAGE_SIZE_64) - fragSize = filebytes - ((off_t)logBlockNo * PAGE_SIZE_64); - else - fragSize = PAGE_SIZE; - xfersize = fragSize - blkoffset; - - /* Make any adjustments for boundary conditions */ - if (currOffset + (off_t)xfersize > writelimit) - xfersize = writelimit - currOffset; - - /* - * There is no need to read into bp if: - * We start on a block boundary and will overwrite the whole block - * - * OR - */ - if ((blkoffset == 0) && (xfersize >= fragSize)) { - bp = getblk(vp, logBlockNo, fragSize, 0, 0, BLK_READ); - retval = 0; - - if (bp->b_blkno == -1) { - brelse(bp); - retval = EIO; /* XXX */ - break; - } - } else { - - if (currOffset == fp->ff_size && blkoffset == 0) { - bp = getblk(vp, logBlockNo, fragSize, 0, 0, BLK_READ); - retval = 0; - if (bp->b_blkno == -1) { - brelse(bp); - retval = EIO; /* XXX */ - break; - } - } else { - /* - * This I/O transfer is not sufficiently aligned, - * so read the affected block into a buffer: - */ - retval = bread(vp, logBlockNo, fragSize, ap->a_cred, &bp); - if (retval != E_NONE) { - if (bp) - brelse(bp); - break; - } - } - } - - /* See if we are starting to write within file boundaries: - * If not, then we need to present a "hole" for the area - * between the current EOF and the start of the current - * I/O operation: - * - * Note that currOffset is only less than uio_offset if - * uio_offset > LEOF... 
- */ - if (uio->uio_offset > currOffset) { - clearSize = MIN(uio->uio_offset - currOffset, xfersize); - bzero(bp->b_data + blkoffset, clearSize); - currOffset += clearSize; - blkoffset += clearSize; - xfersize -= clearSize; - } - - if (xfersize > 0) { - retval = uiomove((caddr_t)bp->b_data + blkoffset, (int)xfersize, uio); - currOffset += xfersize; - } - - if (ioflag & IO_SYNC) { - (void)VOP_BWRITE(bp); - } else if ((xfersize + blkoffset) == fragSize) { - bp->b_flags |= B_AGE; - bawrite(bp); - } else { - bdwrite(bp); - } - - /* Update the EOF if we just extended the file - * (the PEOF has already been moved out and the - * block mapping table has been updated): - */ - if (currOffset > fp->ff_size) { - fp->ff_size = currOffset; - if (UBCISVALID(vp)) - ubc_setsize(vp, fp->ff_size); /* XXX check errors */ - } - if (retval || (resid == 0)) - break; - cp->c_flag |= C_CHANGE | C_UPDATE; - } /* endwhile */ } + HFS_KNOTE(vp, NOTE_WRITE); + ioerr_exit: /* * If we successfully wrote any data, and we are not the superuser @@ -609,6 +484,7 @@ ioerr_exit: tv = time; retval = VOP_UPDATE(vp, &tv, &tv, 1); } + vcb->vcbWrCnt++; KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_END, (int)uio->uio_offset, uio->uio_resid, (int)fp->ff_size, (int)filebytes, 0); @@ -617,6 +493,22 @@ ioerr_exit: } +#ifdef HFS_SPARSE_DEV +struct hfs_backingstoreinfo { + int signature; /* == 3419115 */ + int version; /* version of this struct (1) */ + int backingfd; /* disk image file (on backing fs) */ + int bandsize; /* sparse disk image band size */ +}; + +#define HFSIOC_SETBACKINGSTOREINFO _IOW('h', 7, struct hfs_backingstoreinfo) +#define HFSIOC_CLRBACKINGSTOREINFO _IO('h', 8) + +#define HFS_SETBACKINGSTOREINFO IOCBASECMD(HFSIOC_SETBACKINGSTOREINFO) +#define HFS_CLRBACKINGSTOREINFO IOCBASECMD(HFSIOC_CLRBACKINGSTOREINFO) + +#endif /* HFS_SPARSE_DEV */ + /* #% ioctl vp U U U @@ -645,10 +537,127 @@ hfs_ioctl(ap) } */ *ap; { switch (ap->a_command) { - case 1: { + +#ifdef HFS_SPARSE_DEV + case 
HFS_SETBACKINGSTOREINFO: { + struct hfsmount * hfsmp; + struct vnode * bsfs_rootvp; + struct vnode * di_vp; + struct file * di_fp; + struct hfs_backingstoreinfo *bsdata; + int error = 0; + + hfsmp = VTOHFS(ap->a_vp); + if (hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) { + return (EALREADY); + } + if (ap->a_p->p_ucred->cr_uid != 0 && + ap->a_p->p_ucred->cr_uid != (HFSTOVFS(hfsmp))->mnt_stat.f_owner) { + return (EACCES); /* must be owner of file system */ + } + bsdata = (struct hfs_backingstoreinfo *)ap->a_data; + if (bsdata == NULL) { + return (EINVAL); + } + if (error = fdgetf(ap->a_p, bsdata->backingfd, &di_fp)) { + return (error); + } + if (fref(di_fp) == -1) { + return (EBADF); + } + if (di_fp->f_type != DTYPE_VNODE) { + frele(di_fp); + return (EINVAL); + } + di_vp = (struct vnode *)di_fp->f_data; + if (ap->a_vp->v_mount == di_vp->v_mount) { + frele(di_fp); + return (EINVAL); + } + + /* + * Obtain the backing fs root vnode and keep a reference + * on it. This reference will be dropped in hfs_unmount. 
+ */ + error = VFS_ROOT(di_vp->v_mount, &bsfs_rootvp); + if (error) { + frele(di_fp); + return (error); + } + VOP_UNLOCK(bsfs_rootvp, 0, ap->a_p); /* Hold on to the reference */ + + hfsmp->hfs_backingfs_rootvp = bsfs_rootvp; + hfsmp->hfs_flags |= HFS_HAS_SPARSE_DEVICE; + hfsmp->hfs_sparsebandblks = bsdata->bandsize / HFSTOVCB(hfsmp)->blockSize; + hfsmp->hfs_sparsebandblks *= 4; + + frele(di_fp); + return (0); + } + case HFS_CLRBACKINGSTOREINFO: { + struct hfsmount * hfsmp; + struct vnode * tmpvp; + + hfsmp = VTOHFS(ap->a_vp); + if (ap->a_p->p_ucred->cr_uid != 0 && + ap->a_p->p_ucred->cr_uid != (HFSTOVFS(hfsmp))->mnt_stat.f_owner) { + return (EACCES); /* must be owner of file system */ + } + if ((hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) && + hfsmp->hfs_backingfs_rootvp) { + + hfsmp->hfs_flags &= ~HFS_HAS_SPARSE_DEVICE; + tmpvp = hfsmp->hfs_backingfs_rootvp; + hfsmp->hfs_backingfs_rootvp = NULLVP; + hfsmp->hfs_sparsebandblks = 0; + vrele(tmpvp); + } + return (0); + } +#endif /* HFS_SPARSE_DEV */ + + case 6: { + int error; + + ap->a_vp->v_flag |= VFULLFSYNC; + error = VOP_FSYNC(ap->a_vp, ap->a_cred, MNT_NOWAIT, ap->a_p); + ap->a_vp->v_flag &= ~VFULLFSYNC; + + return error; + } + case 5: { + register struct vnode *vp; register struct cnode *cp; + struct filefork *fp; + int error; + + vp = ap->a_vp; + cp = VTOC(vp); + fp = VTOF(vp); + + if (vp->v_type != VREG) + return EINVAL; + + VOP_LEASE(vp, ap->a_p, ap->a_cred, LEASE_READ); + error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, ap->a_p); + if (error) + return (error); + + /* + * used by regression test to determine if + * all the dirty pages (via write) have been cleaned + * after a call to 'fsysnc'. 
+ */ + error = is_file_clean(vp, fp->ff_size); + VOP_UNLOCK(vp, 0, ap->a_p); + + return (error); + } + + case 1: { register struct vnode *vp; register struct radvisory *ra; + register struct cnode *cp; struct filefork *fp; int devBlockSize = 0; int error; @@ -952,6 +961,8 @@ hfs_cmap(ap) struct proc *p = NULL; struct rl_entry *invalid_range; enum rl_overlaptype overlaptype; + int started_tr = 0, grabbed_lock = 0; + struct timeval tv; /* * Check for underlying vnode requests and ensure that logical @@ -960,12 +971,49 @@ hfs_cmap(ap) if (ap->a_bpn == NULL) return (0); - if (overflow_extents(fp) || fp->ff_unallocblocks) { + p = current_proc(); + + if (ISSET(VTOC(ap->a_vp)->c_flag, C_NOBLKMAP)) { + /* + * File blocks are getting remapped. Wait until its finished. + */ + SET(VTOC(ap->a_vp)->c_flag, C_WBLKMAP); + (void) tsleep((caddr_t)VTOC(ap->a_vp), PINOD, "hfs_cmap", 0); + if (ISSET(VTOC(ap->a_vp)->c_flag, C_NOBLKMAP)) + panic("hfs_cmap: no mappable blocks"); + } + + retry: + if (fp->ff_unallocblocks) { lockExtBtree = 1; - p = current_proc(); + + // XXXdbg + hfs_global_shared_lock_acquire(hfsmp); + grabbed_lock = 1; + + if (hfsmp->jnl) { + if (journal_start_transaction(hfsmp->jnl) != 0) { + hfs_global_shared_lock_release(hfsmp); + return EINVAL; + } else { + started_tr = 1; + } + } + if (retval = hfs_metafilelocking(hfsmp, kHFSExtentsFileID, LK_EXCLUSIVE | LK_CANRECURSE, p)) { + if (started_tr) { + journal_end_transaction(hfsmp->jnl); + } + if (grabbed_lock) { + hfs_global_shared_lock_release(hfsmp); + } return (retval); - } + } + } else if (overflow_extents(fp)) { + lockExtBtree = 1; + if (retval = hfs_metafilelocking(hfsmp, kHFSExtentsFileID, LK_EXCLUSIVE | LK_CANRECURSE, p)) { + return retval; + } } /* @@ -974,6 +1022,22 @@ hfs_cmap(ap) if (fp->ff_unallocblocks) { SInt64 reqbytes, actbytes; + // + // Make sure we have a transaction. 
It's possible + // that we came in and fp->ff_unallocblocks was zero + // but during the time we blocked acquiring the extents + // btree, ff_unallocblocks became non-zero and so we + // will need to start a transaction. + // + if (hfsmp->jnl && started_tr == 0) { + if (lockExtBtree) { + (void) hfs_metafilelocking(hfsmp, kHFSExtentsFileID, LK_RELEASE, p); + lockExtBtree = 0; + } + + goto retry; + } + reqbytes = (SInt64)fp->ff_unallocblocks * (SInt64)HFSTOVCB(hfsmp)->blockSize; /* @@ -989,6 +1053,10 @@ hfs_cmap(ap) fp->ff_blocks -= fp->ff_unallocblocks; fp->ff_unallocblocks = 0; + /* Files that are changing size are not hot file candidates. */ + if (hfsmp->hfc_stage == HFC_RECORDING) { + fp->ff_bytesread = 0; + } while (retval == 0 && reqbytes > 0) { retval = MacToVFSError(ExtendFileC(HFSTOVCB(hfsmp), (FCB*)fp, reqbytes, 0, @@ -1007,10 +1075,20 @@ hfs_cmap(ap) } if (retval) { - (void) hfs_metafilelocking(hfsmp, kHFSExtentsFileID, LK_RELEASE, p); - return (retval); - } - VTOC(ap->a_vp)->c_flag |= C_MODIFIED; + (void) hfs_metafilelocking(hfsmp, kHFSExtentsFileID, LK_RELEASE, p); + VTOC(ap->a_vp)->c_flag |= C_MODIFIED; + if (started_tr) { + tv = time; + VOP_UPDATE(ap->a_vp, &tv, &tv, 1); + + hfs_flushvolumeheader(hfsmp, MNT_NOWAIT, 0); + journal_end_transaction(hfsmp->jnl); + } + if (grabbed_lock) { + hfs_global_shared_lock_release(hfsmp); + } + return (retval); + } } retval = MacToVFSError( @@ -1024,6 +1102,20 @@ hfs_cmap(ap) if (lockExtBtree) (void) hfs_metafilelocking(hfsmp, kHFSExtentsFileID, LK_RELEASE, p); + // XXXdbg + if (started_tr) { + tv = time; + retval = VOP_UPDATE(ap->a_vp, &tv, &tv, 1); + + hfs_flushvolumeheader(hfsmp, MNT_NOWAIT, 0); + journal_end_transaction(hfsmp->jnl); + started_tr = 0; + } + if (grabbed_lock) { + hfs_global_shared_lock_release(hfsmp); + grabbed_lock = 0; + } + if (retval == E_NONE) { /* Adjust the mapping information for invalid file ranges: */ overlaptype = rl_scan(&fp->ff_invalidranges, @@ -1153,6 +1245,11 @@ 
hfs_strategy_fragmented(struct buf *bp) } frag->b_vp = NULL; + // + // XXXdbg - in the case that this is a meta-data block, it won't affect + // the journal because this bp is for a physical disk block, + // not a logical block that is part of the catalog or extents + // files. SET(frag->b_flags, B_INVAL); brelse(frag); @@ -1256,21 +1353,7 @@ hfs_strategy(ap) } -/* -# -#% truncate vp L L L -# -vop_truncate { - IN struct vnode *vp; - IN off_t length; - IN int flags; (IO_SYNC) - IN struct ucred *cred; - IN struct proc *p; -}; - * Truncate a cnode to at most length size, freeing (or adding) the - * disk blocks. - */ -int hfs_truncate(ap) +static int do_hfs_truncate(ap) struct vop_truncate_args /* { struct vnode *a_vp; off_t a_length; @@ -1291,6 +1374,7 @@ int hfs_truncate(ap) off_t filebytes; u_long fileblocks; int blksize; + struct hfsmount *hfsmp; if (vp->v_type != VREG && vp->v_type != VLNK) return (EISDIR); /* cannot truncate an HFS directory! */ @@ -1309,10 +1393,16 @@ int hfs_truncate(ap) if ((!ISHFSPLUS(VTOVCB(vp))) && (length > (off_t)MAXHFSFILESIZE)) return (EFBIG); + hfsmp = VTOHFS(vp); tv = time; retval = E_NONE; + /* Files that are changing size are not hot file candidates. */ + if (hfsmp->hfc_stage == HFC_RECORDING) { + fp->ff_bytesread = 0; + } + /* * We cannot just check if fp->ff_size == length (as an optimization) * since there may be extra physical blocks that also need truncation. @@ -1329,7 +1419,7 @@ int hfs_truncate(ap) */ if (length > fp->ff_size) { #if QUOTA - retval = hfs_chkdq(cp, (int64_t)(roundup(length - filebytes, fp->ff_clumpsize)), + retval = hfs_chkdq(cp, (int64_t)(roundup(length - filebytes, blksize)), ap->a_cred, 0); if (retval) goto Err_Exit; @@ -1340,24 +1430,49 @@ int hfs_truncate(ap) */ if (length > filebytes) { int eflags; + u_long blockHint = 0; /* All or nothing and don't round up to clumpsize. 
*/ eflags = kEFAllMask | kEFNoClumpMask; - if (suser(ap->a_cred, NULL) != 0) + if (ap->a_cred && suser(ap->a_cred, NULL) != 0) eflags |= kEFReserveMask; /* keep a reserve */ + /* + * Allocate Journal and Quota files in metadata zone. + */ + if (filebytes == 0 && + hfsmp->hfs_flags & HFS_METADATA_ZONE && + hfs_virtualmetafile(cp)) { + eflags |= kEFMetadataMask; + blockHint = hfsmp->hfs_metazone_start; + } + // XXXdbg + hfs_global_shared_lock_acquire(hfsmp); + if (hfsmp->jnl) { + if (journal_start_transaction(hfsmp->jnl) != 0) { + retval = EINVAL; + goto Err_Exit; + } + } + /* lock extents b-tree (also protects volume bitmap) */ retval = hfs_metafilelocking(VTOHFS(vp), kHFSExtentsFileID, LK_EXCLUSIVE, ap->a_p); - if (retval) + if (retval) { + if (hfsmp->jnl) { + journal_end_transaction(hfsmp->jnl); + } + hfs_global_shared_lock_release(hfsmp); + goto Err_Exit; + } while ((length > filebytes) && (retval == E_NONE)) { bytesToAdd = length - filebytes; retval = MacToVFSError(ExtendFileC(VTOVCB(vp), (FCB*)fp, bytesToAdd, - 0, + blockHint, eflags, &actualBytesAdded)); @@ -1368,7 +1483,19 @@ int hfs_truncate(ap) break; } } /* endwhile */ + (void) hfs_metafilelocking(VTOHFS(vp), kHFSExtentsFileID, LK_RELEASE, ap->a_p); + + // XXXdbg + if (hfsmp->jnl) { + tv = time; + VOP_UPDATE(vp, &tv, &tv, 1); + + hfs_flushvolumeheader(hfsmp, MNT_NOWAIT, 0); + journal_end_transaction(hfsmp->jnl); + } + hfs_global_shared_lock_release(hfsmp); + if (retval) goto Err_Exit; @@ -1484,16 +1611,41 @@ int hfs_truncate(ap) #if QUOTA off_t savedbytes = ((off_t)fp->ff_blocks * (off_t)blksize); #endif /* QUOTA */ + // XXXdbg + hfs_global_shared_lock_acquire(hfsmp); + if (hfsmp->jnl) { + if (journal_start_transaction(hfsmp->jnl) != 0) { + retval = EINVAL; + goto Err_Exit; + } + } + /* lock extents b-tree (also protects volume bitmap) */ retval = hfs_metafilelocking(VTOHFS(vp), kHFSExtentsFileID, LK_EXCLUSIVE, ap->a_p); - if (retval) + if (retval) { + if (hfsmp->jnl) { + 
journal_end_transaction(hfsmp->jnl); + } + hfs_global_shared_lock_release(hfsmp); goto Err_Exit; + } if (fp->ff_unallocblocks == 0) retval = MacToVFSError(TruncateFileC(VTOVCB(vp), (FCB*)fp, length, false)); (void) hfs_metafilelocking(VTOHFS(vp), kHFSExtentsFileID, LK_RELEASE, ap->a_p); + + // XXXdbg + if (hfsmp->jnl) { + tv = time; + VOP_UPDATE(vp, &tv, &tv, 1); + + hfs_flushvolumeheader(hfsmp, MNT_NOWAIT, 0); + journal_end_transaction(hfsmp->jnl); + } + hfs_global_shared_lock_release(hfsmp); + filebytes = (off_t)fp->ff_blocks * (off_t)blksize; if (retval) goto Err_Exit; @@ -1523,6 +1675,83 @@ Err_Exit: } +/* +# +#% truncate vp L L L +# +vop_truncate { + IN struct vnode *vp; + IN off_t length; + IN int flags; (IO_SYNC) + IN struct ucred *cred; + IN struct proc *p; +}; + * Truncate a cnode to at most length size, freeing (or adding) the + * disk blocks. + */ +int hfs_truncate(ap) + struct vop_truncate_args /* { + struct vnode *a_vp; + off_t a_length; + int a_flags; + struct ucred *a_cred; + struct proc *a_p; + } */ *ap; +{ + register struct vnode *vp = ap->a_vp; + register struct cnode *cp = VTOC(vp); + struct filefork *fp = VTOF(vp); + off_t length; + off_t filebytes; + u_long fileblocks; + int blksize, error; + u_int64_t nsize; + + if (vp->v_type != VREG && vp->v_type != VLNK) + return (EISDIR); /* cannot truncate an HFS directory! */ + + length = ap->a_length; + blksize = VTOVCB(vp)->blockSize; + fileblocks = fp->ff_blocks; + filebytes = (off_t)fileblocks * (off_t)blksize; + + // have to loop truncating or growing files that are + // really big because otherwise transactions can get + // enormous and consume too many kernel resources. 
+ if (length < filebytes && (filebytes - length) > HFS_BIGFILE_SIZE) { + while (filebytes > length) { + if ((filebytes - length) > HFS_BIGFILE_SIZE) { + filebytes -= HFS_BIGFILE_SIZE; + } else { + filebytes = length; + } + + ap->a_length = filebytes; + error = do_hfs_truncate(ap); + if (error) + break; + } + } else if (length > filebytes && (length - filebytes) > HFS_BIGFILE_SIZE) { + while (filebytes < length) { + if ((length - filebytes) > HFS_BIGFILE_SIZE) { + filebytes += HFS_BIGFILE_SIZE; + } else { + filebytes = (length - filebytes); + } + + ap->a_length = filebytes; + error = do_hfs_truncate(ap); + if (error) + break; + } + } else { + error = do_hfs_truncate(ap); + } + + return error; +} + + /* # @@ -1553,6 +1782,7 @@ int hfs_allocate(ap) struct vnode *vp = ap->a_vp; struct cnode *cp = VTOC(vp); struct filefork *fp = VTOF(vp); + ExtendedVCB *vcb = VTOVCB(vp); off_t length = ap->a_length; off_t startingPEOF; off_t moreBytesRequested; @@ -1563,28 +1793,30 @@ int hfs_allocate(ap) struct timeval tv; int retval, retval2; UInt32 blockHint; - UInt32 extendFlags =0; /* For call to ExtendFileC */ + UInt32 extendFlags; /* For call to ExtendFileC */ + struct hfsmount *hfsmp; + + hfsmp = VTOHFS(vp); *(ap->a_bytesallocated) = 0; fileblocks = fp->ff_blocks; - filebytes = (off_t)fileblocks * (off_t)VTOVCB(vp)->blockSize; + filebytes = (off_t)fileblocks * (off_t)vcb->blockSize; if (length < (off_t)0) return (EINVAL); - if (vp->v_type != VREG && vp->v_type != VLNK) + if (vp->v_type != VREG) return (EISDIR); - if ((ap->a_flags & ALLOCATEFROMVOL) && (length <= filebytes)) + if ((ap->a_flags & ALLOCATEFROMVOL) && (length < filebytes)) return (EINVAL); /* Fill in the flags word for the call to Extend the file */ + extendFlags = kEFNoClumpMask; if (ap->a_flags & ALLOCATECONTIG) extendFlags |= kEFContigMask; - if (ap->a_flags & ALLOCATEALL) extendFlags |= kEFAllMask; - - if (suser(ap->a_cred, NULL) != 0) + if (ap->a_cred && suser(ap->a_cred, NULL) != 0) extendFlags |= 
kEFReserveMask; tv = time; @@ -1610,17 +1842,52 @@ int hfs_allocate(ap) moreBytesRequested = length - filebytes; #if QUOTA - retval = hfs_chkdq(cp, (int64_t)(roundup(moreBytesRequested, fp->ff_clumpsize)), + retval = hfs_chkdq(cp, + (int64_t)(roundup(moreBytesRequested, vcb->blockSize)), ap->a_cred, 0); if (retval) return (retval); #endif /* QUOTA */ + /* + * Metadata zone checks. + */ + if (hfsmp->hfs_flags & HFS_METADATA_ZONE) { + /* + * Allocate Journal and Quota files in metadata zone. + */ + if (hfs_virtualmetafile(cp)) { + extendFlags |= kEFMetadataMask; + blockHint = hfsmp->hfs_metazone_start; + } else if ((blockHint >= hfsmp->hfs_metazone_start) && + (blockHint <= hfsmp->hfs_metazone_end)) { + /* + * Move blockHint outside metadata zone. + */ + blockHint = hfsmp->hfs_metazone_end + 1; + } + } + + // XXXdbg + hfs_global_shared_lock_acquire(hfsmp); + if (hfsmp->jnl) { + if (journal_start_transaction(hfsmp->jnl) != 0) { + retval = EINVAL; + goto Err_Exit; + } + } + /* lock extents b-tree (also protects volume bitmap) */ retval = hfs_metafilelocking(VTOHFS(vp), kHFSExtentsFileID, LK_EXCLUSIVE, ap->a_p); - if (retval) goto Err_Exit; + if (retval) { + if (hfsmp->jnl) { + journal_end_transaction(hfsmp->jnl); + } + hfs_global_shared_lock_release(hfsmp); + goto Err_Exit; + } - retval = MacToVFSError(ExtendFileC(VTOVCB(vp), + retval = MacToVFSError(ExtendFileC(vcb, (FCB*)fp, moreBytesRequested, blockHint, @@ -1628,9 +1895,20 @@ int hfs_allocate(ap) &actualBytesAdded)); *(ap->a_bytesallocated) = actualBytesAdded; - filebytes = (off_t)fp->ff_blocks * (off_t)VTOVCB(vp)->blockSize; + filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize; + (void) hfs_metafilelocking(VTOHFS(vp), kHFSExtentsFileID, LK_RELEASE, ap->a_p); + // XXXdbg + if (hfsmp->jnl) { + tv = time; + VOP_UPDATE(vp, &tv, &tv, 1); + + hfs_flushvolumeheader(hfsmp, MNT_NOWAIT, 0); + journal_end_transaction(hfsmp->jnl); + } + hfs_global_shared_lock_release(hfsmp); + /* * if we get an error and no changes were 
made then exit * otherwise we must do the VOP_UPDATE to reflect the changes @@ -1647,7 +1925,7 @@ int hfs_allocate(ap) */ if ((actualBytesAdded != 0) && (moreBytesRequested < actualBytesAdded)) *(ap->a_bytesallocated) = - roundup(moreBytesRequested, (off_t)VTOVCB(vp)->blockSize); + roundup(moreBytesRequested, (off_t)vcb->blockSize); } else { /* Shorten the size of the file */ @@ -1661,18 +1939,45 @@ int hfs_allocate(ap) (void) vinvalbuf(vp, vflags, ap->a_cred, ap->a_p, 0, 0); } + // XXXdbg + hfs_global_shared_lock_acquire(hfsmp); + if (hfsmp->jnl) { + if (journal_start_transaction(hfsmp->jnl) != 0) { + retval = EINVAL; + goto Err_Exit; + } + } + /* lock extents b-tree (also protects volume bitmap) */ retval = hfs_metafilelocking(VTOHFS(vp), kHFSExtentsFileID, LK_EXCLUSIVE, ap->a_p); - if (retval) goto Err_Exit; + if (retval) { + if (hfsmp->jnl) { + journal_end_transaction(hfsmp->jnl); + } + hfs_global_shared_lock_release(hfsmp); + + goto Err_Exit; + } retval = MacToVFSError( TruncateFileC( - VTOVCB(vp), + vcb, (FCB*)fp, length, false)); (void) hfs_metafilelocking(VTOHFS(vp), kHFSExtentsFileID, LK_RELEASE, ap->a_p); - filebytes = (off_t)fp->ff_blocks * (off_t)VTOVCB(vp)->blockSize; + filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize; + + if (hfsmp->jnl) { + tv = time; + VOP_UPDATE(vp, &tv, &tv, 1); + + hfs_flushvolumeheader(hfsmp, MNT_NOWAIT, 0); + journal_end_transaction(hfsmp->jnl); + } + hfs_global_shared_lock_release(hfsmp); + + /* * if we get an error and no changes were made then exit * otherwise we must do the VOP_UPDATE to reflect the changes @@ -1721,7 +2026,7 @@ hfs_pagein(ap) int devBlockSize = 0; int error; - if (vp->v_type != VREG && vp->v_type != VLNK) + if (vp->v_type != VREG) panic("hfs_pagein: vp not UBC type\n"); VOP_DEVBLOCKSIZE(VTOC(vp)->c_devvp, &devBlockSize); @@ -1729,6 +2034,25 @@ hfs_pagein(ap) error = cluster_pagein(vp, ap->a_pl, ap->a_pl_offset, ap->a_f_offset, ap->a_size, (off_t)VTOF(vp)->ff_size, devBlockSize, ap->a_flags); + /* + 
* Keep track blocks read + */ + if (VTOHFS(vp)->hfc_stage == HFC_RECORDING && error == 0) { + struct cnode *cp; + + cp = VTOC(vp); + /* + * If this file hasn't been seen since the start of + * the current sampling period then start over. + */ + if (cp->c_atime < VTOHFS(vp)->hfc_timebase) + VTOF(vp)->ff_bytesread = ap->a_size; + else + VTOF(vp)->ff_bytesread += ap->a_size; + + cp->c_flag |= C_ACCESS; + } + return (error); } @@ -1762,10 +2086,18 @@ hfs_pageout(ap) filesize = fp->ff_size; end_of_range = ap->a_f_offset + ap->a_size - 1; + if (cp->c_flag & C_RELOCATING) { + if (end_of_range < (filesize / 2)) { + return (EBUSY); + } + } + if (end_of_range >= filesize) end_of_range = (off_t)(filesize - 1); - if (ap->a_f_offset < filesize) + if (ap->a_f_offset < filesize) { rl_remove(ap->a_f_offset, end_of_range, &fp->ff_invalidranges); + cp->c_flag |= C_MODIFIED; /* leof is dirty */ + } retval = cluster_pageout(vp, ap->a_pl, ap->a_pl_offset, ap->a_f_offset, ap->a_size, filesize, devBlockSize, ap->a_flags); @@ -1794,9 +2126,9 @@ hfs_bwrite(ap) } */ *ap; { int retval = 0; -#if BYTE_ORDER == LITTLE_ENDIAN register struct buf *bp = ap->a_bp; register struct vnode *vp = bp->b_vp; +#if BYTE_ORDER == LITTLE_ENDIAN BlockDescriptor block; /* Trap B-Tree writes */ @@ -1820,11 +2152,471 @@ hfs_bwrite(ap) } #endif /* This buffer shouldn't be locked anymore but if it is clear it */ - if (ISSET(ap->a_bp->b_flags, B_LOCKED)) { - CLR(ap->a_bp->b_flags, B_LOCKED); + if (ISSET(bp->b_flags, B_LOCKED)) { + // XXXdbg + if (VTOHFS(vp)->jnl) { + panic("hfs: CLEARING the lock bit on bp 0x%x\n", bp); + } + CLR(bp->b_flags, B_LOCKED); printf("hfs_bwrite: called with lock bit set\n"); } retval = vn_bwrite (ap); return (retval); } + +/* + * Relocate a file to a new location on disk + * cnode must be locked on entry + * + * Relocation occurs by cloning the file's data from its + * current set of blocks to a new set of blocks. 
During + * the relocation all of the blocks (old and new) are + * owned by the file. + * + * ----------------- + * |///////////////| + * ----------------- + * 0 N (file offset) + * + * ----------------- ----------------- + * |///////////////| | | STEP 1 (aquire new blocks) + * ----------------- ----------------- + * 0 N N+1 2N + * + * ----------------- ----------------- + * |///////////////| |///////////////| STEP 2 (clone data) + * ----------------- ----------------- + * 0 N N+1 2N + * + * ----------------- + * |///////////////| STEP 3 (head truncate blocks) + * ----------------- + * 0 N + * + * During steps 2 and 3 page-outs to file offsets less + * than or equal to N are suspended. + * + * During step 3 page-ins to the file get supended. + */ +__private_extern__ +int +hfs_relocate(vp, blockHint, cred, p) + struct vnode *vp; + u_int32_t blockHint; + struct ucred *cred; + struct proc *p; +{ + struct filefork *fp; + struct hfsmount *hfsmp; + ExtendedVCB *vcb; + + u_int32_t headblks; + u_int32_t datablks; + u_int32_t blksize; + u_int32_t realsize; + u_int32_t growsize; + u_int32_t nextallocsave; + u_int32_t sector_a; + u_int32_t sector_b; + int eflags; + u_int32_t oldstart; /* debug only */ + off_t newbytes; + int retval; + + if (vp->v_type != VREG && vp->v_type != VLNK) { + return (EPERM); + } + + hfsmp = VTOHFS(vp); + if (hfsmp->hfs_flags & HFS_FRAGMENTED_FREESPACE) { + return (ENOSPC); + } + + fp = VTOF(vp); + if (fp->ff_unallocblocks) + return (EINVAL); + vcb = VTOVCB(vp); + blksize = vcb->blockSize; + if (blockHint == 0) + blockHint = vcb->nextAllocation; + + if ((fp->ff_size > (u_int64_t)0x7fffffff) || + (vp->v_type == VLNK && fp->ff_size > blksize)) { + return (EFBIG); + } + + headblks = fp->ff_blocks; + datablks = howmany(fp->ff_size, blksize); + growsize = datablks * blksize; + realsize = fp->ff_size; + eflags = kEFContigMask | kEFAllMask | kEFNoClumpMask; + if (blockHint >= hfsmp->hfs_metazone_start && + blockHint <= hfsmp->hfs_metazone_end) + eflags |= 
 kEFMetadataMask; + + hfs_global_shared_lock_acquire(hfsmp); + if (hfsmp->jnl) { + if (journal_start_transaction(hfsmp->jnl) != 0) { + return (EINVAL); + } + } + + /* Lock extents b-tree (also protects volume bitmap) */ + retval = hfs_metafilelocking(hfsmp, kHFSExtentsFileID, LK_EXCLUSIVE, p); + if (retval) + goto out2; + + retval = MapFileBlockC(vcb, (FCB *)fp, 1, growsize - 1, &sector_a, NULL); + if (retval) { + retval = MacToVFSError(retval); + goto out; + } + + /* + * STEP 1 - aquire new allocation blocks. + */ + nextallocsave = vcb->nextAllocation; + retval = ExtendFileC(vcb, (FCB*)fp, growsize, blockHint, eflags, &newbytes); + if (eflags & kEFMetadataMask) + vcb->nextAllocation = nextallocsave; + + retval = MacToVFSError(retval); + if (retval == 0) { + VTOC(vp)->c_flag |= C_MODIFIED; + if (newbytes < growsize) { + retval = ENOSPC; + goto restore; + } else if (fp->ff_blocks < (headblks + datablks)) { + printf("hfs_relocate: allocation failed"); + retval = ENOSPC; + goto restore; + } + + retval = MapFileBlockC(vcb, (FCB *)fp, 1, growsize, &sector_b, NULL); + if (retval) { + retval = MacToVFSError(retval); + } else if ((sector_a + 1) == sector_b) { + retval = ENOSPC; + goto restore; + } else if ((eflags & kEFMetadataMask) && + ((((u_int64_t)sector_b * hfsmp->hfs_phys_block_size) / blksize) > + hfsmp->hfs_metazone_end)) { + printf("hfs_relocate: didn't move into metadata zone\n"); + retval = ENOSPC; + goto restore; + } + } + if (retval) { + /* + * Check to see if failure is due to excessive fragmentation. + */ + if (retval == ENOSPC && + hfs_freeblks(hfsmp, 0) > (datablks * 2)) { + hfsmp->hfs_flags |= HFS_FRAGMENTED_FREESPACE; + } + goto out; + } + + fp->ff_size = fp->ff_blocks * blksize; + if (UBCISVALID(vp)) + (void) ubc_setsize(vp, fp->ff_size); + + /* + * STEP 2 - clone data into the new allocation blocks. 
+ */ + + if (vp->v_type == VLNK) + retval = hfs_clonelink(vp, blksize, cred, p); + else if (vp->v_flag & VSYSTEM) + retval = hfs_clonesysfile(vp, headblks, datablks, blksize, cred, p); + else + retval = hfs_clonefile(vp, headblks, datablks, blksize, cred, p); + + if (retval) + goto restore; + + oldstart = fp->ff_extents[0].startBlock; + + /* + * STEP 3 - switch to clone and remove old blocks. + */ + SET(VTOC(vp)->c_flag, C_NOBLKMAP); /* suspend page-ins */ + + retval = HeadTruncateFile(vcb, (FCB*)fp, headblks); + + CLR(VTOC(vp)->c_flag, C_NOBLKMAP); /* resume page-ins */ + if (ISSET(VTOC(vp)->c_flag, C_WBLKMAP)) + wakeup(VTOC(vp)); + if (retval) + goto restore; + + fp->ff_size = realsize; + if (UBCISVALID(vp)) { + (void) ubc_setsize(vp, realsize); + (void) vinvalbuf(vp, V_SAVE, cred, p, 0, 0); + } + + CLR(VTOC(vp)->c_flag, C_RELOCATING); /* Resume page-outs for this file. */ +out: + (void) hfs_metafilelocking(VTOHFS(vp), kHFSExtentsFileID, LK_RELEASE, p); + + retval = VOP_FSYNC(vp, cred, MNT_WAIT, p); +out2: + if (hfsmp->jnl) { + if (VTOC(vp)->c_cnid < kHFSFirstUserCatalogNodeID) + (void) hfs_flushvolumeheader(hfsmp, MNT_WAIT, HFS_ALTFLUSH); + else + (void) hfs_flushvolumeheader(hfsmp, MNT_NOWAIT, 0); + journal_end_transaction(hfsmp->jnl); + } + hfs_global_shared_lock_release(hfsmp); + + return (retval); + +restore: + /* + * Give back any newly allocated space. + */ + if (fp->ff_size != realsize) + fp->ff_size = realsize; + (void) TruncateFileC(vcb, (FCB*)fp, fp->ff_size, false); + if (UBCISVALID(vp)) + (void) ubc_setsize(vp, fp->ff_size); + CLR(VTOC(vp)->c_flag, C_RELOCATING); + goto out; +} + + +/* + * Clone a symlink. 
+ * + */ +static int +hfs_clonelink(struct vnode *vp, int blksize, struct ucred *cred, struct proc *p) +{ + struct buf *head_bp = NULL; + struct buf *tail_bp = NULL; + int error; + + + error = meta_bread(vp, 0, blksize, cred, &head_bp); + if (error) + goto out; + + tail_bp = getblk(vp, 1, blksize, 0, 0, BLK_META); + if (tail_bp == NULL) { + error = EIO; + goto out; + } + bcopy(head_bp->b_data, tail_bp->b_data, blksize); + error = bwrite(tail_bp); +out: + if (head_bp) { + head_bp->b_flags |= B_INVAL; + brelse(head_bp); + } + (void) vinvalbuf(vp, V_SAVE, cred, p, 0, 0); + + return (error); +} + +/* + * Clone a file's data within the file. + * + */ +static int +hfs_clonefile(struct vnode *vp, int blkstart, int blkcnt, int blksize, + struct ucred *cred, struct proc *p) +{ + caddr_t bufp; + size_t writebase; + size_t bufsize; + size_t copysize; + size_t iosize; + size_t filesize; + size_t offset; + struct uio auio; + struct iovec aiov; + int devblocksize; + int didhold; + int error; + + + if ((error = vinvalbuf(vp, V_SAVE, cred, p, 0, 0))) { + printf("hfs_clonefile: vinvalbuf failed - %d\n", error); + return (error); + } + + if (!ubc_clean(vp, 1)) { + printf("hfs_clonefile: not ubc_clean\n"); + return (EIO); /* XXX error code */ + } + + /* + * Suspend page-outs for this file. 
+ */ + SET(VTOC(vp)->c_flag, C_RELOCATING); + + filesize = VTOF(vp)->ff_size; + writebase = blkstart * blksize; + copysize = blkcnt * blksize; + iosize = bufsize = MIN(copysize, 4096 * 16); + offset = 0; + + if (kmem_alloc(kernel_map, (vm_offset_t *)&bufp, bufsize)) { + return (ENOMEM); + } + + VOP_DEVBLOCKSIZE(VTOC(vp)->c_devvp, &devblocksize); + + auio.uio_iov = &aiov; + auio.uio_iovcnt = 1; + auio.uio_segflg = UIO_SYSSPACE; + auio.uio_procp = p; + + while (offset < copysize) { + iosize = MIN(copysize - offset, iosize); + + aiov.iov_base = bufp; + aiov.iov_len = iosize; + auio.uio_resid = iosize; + auio.uio_offset = offset; + auio.uio_rw = UIO_READ; + + error = cluster_read(vp, &auio, copysize, devblocksize, 0); + if (error) { + printf("hfs_clonefile: cluster_read failed - %d\n", error); + break; + } + if (auio.uio_resid != 0) { + printf("clonedata: cluster_read: uio_resid = %d\n", (int)auio.uio_resid); + error = EIO; + break; + } + + + aiov.iov_base = bufp; + aiov.iov_len = iosize; + auio.uio_resid = iosize; + auio.uio_offset = writebase + offset; + auio.uio_rw = UIO_WRITE; + + error = cluster_write(vp, &auio, filesize + offset, + filesize + offset + iosize, + auio.uio_offset, 0, devblocksize, 0); + if (error) { + printf("hfs_clonefile: cluster_write failed - %d\n", error); + break; + } + if (auio.uio_resid != 0) { + printf("hfs_clonefile: cluster_write failed - uio_resid not zero\n"); + error = EIO; + break; + } + offset += iosize; + } + if (error == 0) { + /* Clean the pages in VM. */ + didhold = ubc_hold(vp); + if (didhold) + (void) ubc_clean(vp, 1); + + /* + * Clean out all associated buffers. + */ + (void) vinvalbuf(vp, V_SAVE, cred, p, 0, 0); + + if (didhold) + ubc_rele(vp); + } + kmem_free(kernel_map, (vm_offset_t)bufp, bufsize); + + return (error); +} + +/* + * Clone a system (metadata) file. 
+ * + */ +static int +hfs_clonesysfile(struct vnode *vp, int blkstart, int blkcnt, int blksize, + struct ucred *cred, struct proc *p) +{ + caddr_t bufp; + char * offset; + size_t bufsize; + size_t iosize; + struct buf *bp = NULL; + daddr_t blkno; + daddr_t blk; + int breadcnt; + int i; + int error = 0; + + + iosize = GetLogicalBlockSize(vp); + bufsize = MIN(blkcnt * blksize, 1024 * 1024) & ~(iosize - 1); + breadcnt = bufsize / iosize; + + if (kmem_alloc(kernel_map, (vm_offset_t *)&bufp, bufsize)) { + return (ENOMEM); + } + blkstart = (blkstart * blksize) / iosize; + blkcnt = (blkcnt * blksize) / iosize; + blkno = 0; + + while (blkno < blkcnt) { + /* + * Read up to a megabyte + */ + offset = bufp; + for (i = 0, blk = blkno; (i < breadcnt) && (blk < blkcnt); ++i, ++blk) { + error = meta_bread(vp, blk, iosize, cred, &bp); + if (error) { + printf("hfs_clonesysfile: meta_bread error %d\n", error); + goto out; + } + if (bp->b_bcount != iosize) { + printf("hfs_clonesysfile: b_bcount is only %d\n", bp->b_bcount); + goto out; + } + + bcopy(bp->b_data, offset, iosize); + bp->b_flags |= B_INVAL; + brelse(bp); + bp = NULL; + offset += iosize; + } + + /* + * Write up to a megabyte + */ + offset = bufp; + for (i = 0; (i < breadcnt) && (blkno < blkcnt); ++i, ++blkno) { + bp = getblk(vp, blkstart + blkno, iosize, 0, 0, BLK_META); + if (bp == NULL) { + printf("hfs_clonesysfile: getblk failed on blk %d\n", blkstart + blkno); + error = EIO; + goto out; + } + bcopy(offset, bp->b_data, iosize); + error = bwrite(bp); + bp = NULL; + if (error) + goto out; + offset += iosize; + } + } +out: + if (bp) { + brelse(bp); + } + + kmem_free(kernel_map, (vm_offset_t)bufp, bufsize); + + error = VOP_FSYNC(vp, cred, MNT_WAIT, p); + + return (error); +} +