+
+/*
+ * Relocate a file to a new location on disk
+ * cnode must be locked on entry
+ *
+ * Relocation occurs by cloning the file's data from its
+ * current set of blocks to a new set of blocks. During
+ * the relocation all of the blocks (old and new) are
+ * owned by the file.
+ *
+ * -----------------
+ * |///////////////|
+ * -----------------
+ * 0               N (file offset)
+ *
+ * -----------------     -----------------
+ * |///////////////|     |               |     STEP 1 (acquire new blocks)
+ * -----------------     -----------------
+ * 0               N     N+1             2N
+ *
+ * -----------------     -----------------
+ * |///////////////|     |///////////////|     STEP 2 (clone data)
+ * -----------------     -----------------
+ * 0               N     N+1             2N
+ *
+ *                       -----------------
+ *                       |///////////////|     STEP 3 (head truncate blocks)
+ *                       -----------------
+ *                       0               N
+ *
+ * During steps 2 and 3 page-outs to file offsets less
+ * than or equal to N are suspended.
+ *
+ * During step 3 page-ins to the file get suspended.
+ *
+ * Parameters:
+ *   vp        - vnode to relocate; must be VREG or VLNK
+ *   blockHint - allocation block to aim for; 0 selects the
+ *               volume's current nextAllocation
+ *   cred, p   - passed through to the locking, cloning and
+ *               sync calls
+ *
+ * Returns 0 on success, otherwise an errno value:
+ *   EPERM  - vp is neither a regular file nor a symlink
+ *   ENOSPC - free space is flagged as fragmented, or a suitable
+ *            new run of blocks could not be obtained
+ *   EINVAL - the fork has unallocated (delayed) blocks, or the
+ *            journal transaction could not be started
+ *   EFBIG  - the fork is larger than 0x7fffffff bytes, or a
+ *            symlink is larger than one allocation block
+ *   or the first error reported by a lower-level call.
+ */
+__private_extern__
+int
+hfs_relocate(struct vnode *vp, u_int32_t blockHint, struct ucred *cred,
+	struct proc *p)
+{
+	struct filefork *fp;
+	struct hfsmount *hfsmp;
+	ExtendedVCB *vcb;
+
+	u_int32_t headblks;
+	u_int32_t datablks;
+	u_int32_t blksize;
+	u_int32_t realsize;
+	u_int32_t growsize;
+	u_int32_t nextallocsave;
+	u_int32_t sector_a;
+	u_int32_t sector_b;
+	int eflags;
+	off_t newbytes;
+	int retval, need_vinval = 0;
+	int syncerr;
+
+	if (vp->v_type != VREG && vp->v_type != VLNK) {
+		return (EPERM);
+	}
+
+	hfsmp = VTOHFS(vp);
+	if (hfsmp->hfs_flags & HFS_FRAGMENTED_FREESPACE) {
+		return (ENOSPC);
+	}
+
+	fp = VTOF(vp);
+	if (fp->ff_unallocblocks)
+		return (EINVAL);
+	vcb = VTOVCB(vp);
+	blksize = vcb->blockSize;
+	if (blockHint == 0)
+		blockHint = vcb->nextAllocation;
+
+	if ((fp->ff_size > (u_int64_t)0x7fffffff) ||
+	    (vp->v_type == VLNK && fp->ff_size > blksize)) {
+		return (EFBIG);
+	}
+
+	headblks = fp->ff_blocks;			/* blocks currently owned by the fork */
+	datablks = howmany(fp->ff_size, blksize);	/* blocks that actually hold data */
+	growsize = datablks * blksize;
+	realsize = fp->ff_size;
+	eflags = kEFContigMask | kEFAllMask | kEFNoClumpMask;
+	if (blockHint >= hfsmp->hfs_metazone_start &&
+	    blockHint <= hfsmp->hfs_metazone_end)
+		eflags |= kEFMetadataMask;
+
+	hfs_global_shared_lock_acquire(hfsmp);
+	if (hfsmp->jnl) {
+		if (journal_start_transaction(hfsmp->jnl) != 0) {
+			/*
+			 * No transaction was opened, so the only thing
+			 * to undo is the global shared lock taken above.
+			 */
+			hfs_global_shared_lock_release(hfsmp);
+			return (EINVAL);
+		}
+	}
+
+	/* Lock extents b-tree (also protects volume bitmap) */
+	retval = hfs_metafilelocking(hfsmp, kHFSExtentsFileID, LK_EXCLUSIVE, p);
+	if (retval)
+		goto out2;
+
+	/* Remember where the last byte of existing data lives. */
+	retval = MapFileBlockC(vcb, (FCB *)fp, 1, growsize - 1, &sector_a, NULL);
+	if (retval) {
+		retval = MacToVFSError(retval);
+		goto out;
+	}
+
+	/*
+	 * STEP 1 - acquire new allocation blocks.
+	 */
+	nextallocsave = vcb->nextAllocation;
+	retval = ExtendFileC(vcb, (FCB*)fp, growsize, blockHint, eflags, &newbytes);
+	/* A metadata-zone allocation must not move the volume's allocation pointer. */
+	if (eflags & kEFMetadataMask)
+		vcb->nextAllocation = nextallocsave;
+
+	retval = MacToVFSError(retval);
+	if (retval == 0) {
+		VTOC(vp)->c_flag |= C_MODIFIED;
+		if (newbytes < growsize) {
+			retval = ENOSPC;
+			goto restore;
+		} else if (fp->ff_blocks < (headblks + datablks)) {
+			printf("hfs_relocate: allocation failed");
+			retval = ENOSPC;
+			goto restore;
+		}
+
+		retval = MapFileBlockC(vcb, (FCB *)fp, 1, growsize, &sector_b, NULL);
+		if (retval) {
+			retval = MacToVFSError(retval);
+		} else if ((sector_a + 1) == sector_b) {
+			/* New space directly follows the old data: nothing would move. */
+			retval = ENOSPC;
+			goto restore;
+		} else if ((eflags & kEFMetadataMask) &&
+		           ((((u_int64_t)sector_b * hfsmp->hfs_phys_block_size) / blksize) >
+		              hfsmp->hfs_metazone_end)) {
+			printf("hfs_relocate: didn't move into metadata zone\n");
+			retval = ENOSPC;
+			goto restore;
+		}
+	}
+	if (retval) {
+		/*
+		 * Check to see if failure is due to excessive fragmentation.
+		 */
+		if (retval == ENOSPC &&
+		    hfs_freeblks(hfsmp, 0) > (datablks * 2)) {
+			hfsmp->hfs_flags |= HFS_FRAGMENTED_FREESPACE;
+		}
+		goto out;
+	}
+
+	/* Temporarily expose the new blocks as part of the file so they can be written. */
+	fp->ff_size = fp->ff_blocks * blksize;
+	if (UBCISVALID(vp))
+		(void) ubc_setsize(vp, fp->ff_size);
+
+	/*
+	 * STEP 2 - clone data into the new allocation blocks.
+	 */
+
+	// XXXdbg - unlock the extents overflow file because hfs_clonefile()
+	//          calls vinvalbuf() which calls hfs_fsync() which can
+	//          call hfs_metasync() which may need to lock the catalog
+	//          file -- but the catalog file may be locked and blocked
+	//          waiting for the extents overflow file if we're unlucky.
+	//          see radar 3742973 for more details.
+	(void) hfs_metafilelocking(hfsmp, kHFSExtentsFileID, LK_RELEASE, p);
+
+	if (vp->v_type == VLNK)
+		retval = hfs_clonelink(vp, blksize, cred, p);
+	else if (vp->v_flag & VSYSTEM)
+		retval = hfs_clonesysfile(vp, headblks, datablks, blksize, cred, p);
+	else
+		retval = hfs_clonefile(vp, headblks, datablks, blksize, cred, p);
+
+	// XXXdbg - relock the extents overflow file
+	(void) hfs_metafilelocking(hfsmp, kHFSExtentsFileID, LK_EXCLUSIVE, p);
+
+	if (retval)
+		goto restore;
+
+	/*
+	 * STEP 3 - switch to clone and remove old blocks.
+	 */
+	SET(VTOC(vp)->c_flag, C_NOBLKMAP);	/* suspend page-ins */
+
+	retval = HeadTruncateFile(vcb, (FCB*)fp, headblks);
+
+	CLR(VTOC(vp)->c_flag, C_NOBLKMAP);	/* resume page-ins */
+	if (ISSET(VTOC(vp)->c_flag, C_WBLKMAP))
+		wakeup(VTOC(vp));
+	if (retval)
+		goto restore;
+
+	fp->ff_size = realsize;
+	if (UBCISVALID(vp)) {
+		(void) ubc_setsize(vp, realsize);
+		need_vinval = 1;
+	}
+
+	CLR(VTOC(vp)->c_flag, C_RELOCATING);	/* Resume page-outs for this file. */
+out:
+	(void) hfs_metafilelocking(hfsmp, kHFSExtentsFileID, LK_RELEASE, p);
+
+	// XXXdbg - do this after unlocking the extents-overflow
+	// file to avoid deadlocks (see comment above by STEP 2)
+	if (need_vinval) {
+		(void) vinvalbuf(vp, V_SAVE, cred, p, 0, 0);
+	}
+
+	/*
+	 * Always sync, but never let a successful sync hide an
+	 * earlier failure: the first error is the one reported.
+	 */
+	syncerr = VOP_FSYNC(vp, cred, MNT_WAIT, p);
+	if (retval == 0)
+		retval = syncerr;
+out2:
+	if (hfsmp->jnl) {
+		if (VTOC(vp)->c_cnid < kHFSFirstUserCatalogNodeID)
+			(void) hfs_flushvolumeheader(hfsmp, MNT_WAIT, HFS_ALTFLUSH);
+		else
+			(void) hfs_flushvolumeheader(hfsmp, MNT_NOWAIT, 0);
+		journal_end_transaction(hfsmp->jnl);
+	}
+	hfs_global_shared_lock_release(hfsmp);
+
+	return (retval);
+
+restore:
+	/*
+	 * Give back any newly allocated space.
+	 */
+	if (fp->ff_size != realsize)
+		fp->ff_size = realsize;
+	(void) TruncateFileC(vcb, (FCB*)fp, fp->ff_size, false);
+	if (UBCISVALID(vp))
+		(void) ubc_setsize(vp, fp->ff_size);
+	CLR(VTOC(vp)->c_flag, C_RELOCATING);
+	goto out;
+}
+
+
+/*
+ * Clone a symlink.
+ *
+ * Reads logical block 0 of the link, copies blksize bytes of it
+ * into logical block 1 and writes that block out.  The source
+ * buffer is always invalidated and released, and the vnode's
+ * buffers are flushed with vinvalbuf() before returning.
+ *
+ * Returns 0 on success, the meta_bread()/bwrite() error, or EIO
+ * when no destination buffer could be obtained.
+ */
+static int
+hfs_clonelink(struct vnode *vp, int blksize, struct ucred *cred, struct proc *p)
+{
+	struct buf *srcbuf = NULL;
+	struct buf *dstbuf;
+	int result;
+
+	result = meta_bread(vp, 0, blksize, cred, &srcbuf);
+	if (result == 0) {
+		dstbuf = getblk(vp, 1, blksize, 0, 0, BLK_META);
+		if (dstbuf != NULL) {
+			bcopy(srcbuf->b_data, dstbuf->b_data, blksize);
+			result = bwrite(dstbuf);
+		} else {
+			result = EIO;
+		}
+	}
+
+	/* The source buffer may be handed back even when the read failed. */
+	if (srcbuf != NULL) {
+		srcbuf->b_flags |= B_INVAL;
+		brelse(srcbuf);
+	}
+	(void) vinvalbuf(vp, V_SAVE, cred, p, 0, 0);
+
+	return (result);
+}
+
+/*
+ * Clone a file's data within the file.
+ *
+ * Copies blkcnt * blksize bytes starting at file offset 0 to file
+ * offset blkstart * blksize, going through cluster_read() and
+ * cluster_write() in chunks of at most 64 KB (4096 * 16).
+ *
+ * Sets C_RELOCATING on the cnode (suspending page-outs) and leaves
+ * it set on return; this function never clears it.
+ *
+ * Returns 0 on success, ENOMEM if the bounce buffer cannot be
+ * allocated, EIO on a short transfer or an unclean UBC state, or
+ * the error from vinvalbuf()/cluster_read()/cluster_write().
+ */
+static int
+hfs_clonefile(struct vnode *vp, int blkstart, int blkcnt, int blksize,
+	struct ucred *cred, struct proc *p)
+{
+	struct uio xfer;
+	struct iovec vec;
+	caddr_t scratch;
+	size_t scratchlen;
+	size_t dstbase;
+	size_t total;
+	size_t chunk;
+	size_t eof;
+	size_t done;
+	int devblocksize;
+	int held;
+	int error;
+
+	error = vinvalbuf(vp, V_SAVE, cred, p, 0, 0);
+	if (error != 0) {
+		printf("hfs_clonefile: vinvalbuf failed - %d\n", error);
+		return (error);
+	}
+
+	if (!ubc_clean(vp, 1)) {
+		printf("hfs_clonefile: not ubc_clean\n");
+		return (EIO);  /* XXX error code */
+	}
+
+	/*
+	 * Suspend page-outs for this file.
+	 */
+	SET(VTOC(vp)->c_flag, C_RELOCATING);
+
+	eof = VTOF(vp)->ff_size;
+	dstbase = blkstart * blksize;
+	total = blkcnt * blksize;
+	scratchlen = MIN(total, 4096 * 16);
+	chunk = scratchlen;
+
+	if (kmem_alloc(kernel_map, (vm_offset_t *)&scratch, scratchlen)) {
+		return (ENOMEM);
+	}
+
+	VOP_DEVBLOCKSIZE(VTOC(vp)->c_devvp, &devblocksize);
+
+	/* Fields of the uio that stay the same for every transfer. */
+	xfer.uio_iov = &vec;
+	xfer.uio_iovcnt = 1;
+	xfer.uio_segflg = UIO_SYSSPACE;
+	xfer.uio_procp = p;
+
+	for (done = 0; done < total; done += chunk) {
+		/* The final chunk may be shorter than the buffer. */
+		chunk = MIN(total - done, chunk);
+
+		/* Pull the next chunk of existing data into the buffer. */
+		vec.iov_base = scratch;
+		vec.iov_len = chunk;
+		xfer.uio_resid = chunk;
+		xfer.uio_offset = done;
+		xfer.uio_rw = UIO_READ;
+
+		error = cluster_read(vp, &xfer, total, devblocksize, 0);
+		if (error) {
+			printf("hfs_clonefile: cluster_read failed - %d\n", error);
+			break;
+		}
+		if (xfer.uio_resid != 0) {
+			printf("clonedata: cluster_read: uio_resid = %d\n", (int)xfer.uio_resid);
+			error = EIO;
+			break;
+		}
+
+		/* Push the same bytes out at the matching offset in the new blocks. */
+		vec.iov_base = scratch;
+		vec.iov_len = chunk;
+		xfer.uio_resid = chunk;
+		xfer.uio_offset = dstbase + done;
+		xfer.uio_rw = UIO_WRITE;
+
+		error = cluster_write(vp, &xfer, eof + done,
+		                      eof + done + chunk,
+		                      xfer.uio_offset, 0, devblocksize, 0);
+		if (error) {
+			printf("hfs_clonefile: cluster_write failed - %d\n", error);
+			break;
+		}
+		if (xfer.uio_resid != 0) {
+			printf("hfs_clonefile: cluster_write failed - uio_resid not zero\n");
+			error = EIO;
+			break;
+		}
+	}
+
+	if (error == 0) {
+		/* Clean the pages in VM. */
+		held = ubc_hold(vp);
+		if (held)
+			(void) ubc_clean(vp, 1);
+
+		/*
+		 * Clean out all associated buffers.
+		 */
+		(void) vinvalbuf(vp, V_SAVE, cred, p, 0, 0);
+
+		if (held)
+			ubc_rele(vp);
+	}
+	kmem_free(kernel_map, (vm_offset_t)scratch, scratchlen);
+
+	return (error);
+}
+
+/*
+ * Clone a system (metadata) file.
+ *
+ * Copies blkcnt allocation blocks of data from the start of the
+ * file to the region beginning at allocation block blkstart.  The
+ * copy goes through the buffer cache in units of the file's logical
+ * block size, batching up to one megabyte of reads before writing
+ * them back out.  Source buffers are invalidated as they are read.
+ *
+ * The vnode is always synced with VOP_FSYNC() before returning.
+ *
+ * Returns 0 on success, ENOMEM if the bounce buffer cannot be
+ * allocated, EIO on a short read buffer or getblk() failure, or
+ * the first error reported by meta_bread()/bwrite()/VOP_FSYNC().
+ */
+static int
+hfs_clonesysfile(struct vnode *vp, int blkstart, int blkcnt, int blksize,
+	struct ucred *cred, struct proc *p)
+{
+	caddr_t bufp;
+	char * offset;
+	size_t bufsize;
+	size_t iosize;
+	struct buf *bp = NULL;
+	daddr_t blkno;
+	daddr_t blk;
+	int breadcnt;
+	int i;
+	int error = 0;
+	int syncerr;
+
+
+	iosize = GetLogicalBlockSize(vp);
+	/* Bounce buffer: at most 1 MB, rounded down to a whole number of logical blocks. */
+	bufsize = MIN(blkcnt * blksize, 1024 * 1024) & ~(iosize - 1);
+	breadcnt = bufsize / iosize;
+
+	if (kmem_alloc(kernel_map, (vm_offset_t *)&bufp, bufsize)) {
+		return (ENOMEM);
+	}
+	/* From here on blkstart/blkcnt are counted in logical (iosize) blocks. */
+	blkstart = (blkstart * blksize) / iosize;
+	blkcnt = (blkcnt * blksize) / iosize;
+	blkno = 0;
+
+	while (blkno < blkcnt) {
+		/*
+		 * Read up to a megabyte
+		 */
+		offset = bufp;
+		for (i = 0, blk = blkno; (i < breadcnt) && (blk < blkcnt); ++i, ++blk) {
+			error = meta_bread(vp, blk, iosize, cred, &bp);
+			if (error) {
+				printf("hfs_clonesysfile: meta_bread error %d\n", error);
+				goto out;
+			}
+			if (bp->b_bcount != iosize) {
+				printf("hfs_clonesysfile: b_bcount is only %d\n", bp->b_bcount);
+				/* A short buffer means the copy is incomplete; report it. */
+				error = EIO;
+				goto out;
+			}
+
+			bcopy(bp->b_data, offset, iosize);
+			bp->b_flags |= B_INVAL;
+			brelse(bp);
+			bp = NULL;
+			offset += iosize;
+		}
+
+		/*
+		 * Write up to a megabyte
+		 */
+		offset = bufp;
+		for (i = 0; (i < breadcnt) && (blkno < blkcnt); ++i, ++blkno) {
+			bp = getblk(vp, blkstart + blkno, iosize, 0, 0, BLK_META);
+			if (bp == NULL) {
+				printf("hfs_clonesysfile: getblk failed on blk %d\n", blkstart + blkno);
+				error = EIO;
+				goto out;
+			}
+			bcopy(offset, bp->b_data, iosize);
+			error = bwrite(bp);
+			bp = NULL;	/* not released again at out: after bwrite() */
+			if (error)
+				goto out;
+			offset += iosize;
+		}
+	}
+out:
+	if (bp) {
+		brelse(bp);
+	}
+
+	kmem_free(kernel_map, (vm_offset_t)bufp, bufsize);
+
+	/*
+	 * Always sync, but keep the first failure: a successful sync
+	 * must not turn a failed copy into a reported success.
+	 */
+	syncerr = VOP_FSYNC(vp, cred, MNT_WAIT, p);
+	if (error == 0)
+		error = syncerr;
+
+	return (error);
+}
+