X-Git-Url: https://git.saurik.com/apple/xnu.git/blobdiff_plain/d12e16782ebf8bb779633dff9e14486293bf6d07..ccc36f2f2d89f9115c479db4439aa5c88de5b44a:/bsd/hfs/hfs_readwrite.c diff --git a/bsd/hfs/hfs_readwrite.c b/bsd/hfs/hfs_readwrite.c index e7fba0413..d49ca795c 100644 --- a/bsd/hfs/hfs_readwrite.c +++ b/bsd/hfs/hfs_readwrite.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2003 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -32,6 +32,7 @@ #include #include #include +#include #include #include #include @@ -63,6 +64,10 @@ enum { extern u_int32_t GetLogicalBlockSize(struct vnode *vp); +static int hfs_clonelink(struct vnode *, int, struct ucred *, struct proc *); +static int hfs_clonefile(struct vnode *, int, int, int, struct ucred *, struct proc *); +static int hfs_clonesysfile(struct vnode *, int, int, int, struct ucred *, struct proc *); + /***************************************************************************** * @@ -94,18 +99,16 @@ hfs_read(ap) register struct vnode *vp = ap->a_vp; struct cnode *cp; struct filefork *fp; - struct buf *bp; - daddr_t logBlockNo; - u_long fragSize, moveSize, startOffset, ioxfersize; int devBlockSize = 0; - off_t bytesRemaining; int retval = 0; off_t filesize; off_t filebytes; + off_t start_resid = uio->uio_resid; + /* Preflight checks */ - if (vp->v_type != VREG && vp->v_type != VLNK) - return (EISDIR); /* HFS can only read files */ + if ((vp->v_type != VREG) || !UBCINFOEXISTS(vp)) + return (EPERM); /* can only read regular files */ if (uio->uio_resid == 0) return (0); /* Nothing left to do */ if (uio->uio_offset < 0) @@ -127,105 +130,29 @@ hfs_read(ap) KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 12)) | DBG_FUNC_START, (int)uio->uio_offset, uio->uio_resid, (int)filesize, (int)filebytes, 0); - if (UBCISVALID(vp)) { - retval = cluster_read(vp, uio, filesize, devBlockSize, 0); - } else { - - for (retval = 0, bp = NULL; uio->uio_resid > 0; bp = NULL) { - - if ((bytesRemaining = (filesize - uio->uio_offset)) <= 0) - break; - - logBlockNo = (daddr_t)(uio->uio_offset / PAGE_SIZE_64); - startOffset = (u_long) (uio->uio_offset & PAGE_MASK_64); - fragSize = PAGE_SIZE; - - if (((logBlockNo * PAGE_SIZE) + fragSize) < filesize) - ioxfersize = fragSize; - else { - ioxfersize = filesize - (logBlockNo * PAGE_SIZE); - ioxfersize = (ioxfersize + (devBlockSize - 1)) & ~(devBlockSize - 1); - } - moveSize = ioxfersize; - moveSize -= startOffset; - - if (bytesRemaining < moveSize) - moveSize = bytesRemaining; - - if (uio->uio_resid < moveSize) { - moveSize = uio->uio_resid; - }; - if (moveSize == 0) { - break; - }; - - if (( uio->uio_offset + fragSize) >= filesize) { - retval = bread(vp, logBlockNo, ioxfersize, NOCRED, &bp); - - } else if (logBlockNo - 1 == vp->v_lastr && !(vp->v_flag & VRAOFF)) { - daddr_t nextLogBlockNo = logBlockNo + 1; - int nextsize; - - if (((nextLogBlockNo * PAGE_SIZE) + - (daddr_t)fragSize) < filesize) - nextsize = fragSize; - else { - nextsize = filesize - (nextLogBlockNo * PAGE_SIZE); - nextsize = (nextsize + (devBlockSize - 1)) & ~(devBlockSize - 1); - } - retval = breadn(vp, logBlockNo, ioxfersize, &nextLogBlockNo, &nextsize, 1, NOCRED, &bp); - } else { - retval = bread(vp, logBlockNo, ioxfersize, NOCRED, &bp); - }; - - if (retval != E_NONE) { - if (bp) { - brelse(bp); - bp = NULL; - } - break; - }; - vp->v_lastr = logBlockNo; - - /* - * We should only get non-zero b_resid when an I/O retval - * has occurred, which should cause us to break above. - * However, if the short read did not cause an retval, - * then we want to ensure that we do not uiomove bad - * or uninitialized data. - */ - ioxfersize -= bp->b_resid; - - if (ioxfersize < moveSize) { /* XXX PPD This should take the offset into account, too! */ - if (ioxfersize == 0) - break; - moveSize = ioxfersize; - } - if ((startOffset + moveSize) > bp->b_bcount) - panic("hfs_read: bad startOffset or moveSize\n"); - - if ((retval = uiomove((caddr_t)bp->b_data + startOffset, (int)moveSize, uio))) - break; - - if (S_ISREG(cp->c_mode) && - (((startOffset + moveSize) == fragSize) || (uio->uio_offset == filesize))) { - bp->b_flags |= B_AGE; - }; - - brelse(bp); - /* Start of loop resets bp to NULL before reaching outside this block... */ - } - - if (bp != NULL) { - brelse(bp); - } - } + retval = cluster_read(vp, uio, filesize, devBlockSize, 0); cp->c_flag |= C_ACCESS; KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 12)) | DBG_FUNC_END, (int)uio->uio_offset, uio->uio_resid, (int)filesize, (int)filebytes, 0); + /* + * Keep track blocks read + */ + if (VTOHFS(vp)->hfc_stage == HFC_RECORDING && retval == 0) { + /* + * If this file hasn't been seen since the start of + * the current sampling period then start over. + */ + if (cp->c_atime < VTOHFS(vp)->hfc_timebase) { + fp->ff_bytesread = start_resid - uio->uio_resid; + cp->c_atime = time.tv_sec; + } else { + fp->ff_bytesread += start_resid - uio->uio_resid; + } + } + return (retval); } @@ -253,37 +180,32 @@ hfs_write(ap) struct uio *uio = ap->a_uio; struct cnode *cp; struct filefork *fp; - struct buf *bp; struct proc *p; struct timeval tv; ExtendedVCB *vcb; - int devBlockSize = 0; - daddr_t logBlockNo; - long fragSize; - off_t origFileSize, currOffset, writelimit, bytesToAdd; - off_t actualBytesAdded; - u_long blkoffset, resid, xfersize, clearSize; - int eflags, ioflag; - int retval; + int devBlockSize = 0; + off_t origFileSize, writelimit, bytesToAdd; + off_t actualBytesAdded; + u_long resid; + int eflags, ioflag; + int retval; off_t filebytes; - u_long fileblocks; struct hfsmount *hfsmp; int started_tr = 0, grabbed_lock = 0; - ioflag = ap->a_ioflag; if (uio->uio_offset < 0) return (EINVAL); if (uio->uio_resid == 0) return (E_NONE); - if (vp->v_type != VREG && vp->v_type != VLNK) - return (EISDIR); /* Can only write files */ + if ((vp->v_type != VREG) || !UBCINFOEXISTS(vp)) + return (EPERM); /* Can only write regular files */ + ioflag = ap->a_ioflag; cp = VTOC(vp); fp = VTOF(vp); vcb = VTOVCB(vp); - fileblocks = fp->ff_blocks; - filebytes = (off_t)fileblocks * (off_t)vcb->blockSize; + filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize; if (ioflag & IO_APPEND) uio->uio_offset = fp->ff_size; @@ -294,7 +216,7 @@ hfs_write(ap) if (VTOHFS(vp)->jnl && cp->c_datafork) { struct HFSPlusExtentDescriptor *extd; - extd = &cp->c_datafork->ff_data.cf_extents[0]; + extd = &cp->c_datafork->ff_extents[0]; if (extd->startBlock == VTOVCB(vp)->vcbJinfoBlock || extd->startBlock == VTOHFS(vp)->jnl_start) { return EPERM; } @@ -321,19 +243,6 @@ hfs_write(ap) eflags = kEFDeferMask; /* defer file block allocations */ filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize; - /* - * NOTE: In the following loop there are two positions tracked: - * currOffset is the current I/O starting offset. currOffset - * is never >LEOF; the LEOF is nudged along with currOffset as - * data is zeroed or written. uio->uio_offset is the start of - * the current I/O operation. It may be arbitrarily beyond - * currOffset. - * - * The following is true at all times: - * currOffset <= LEOF <= uio->uio_offset <= writelimit - */ - currOffset = MIN(uio->uio_offset, fp->ff_size); - KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_START, (int)uio->uio_offset, uio->uio_resid, (int)fp->ff_size, (int)filebytes, 0); retval = 0; @@ -353,6 +262,20 @@ hfs_write(ap) #endif /* QUOTA */ hfsmp = VTOHFS(vp); + +#ifdef HFS_SPARSE_DEV + /* + * When the underlying device is sparse and space + * is low (< 8MB), stop doing delayed allocations + * and begin doing synchronous I/O. + */ + if ((hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) && + (hfs_freeblks(hfsmp, 0) < 2048)) { + eflags &= ~kEFDeferMask; + ioflag |= IO_SYNC; + } +#endif /* HFS_SPARSE_DEV */ + if (writelimit > filebytes) { hfs_global_shared_lock_acquire(hfsmp); grabbed_lock = 1; @@ -366,16 +289,19 @@ hfs_write(ap) } while (writelimit > filebytes) { - bytesToAdd = writelimit - filebytes; - if (suser(ap->a_cred, NULL) != 0) + if (ap->a_cred && suser(ap->a_cred, NULL) != 0) eflags |= kEFReserveMask; /* lock extents b-tree (also protects volume bitmap) */ retval = hfs_metafilelocking(VTOHFS(vp), kHFSExtentsFileID, LK_EXCLUSIVE, current_proc()); if (retval != E_NONE) break; - + + /* Files that are changing size are not hot file candidates. */ + if (hfsmp->hfc_stage == HFC_RECORDING) { + fp->ff_bytesread = 0; + } retval = MacToVFSError(ExtendFileC (vcb, (FCB*)fp, bytesToAdd, 0, eflags, &actualBytesAdded)); @@ -391,6 +317,9 @@ hfs_write(ap) // XXXdbg if (started_tr) { + tv = time; + VOP_UPDATE(vp, &tv, &tv, 1); + hfs_flushvolumeheader(hfsmp, MNT_NOWAIT, 0); journal_end_transaction(hfsmp->jnl); started_tr = 0; @@ -400,7 +329,7 @@ hfs_write(ap) grabbed_lock = 0; } - if (UBCISVALID(vp) && retval == E_NONE) { + if (retval == E_NONE) { off_t filesize; off_t zero_off; off_t tail_off; @@ -424,8 +353,10 @@ hfs_write(ap) of the transfer to see whether is invalid and should be zero-filled as part of the transfer: */ - if (rl_scan(&fp->ff_invalidranges, zero_off, uio->uio_offset - 1, &invalid_range) != RL_NOOVERLAP) - lflag |= IO_HEADZEROFILL; + if (uio->uio_offset > zero_off) { + if (rl_scan(&fp->ff_invalidranges, zero_off, uio->uio_offset - 1, &invalid_range) != RL_NOOVERLAP) + lflag |= IO_HEADZEROFILL; + } } else { off_t eof_page_base = fp->ff_size & ~PAGE_MASK_64; @@ -525,105 +456,10 @@ hfs_write(ap) } if (resid > uio->uio_resid) cp->c_flag |= C_CHANGE | C_UPDATE; - } else { - while (retval == E_NONE && uio->uio_resid > 0) { - logBlockNo = currOffset / PAGE_SIZE; - blkoffset = currOffset & PAGE_MASK; - - if ((filebytes - currOffset) < PAGE_SIZE_64) - fragSize = filebytes - ((off_t)logBlockNo * PAGE_SIZE_64); - else - fragSize = PAGE_SIZE; - xfersize = fragSize - blkoffset; - - /* Make any adjustments for boundary conditions */ - if (currOffset + (off_t)xfersize > writelimit) - xfersize = writelimit - currOffset; - - /* - * There is no need to read into bp if: - * We start on a block boundary and will overwrite the whole block - * - * OR - */ - if ((blkoffset == 0) && (xfersize >= fragSize)) { - bp = getblk(vp, logBlockNo, fragSize, 0, 0, BLK_READ); - retval = 0; - - if (bp->b_blkno == -1) { - brelse(bp); - retval = EIO; /* XXX */ - break; - } - } else { - - if (currOffset == fp->ff_size && blkoffset == 0) { - bp = getblk(vp, logBlockNo, fragSize, 0, 0, BLK_READ); - retval = 0; - if (bp->b_blkno == -1) { - brelse(bp); - retval = EIO; /* XXX */ - break; - } - } else { - /* - * This I/O transfer is not sufficiently aligned, - * so read the affected block into a buffer: - */ - retval = bread(vp, logBlockNo, fragSize, ap->a_cred, &bp); - if (retval != E_NONE) { - if (bp) - brelse(bp); - break; - } - } - } - - /* See if we are starting to write within file boundaries: - * If not, then we need to present a "hole" for the area - * between the current EOF and the start of the current - * I/O operation: - * - * Note that currOffset is only less than uio_offset if - * uio_offset > LEOF... - */ - if (uio->uio_offset > currOffset) { - clearSize = MIN(uio->uio_offset - currOffset, xfersize); - bzero(bp->b_data + blkoffset, clearSize); - currOffset += clearSize; - blkoffset += clearSize; - xfersize -= clearSize; - } - - if (xfersize > 0) { - retval = uiomove((caddr_t)bp->b_data + blkoffset, (int)xfersize, uio); - currOffset += xfersize; - } - - if (ioflag & IO_SYNC) { - (void)VOP_BWRITE(bp); - } else if ((xfersize + blkoffset) == fragSize) { - bp->b_flags |= B_AGE; - bawrite(bp); - } else { - bdwrite(bp); - } - - /* Update the EOF if we just extended the file - * (the PEOF has already been moved out and the - * block mapping table has been updated): - */ - if (currOffset > fp->ff_size) { - fp->ff_size = currOffset; - if (UBCISVALID(vp)) - ubc_setsize(vp, fp->ff_size); /* XXX check errors */ - } - if (retval || (resid == 0)) - break; - cp->c_flag |= C_CHANGE | C_UPDATE; - } /* endwhile */ } + HFS_KNOTE(vp, NOTE_WRITE); + ioerr_exit: /* * If we successfully wrote any data, and we are not the superuser @@ -645,6 +481,7 @@ ioerr_exit: tv = time; retval = VOP_UPDATE(vp, &tv, &tv, 1); } + vcb->vcbWrCnt++; KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_END, (int)uio->uio_offset, uio->uio_resid, (int)fp->ff_size, (int)filebytes, 0); @@ -653,6 +490,22 @@ ioerr_exit: } +#ifdef HFS_SPARSE_DEV +struct hfs_backingstoreinfo { + int signature; /* == 3419115 */ + int version; /* version of this struct (1) */ + int backingfd; /* disk image file (on backing fs) */ + int bandsize; /* sparse disk image band size */ +}; + +#define HFSIOC_SETBACKINGSTOREINFO _IOW('h', 7, struct hfs_backingstoreinfo) +#define HFSIOC_CLRBACKINGSTOREINFO _IO('h', 8) + +#define HFS_SETBACKINGSTOREINFO IOCBASECMD(HFSIOC_SETBACKINGSTOREINFO) +#define HFS_CLRBACKINGSTOREINFO IOCBASECMD(HFSIOC_CLRBACKINGSTOREINFO) + +#endif /* HFS_SPARSE_DEV */ + /* #% ioctl vp U U U @@ -681,10 +534,127 @@ hfs_ioctl(ap) } */ *ap; { switch (ap->a_command) { - case 1: { + +#ifdef HFS_SPARSE_DEV + case HFS_SETBACKINGSTOREINFO: { + struct hfsmount * hfsmp; + struct vnode * bsfs_rootvp; + struct vnode * di_vp; + struct file * di_fp; + struct hfs_backingstoreinfo *bsdata; + int error = 0; + + hfsmp = VTOHFS(ap->a_vp); + if (hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) { + return (EALREADY); + } + if (ap->a_p->p_ucred->cr_uid != 0 && + ap->a_p->p_ucred->cr_uid != (HFSTOVFS(hfsmp))->mnt_stat.f_owner) { + return (EACCES); /* must be owner of file system */ + } + bsdata = (struct hfs_backingstoreinfo *)ap->a_data; + if (bsdata == NULL) { + return (EINVAL); + } + if (error = fdgetf(ap->a_p, bsdata->backingfd, &di_fp)) { + return (error); + } + if (fref(di_fp) == -1) { + return (EBADF); + } + if (di_fp->f_type != DTYPE_VNODE) { + frele(di_fp); + return (EINVAL); + } + di_vp = (struct vnode *)di_fp->f_data; + if (ap->a_vp->v_mount == di_vp->v_mount) { + frele(di_fp); + return (EINVAL); + } + + /* + * Obtain the backing fs root vnode and keep a reference + * on it. This reference will be dropped in hfs_unmount. + */ + error = VFS_ROOT(di_vp->v_mount, &bsfs_rootvp); + if (error) { + frele(di_fp); + return (error); + } + VOP_UNLOCK(bsfs_rootvp, 0, ap->a_p); /* Hold on to the reference */ + + hfsmp->hfs_backingfs_rootvp = bsfs_rootvp; + hfsmp->hfs_flags |= HFS_HAS_SPARSE_DEVICE; + hfsmp->hfs_sparsebandblks = bsdata->bandsize / HFSTOVCB(hfsmp)->blockSize; + hfsmp->hfs_sparsebandblks *= 4; + + frele(di_fp); + return (0); + } + case HFS_CLRBACKINGSTOREINFO: { + struct hfsmount * hfsmp; + struct vnode * tmpvp; + + hfsmp = VTOHFS(ap->a_vp); + if (ap->a_p->p_ucred->cr_uid != 0 && + ap->a_p->p_ucred->cr_uid != (HFSTOVFS(hfsmp))->mnt_stat.f_owner) { + return (EACCES); /* must be owner of file system */ + } + if ((hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) && + hfsmp->hfs_backingfs_rootvp) { + + hfsmp->hfs_flags &= ~HFS_HAS_SPARSE_DEVICE; + tmpvp = hfsmp->hfs_backingfs_rootvp; + hfsmp->hfs_backingfs_rootvp = NULLVP; + hfsmp->hfs_sparsebandblks = 0; + vrele(tmpvp); + } + return (0); + } +#endif /* HFS_SPARSE_DEV */ + + case 6: { + int error; + + ap->a_vp->v_flag |= VFULLFSYNC; + error = VOP_FSYNC(ap->a_vp, ap->a_cred, MNT_NOWAIT, ap->a_p); + ap->a_vp->v_flag &= ~VFULLFSYNC; + + return error; + } + case 5: { + register struct vnode *vp; register struct cnode *cp; + struct filefork *fp; + int error; + + vp = ap->a_vp; + cp = VTOC(vp); + fp = VTOF(vp); + + if (vp->v_type != VREG) + return EINVAL; + + VOP_LEASE(vp, ap->a_p, ap->a_cred, LEASE_READ); + error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, ap->a_p); + if (error) + return (error); + + /* + * used by regression test to determine if + * all the dirty pages (via write) have been cleaned + * after a call to 'fsysnc'. + */ + error = is_file_clean(vp, fp->ff_size); + VOP_UNLOCK(vp, 0, ap->a_p); + + return (error); + } + + case 1: { register struct vnode *vp; register struct radvisory *ra; + register struct cnode *cp; struct filefork *fp; int devBlockSize = 0; int error; @@ -989,6 +959,7 @@ hfs_cmap(ap) struct rl_entry *invalid_range; enum rl_overlaptype overlaptype; int started_tr = 0, grabbed_lock = 0; + struct timeval tv; /* * Check for underlying vnode requests and ensure that logical @@ -998,6 +969,17 @@ hfs_cmap(ap) return (0); p = current_proc(); + + if (ISSET(VTOC(ap->a_vp)->c_flag, C_NOBLKMAP)) { + /* + * File blocks are getting remapped. Wait until its finished. + */ + SET(VTOC(ap->a_vp)->c_flag, C_WBLKMAP); + (void) tsleep((caddr_t)VTOC(ap->a_vp), PINOD, "hfs_cmap", 0); + if (ISSET(VTOC(ap->a_vp)->c_flag, C_NOBLKMAP)) + panic("hfs_cmap: no mappable blocks"); + } + retry: if (fp->ff_unallocblocks) { lockExtBtree = 1; @@ -1037,7 +1019,7 @@ hfs_cmap(ap) if (fp->ff_unallocblocks) { SInt64 reqbytes, actbytes; - // + // // Make sure we have a transaction. It's possible // that we came in and fp->ff_unallocblocks was zero // but during the time we blocked acquiring the extents @@ -1049,7 +1031,7 @@ hfs_cmap(ap) (void) hfs_metafilelocking(hfsmp, kHFSExtentsFileID, LK_RELEASE, p); lockExtBtree = 0; } - + goto retry; } @@ -1068,6 +1050,10 @@ hfs_cmap(ap) fp->ff_blocks -= fp->ff_unallocblocks; fp->ff_unallocblocks = 0; + /* Files that are changing size are not hot file candidates. */ + if (hfsmp->hfc_stage == HFC_RECORDING) { + fp->ff_bytesread = 0; + } while (retval == 0 && reqbytes > 0) { retval = MacToVFSError(ExtendFileC(HFSTOVCB(hfsmp), (FCB*)fp, reqbytes, 0, @@ -1087,7 +1073,11 @@ hfs_cmap(ap) if (retval) { (void) hfs_metafilelocking(hfsmp, kHFSExtentsFileID, LK_RELEASE, p); + VTOC(ap->a_vp)->c_flag |= C_MODIFIED; if (started_tr) { + tv = time; + VOP_UPDATE(ap->a_vp, &tv, &tv, 1); + hfs_flushvolumeheader(hfsmp, MNT_NOWAIT, 0); journal_end_transaction(hfsmp->jnl); } @@ -1096,7 +1086,6 @@ hfs_cmap(ap) } return (retval); } - VTOC(ap->a_vp)->c_flag |= C_MODIFIED; } retval = MacToVFSError( @@ -1112,6 +1101,9 @@ hfs_cmap(ap) // XXXdbg if (started_tr) { + tv = time; + retval = VOP_UPDATE(ap->a_vp, &tv, &tv, 1); + hfs_flushvolumeheader(hfsmp, MNT_NOWAIT, 0); journal_end_transaction(hfsmp->jnl); started_tr = 0; @@ -1358,21 +1350,7 @@ hfs_strategy(ap) } -/* -# -#% truncate vp L L L -# -vop_truncate { - IN struct vnode *vp; - IN off_t length; - IN int flags; (IO_SYNC) - IN struct ucred *cred; - IN struct proc *p; -}; - * Truncate a cnode to at most length size, freeing (or adding) the - * disk blocks. - */ -int hfs_truncate(ap) +static int do_hfs_truncate(ap) struct vop_truncate_args /* { struct vnode *a_vp; off_t a_length; @@ -1417,6 +1395,11 @@ int hfs_truncate(ap) tv = time; retval = E_NONE; + /* Files that are changing size are not hot file candidates. */ + if (hfsmp->hfc_stage == HFC_RECORDING) { + fp->ff_bytesread = 0; + } + /* * We cannot just check if fp->ff_size == length (as an optimization) * since there may be extra physical blocks that also need truncation. @@ -1444,13 +1427,23 @@ int hfs_truncate(ap) */ if (length > filebytes) { int eflags; + u_long blockHint = 0; /* All or nothing and don't round up to clumpsize. */ eflags = kEFAllMask | kEFNoClumpMask; - if (suser(ap->a_cred, NULL) != 0) + if (ap->a_cred && suser(ap->a_cred, NULL) != 0) eflags |= kEFReserveMask; /* keep a reserve */ + /* + * Allocate Journal and Quota files in metadata zone. + */ + if (filebytes == 0 && + hfsmp->hfs_flags & HFS_METADATA_ZONE && + hfs_virtualmetafile(cp)) { + eflags |= kEFMetadataMask; + blockHint = hfsmp->hfs_metazone_start; + } // XXXdbg hfs_global_shared_lock_acquire(hfsmp); if (hfsmp->jnl) { @@ -1476,7 +1469,7 @@ int hfs_truncate(ap) retval = MacToVFSError(ExtendFileC(VTOVCB(vp), (FCB*)fp, bytesToAdd, - 0, + blockHint, eflags, &actualBytesAdded)); @@ -1492,6 +1485,9 @@ int hfs_truncate(ap) // XXXdbg if (hfsmp->jnl) { + tv = time; + VOP_UPDATE(vp, &tv, &tv, 1); + hfs_flushvolumeheader(hfsmp, MNT_NOWAIT, 0); journal_end_transaction(hfsmp->jnl); } @@ -1639,6 +1635,9 @@ int hfs_truncate(ap) // XXXdbg if (hfsmp->jnl) { + tv = time; + VOP_UPDATE(vp, &tv, &tv, 1); + hfs_flushvolumeheader(hfsmp, MNT_NOWAIT, 0); journal_end_transaction(hfsmp->jnl); } @@ -1673,6 +1672,83 @@ Err_Exit: } +/* +# +#% truncate vp L L L +# +vop_truncate { + IN struct vnode *vp; + IN off_t length; + IN int flags; (IO_SYNC) + IN struct ucred *cred; + IN struct proc *p; +}; + * Truncate a cnode to at most length size, freeing (or adding) the + * disk blocks. + */ +int hfs_truncate(ap) + struct vop_truncate_args /* { + struct vnode *a_vp; + off_t a_length; + int a_flags; + struct ucred *a_cred; + struct proc *a_p; + } */ *ap; +{ + register struct vnode *vp = ap->a_vp; + register struct cnode *cp = VTOC(vp); + struct filefork *fp = VTOF(vp); + off_t length; + off_t filebytes; + u_long fileblocks; + int blksize, error; + u_int64_t nsize; + + if (vp->v_type != VREG && vp->v_type != VLNK) + return (EISDIR); /* cannot truncate an HFS directory! */ + + length = ap->a_length; + blksize = VTOVCB(vp)->blockSize; + fileblocks = fp->ff_blocks; + filebytes = (off_t)fileblocks * (off_t)blksize; + + // have to loop truncating or growing files that are + // really big because otherwise transactions can get + // enormous and consume too many kernel resources. + if (length < filebytes && (filebytes - length) > HFS_BIGFILE_SIZE) { + while (filebytes > length) { + if ((filebytes - length) > HFS_BIGFILE_SIZE) { + filebytes -= HFS_BIGFILE_SIZE; + } else { + filebytes = length; + } + + ap->a_length = filebytes; + error = do_hfs_truncate(ap); + if (error) + break; + } + } else if (length > filebytes && (length - filebytes) > HFS_BIGFILE_SIZE) { + while (filebytes < length) { + if ((length - filebytes) > HFS_BIGFILE_SIZE) { + filebytes += HFS_BIGFILE_SIZE; + } else { + filebytes = (length - filebytes); + } + + ap->a_length = filebytes; + error = do_hfs_truncate(ap); + if (error) + break; + } + } else { + error = do_hfs_truncate(ap); + } + + return error; +} + + /* # @@ -1703,6 +1779,7 @@ int hfs_allocate(ap) struct vnode *vp = ap->a_vp; struct cnode *cp = VTOC(vp); struct filefork *fp = VTOF(vp); + ExtendedVCB *vcb = VTOVCB(vp); off_t length = ap->a_length; off_t startingPEOF; off_t moreBytesRequested; @@ -1713,31 +1790,30 @@ int hfs_allocate(ap) struct timeval tv; int retval, retval2; UInt32 blockHint; - UInt32 extendFlags =0; /* For call to ExtendFileC */ + UInt32 extendFlags; /* For call to ExtendFileC */ struct hfsmount *hfsmp; hfsmp = VTOHFS(vp); *(ap->a_bytesallocated) = 0; fileblocks = fp->ff_blocks; - filebytes = (off_t)fileblocks * (off_t)VTOVCB(vp)->blockSize; + filebytes = (off_t)fileblocks * (off_t)vcb->blockSize; if (length < (off_t)0) return (EINVAL); - if (vp->v_type != VREG && vp->v_type != VLNK) + if (vp->v_type != VREG) return (EISDIR); - if ((ap->a_flags & ALLOCATEFROMVOL) && (length <= filebytes)) + if ((ap->a_flags & ALLOCATEFROMVOL) && (length < filebytes)) return (EINVAL); /* Fill in the flags word for the call to Extend the file */ + extendFlags = kEFNoClumpMask; if (ap->a_flags & ALLOCATECONTIG) extendFlags |= kEFContigMask; - if (ap->a_flags & ALLOCATEALL) extendFlags |= kEFAllMask; - - if (suser(ap->a_cred, NULL) != 0) + if (ap->a_cred && suser(ap->a_cred, NULL) != 0) extendFlags |= kEFReserveMask; tv = time; @@ -1764,12 +1840,31 @@ int hfs_allocate(ap) #if QUOTA retval = hfs_chkdq(cp, - (int64_t)(roundup(moreBytesRequested, VTOVCB(vp)->blockSize)), + (int64_t)(roundup(moreBytesRequested, vcb->blockSize)), ap->a_cred, 0); if (retval) return (retval); #endif /* QUOTA */ + /* + * Metadata zone checks. + */ + if (hfsmp->hfs_flags & HFS_METADATA_ZONE) { + /* + * Allocate Journal and Quota files in metadata zone. + */ + if (hfs_virtualmetafile(cp)) { + extendFlags |= kEFMetadataMask; + blockHint = hfsmp->hfs_metazone_start; + } else if ((blockHint >= hfsmp->hfs_metazone_start) && + (blockHint <= hfsmp->hfs_metazone_end)) { + /* + * Move blockHint outside metadata zone. + */ + blockHint = hfsmp->hfs_metazone_end + 1; + } + } + // XXXdbg hfs_global_shared_lock_acquire(hfsmp); if (hfsmp->jnl) { @@ -1789,7 +1884,7 @@ int hfs_allocate(ap) goto Err_Exit; } - retval = MacToVFSError(ExtendFileC(VTOVCB(vp), + retval = MacToVFSError(ExtendFileC(vcb, (FCB*)fp, moreBytesRequested, blockHint, @@ -1797,12 +1892,15 @@ int hfs_allocate(ap) &actualBytesAdded)); *(ap->a_bytesallocated) = actualBytesAdded; - filebytes = (off_t)fp->ff_blocks * (off_t)VTOVCB(vp)->blockSize; + filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize; (void) hfs_metafilelocking(VTOHFS(vp), kHFSExtentsFileID, LK_RELEASE, ap->a_p); // XXXdbg if (hfsmp->jnl) { + tv = time; + VOP_UPDATE(vp, &tv, &tv, 1); + hfs_flushvolumeheader(hfsmp, MNT_NOWAIT, 0); journal_end_transaction(hfsmp->jnl); } @@ -1824,7 +1922,7 @@ int hfs_allocate(ap) */ if ((actualBytesAdded != 0) && (moreBytesRequested < actualBytesAdded)) *(ap->a_bytesallocated) = - roundup(moreBytesRequested, (off_t)VTOVCB(vp)->blockSize); + roundup(moreBytesRequested, (off_t)vcb->blockSize); } else { /* Shorten the size of the file */ @@ -1860,14 +1958,17 @@ int hfs_allocate(ap) retval = MacToVFSError( TruncateFileC( - VTOVCB(vp), + vcb, (FCB*)fp, length, false)); (void) hfs_metafilelocking(VTOHFS(vp), kHFSExtentsFileID, LK_RELEASE, ap->a_p); - filebytes = (off_t)fp->ff_blocks * (off_t)VTOVCB(vp)->blockSize; + filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize; if (hfsmp->jnl) { + tv = time; + VOP_UPDATE(vp, &tv, &tv, 1); + hfs_flushvolumeheader(hfsmp, MNT_NOWAIT, 0); journal_end_transaction(hfsmp->jnl); } @@ -1922,7 +2023,7 @@ hfs_pagein(ap) int devBlockSize = 0; int error; - if (vp->v_type != VREG && vp->v_type != VLNK) + if (vp->v_type != VREG) panic("hfs_pagein: vp not UBC type\n"); VOP_DEVBLOCKSIZE(VTOC(vp)->c_devvp, &devBlockSize); @@ -1930,6 +2031,25 @@ hfs_pagein(ap) error = cluster_pagein(vp, ap->a_pl, ap->a_pl_offset, ap->a_f_offset, ap->a_size, (off_t)VTOF(vp)->ff_size, devBlockSize, ap->a_flags); + /* + * Keep track blocks read + */ + if (VTOHFS(vp)->hfc_stage == HFC_RECORDING && error == 0) { + struct cnode *cp; + + cp = VTOC(vp); + /* + * If this file hasn't been seen since the start of + * the current sampling period then start over. + */ + if (cp->c_atime < VTOHFS(vp)->hfc_timebase) + VTOF(vp)->ff_bytesread = ap->a_size; + else + VTOF(vp)->ff_bytesread += ap->a_size; + + cp->c_flag |= C_ACCESS; + } + return (error); } @@ -1963,10 +2083,18 @@ hfs_pageout(ap) filesize = fp->ff_size; end_of_range = ap->a_f_offset + ap->a_size - 1; + if (cp->c_flag & C_RELOCATING) { + if (end_of_range < (filesize / 2)) { + return (EBUSY); + } + } + if (end_of_range >= filesize) end_of_range = (off_t)(filesize - 1); - if (ap->a_f_offset < filesize) + if (ap->a_f_offset < filesize) { rl_remove(ap->a_f_offset, end_of_range, &fp->ff_invalidranges); + cp->c_flag |= C_MODIFIED; /* leof is dirty */ + } retval = cluster_pageout(vp, ap->a_pl, ap->a_pl_offset, ap->a_f_offset, ap->a_size, filesize, devBlockSize, ap->a_flags); @@ -2033,3 +2161,476 @@ hfs_bwrite(ap) return (retval); } + +/* + * Relocate a file to a new location on disk + * cnode must be locked on entry + * + * Relocation occurs by cloning the file's data from its + * current set of blocks to a new set of blocks. During + * the relocation all of the blocks (old and new) are + * owned by the file. + * + * ----------------- + * |///////////////| + * ----------------- + * 0 N (file offset) + * + * ----------------- ----------------- + * |///////////////| | | STEP 1 (aquire new blocks) + * ----------------- ----------------- + * 0 N N+1 2N + * + * ----------------- ----------------- + * |///////////////| |///////////////| STEP 2 (clone data) + * ----------------- ----------------- + * 0 N N+1 2N + * + * ----------------- + * |///////////////| STEP 3 (head truncate blocks) + * ----------------- + * 0 N + * + * During steps 2 and 3 page-outs to file offsets less + * than or equal to N are suspended. + * + * During step 3 page-ins to the file get supended. + */ +__private_extern__ +int +hfs_relocate(vp, blockHint, cred, p) + struct vnode *vp; + u_int32_t blockHint; + struct ucred *cred; + struct proc *p; +{ + struct filefork *fp; + struct hfsmount *hfsmp; + ExtendedVCB *vcb; + + u_int32_t headblks; + u_int32_t datablks; + u_int32_t blksize; + u_int32_t realsize; + u_int32_t growsize; + u_int32_t nextallocsave; + u_int32_t sector_a; + u_int32_t sector_b; + int eflags; + u_int32_t oldstart; /* debug only */ + off_t newbytes; + int retval, need_vinval=0; + + if (vp->v_type != VREG && vp->v_type != VLNK) { + return (EPERM); + } + + hfsmp = VTOHFS(vp); + if (hfsmp->hfs_flags & HFS_FRAGMENTED_FREESPACE) { + return (ENOSPC); + } + + fp = VTOF(vp); + if (fp->ff_unallocblocks) + return (EINVAL); + vcb = VTOVCB(vp); + blksize = vcb->blockSize; + if (blockHint == 0) + blockHint = vcb->nextAllocation; + + if ((fp->ff_size > (u_int64_t)0x7fffffff) || + (vp->v_type == VLNK && fp->ff_size > blksize)) { + return (EFBIG); + } + + headblks = fp->ff_blocks; + datablks = howmany(fp->ff_size, blksize); + growsize = datablks * blksize; + realsize = fp->ff_size; + eflags = kEFContigMask | kEFAllMask | kEFNoClumpMask; + if (blockHint >= hfsmp->hfs_metazone_start && + blockHint <= hfsmp->hfs_metazone_end) + eflags |= kEFMetadataMask; + + hfs_global_shared_lock_acquire(hfsmp); + if (hfsmp->jnl) { + if (journal_start_transaction(hfsmp->jnl) != 0) { + return (EINVAL); + } + } + + /* Lock extents b-tree (also protects volume bitmap) */ + retval = hfs_metafilelocking(hfsmp, kHFSExtentsFileID, LK_EXCLUSIVE, p); + if (retval) + goto out2; + + retval = MapFileBlockC(vcb, (FCB *)fp, 1, growsize - 1, §or_a, NULL); + if (retval) { + retval = MacToVFSError(retval); + goto out; + } + + /* + * STEP 1 - aquire new allocation blocks. + */ + nextallocsave = vcb->nextAllocation; + retval = ExtendFileC(vcb, (FCB*)fp, growsize, blockHint, eflags, &newbytes); + if (eflags & kEFMetadataMask) + vcb->nextAllocation = nextallocsave; + + retval = MacToVFSError(retval); + if (retval == 0) { + VTOC(vp)->c_flag |= C_MODIFIED; + if (newbytes < growsize) { + retval = ENOSPC; + goto restore; + } else if (fp->ff_blocks < (headblks + datablks)) { + printf("hfs_relocate: allocation failed"); + retval = ENOSPC; + goto restore; + } + + retval = MapFileBlockC(vcb, (FCB *)fp, 1, growsize, §or_b, NULL); + if (retval) { + retval = MacToVFSError(retval); + } else if ((sector_a + 1) == sector_b) { + retval = ENOSPC; + goto restore; + } else if ((eflags & kEFMetadataMask) && + ((((u_int64_t)sector_b * hfsmp->hfs_phys_block_size) / blksize) > + hfsmp->hfs_metazone_end)) { + printf("hfs_relocate: didn't move into metadata zone\n"); + retval = ENOSPC; + goto restore; + } + } + if (retval) { + /* + * Check to see if failure is due to excessive fragmentation. + */ + if (retval == ENOSPC && + hfs_freeblks(hfsmp, 0) > (datablks * 2)) { + hfsmp->hfs_flags |= HFS_FRAGMENTED_FREESPACE; + } + goto out; + } + + fp->ff_size = fp->ff_blocks * blksize; + if (UBCISVALID(vp)) + (void) ubc_setsize(vp, fp->ff_size); + + /* + * STEP 2 - clone data into the new allocation blocks. + */ + + // XXXdbg - unlock the extents overflow file because hfs_clonefile() + // calls vinvalbuf() which calls hfs_fsync() which can + // call hfs_metasync() which may need to lock the catalog + // file -- but the catalog file may be locked and blocked + // waiting for the extents overflow file if we're unlucky. + // see radar 3742973 for more details. + (void) hfs_metafilelocking(VTOHFS(vp), kHFSExtentsFileID, LK_RELEASE, p); + + if (vp->v_type == VLNK) + retval = hfs_clonelink(vp, blksize, cred, p); + else if (vp->v_flag & VSYSTEM) + retval = hfs_clonesysfile(vp, headblks, datablks, blksize, cred, p); + else + retval = hfs_clonefile(vp, headblks, datablks, blksize, cred, p); + + // XXXdbg - relock the extents overflow file + (void)hfs_metafilelocking(hfsmp, kHFSExtentsFileID, LK_EXCLUSIVE, p); + + if (retval) + goto restore; + + oldstart = fp->ff_extents[0].startBlock; + + /* + * STEP 3 - switch to clone and remove old blocks. + */ + SET(VTOC(vp)->c_flag, C_NOBLKMAP); /* suspend page-ins */ + + retval = HeadTruncateFile(vcb, (FCB*)fp, headblks); + + CLR(VTOC(vp)->c_flag, C_NOBLKMAP); /* resume page-ins */ + if (ISSET(VTOC(vp)->c_flag, C_WBLKMAP)) + wakeup(VTOC(vp)); + if (retval) + goto restore; + + fp->ff_size = realsize; + if (UBCISVALID(vp)) { + (void) ubc_setsize(vp, realsize); + need_vinval = 1; + } + + CLR(VTOC(vp)->c_flag, C_RELOCATING); /* Resume page-outs for this file. */ +out: + (void) hfs_metafilelocking(VTOHFS(vp), kHFSExtentsFileID, LK_RELEASE, p); + + // XXXdbg - do this after unlocking the extents-overflow + // file to avoid deadlocks (see comment above by STEP 2) + if (need_vinval) { + (void) vinvalbuf(vp, V_SAVE, cred, p, 0, 0); + } + + retval = VOP_FSYNC(vp, cred, MNT_WAIT, p); +out2: + if (hfsmp->jnl) { + if (VTOC(vp)->c_cnid < kHFSFirstUserCatalogNodeID) + (void) hfs_flushvolumeheader(hfsmp, MNT_WAIT, HFS_ALTFLUSH); + else + (void) hfs_flushvolumeheader(hfsmp, MNT_NOWAIT, 0); + journal_end_transaction(hfsmp->jnl); + } + hfs_global_shared_lock_release(hfsmp); + + return (retval); + +restore: + /* + * Give back any newly allocated space. + */ + if (fp->ff_size != realsize) + fp->ff_size = realsize; + (void) TruncateFileC(vcb, (FCB*)fp, fp->ff_size, false); + if (UBCISVALID(vp)) + (void) ubc_setsize(vp, fp->ff_size); + CLR(VTOC(vp)->c_flag, C_RELOCATING); + goto out; +} + + +/* + * Clone a symlink. + * + */ +static int +hfs_clonelink(struct vnode *vp, int blksize, struct ucred *cred, struct proc *p) +{ + struct buf *head_bp = NULL; + struct buf *tail_bp = NULL; + int error; + + + error = meta_bread(vp, 0, blksize, cred, &head_bp); + if (error) + goto out; + + tail_bp = getblk(vp, 1, blksize, 0, 0, BLK_META); + if (tail_bp == NULL) { + error = EIO; + goto out; + } + bcopy(head_bp->b_data, tail_bp->b_data, blksize); + error = bwrite(tail_bp); +out: + if (head_bp) { + head_bp->b_flags |= B_INVAL; + brelse(head_bp); + } + (void) vinvalbuf(vp, V_SAVE, cred, p, 0, 0); + + return (error); +} + +/* + * Clone a file's data within the file. + * + */ +static int +hfs_clonefile(struct vnode *vp, int blkstart, int blkcnt, int blksize, + struct ucred *cred, struct proc *p) +{ + caddr_t bufp; + size_t writebase; + size_t bufsize; + size_t copysize; + size_t iosize; + size_t filesize; + size_t offset; + struct uio auio; + struct iovec aiov; + int devblocksize; + int didhold; + int error; + + + if ((error = vinvalbuf(vp, V_SAVE, cred, p, 0, 0))) { + printf("hfs_clonefile: vinvalbuf failed - %d\n", error); + return (error); + } + + if (!ubc_clean(vp, 1)) { + printf("hfs_clonefile: not ubc_clean\n"); + return (EIO); /* XXX error code */ + } + + /* + * Suspend page-outs for this file. + */ + SET(VTOC(vp)->c_flag, C_RELOCATING); + + filesize = VTOF(vp)->ff_size; + writebase = blkstart * blksize; + copysize = blkcnt * blksize; + iosize = bufsize = MIN(copysize, 4096 * 16); + offset = 0; + + if (kmem_alloc(kernel_map, (vm_offset_t *)&bufp, bufsize)) { + return (ENOMEM); + } + + VOP_DEVBLOCKSIZE(VTOC(vp)->c_devvp, &devblocksize); + + auio.uio_iov = &aiov; + auio.uio_iovcnt = 1; + auio.uio_segflg = UIO_SYSSPACE; + auio.uio_procp = p; + + while (offset < copysize) { + iosize = MIN(copysize - offset, iosize); + + aiov.iov_base = bufp; + aiov.iov_len = iosize; + auio.uio_resid = iosize; + auio.uio_offset = offset; + auio.uio_rw = UIO_READ; + + error = cluster_read(vp, &auio, copysize, devblocksize, 0); + if (error) { + printf("hfs_clonefile: cluster_read failed - %d\n", error); + break; + } + if (auio.uio_resid != 0) { + printf("clonedata: cluster_read: uio_resid = %d\n", (int)auio.uio_resid); + error = EIO; + break; + } + + + aiov.iov_base = bufp; + aiov.iov_len = iosize; + auio.uio_resid = iosize; + auio.uio_offset = writebase + offset; + auio.uio_rw = UIO_WRITE; + + error = cluster_write(vp, &auio, filesize + offset, + filesize + offset + iosize, + auio.uio_offset, 0, devblocksize, 0); + if (error) { + printf("hfs_clonefile: cluster_write failed - %d\n", error); + break; + } + if (auio.uio_resid != 0) { + printf("hfs_clonefile: cluster_write failed - uio_resid not zero\n"); + error = EIO; + break; + } + offset += iosize; + } + if (error == 0) { + /* Clean the pages in VM. */ + didhold = ubc_hold(vp); + if (didhold) + (void) ubc_clean(vp, 1); + + /* + * Clean out all associated buffers. + */ + (void) vinvalbuf(vp, V_SAVE, cred, p, 0, 0); + + if (didhold) + ubc_rele(vp); + } + kmem_free(kernel_map, (vm_offset_t)bufp, bufsize); + + return (error); +} + +/* + * Clone a system (metadata) file. + * + */ +static int +hfs_clonesysfile(struct vnode *vp, int blkstart, int blkcnt, int blksize, + struct ucred *cred, struct proc *p) +{ + caddr_t bufp; + char * offset; + size_t bufsize; + size_t iosize; + struct buf *bp = NULL; + daddr_t blkno; + daddr_t blk; + int breadcnt; + int i; + int error = 0; + + + iosize = GetLogicalBlockSize(vp); + bufsize = MIN(blkcnt * blksize, 1024 * 1024) & ~(iosize - 1); + breadcnt = bufsize / iosize; + + if (kmem_alloc(kernel_map, (vm_offset_t *)&bufp, bufsize)) { + return (ENOMEM); + } + blkstart = (blkstart * blksize) / iosize; + blkcnt = (blkcnt * blksize) / iosize; + blkno = 0; + + while (blkno < blkcnt) { + /* + * Read up to a megabyte + */ + offset = bufp; + for (i = 0, blk = blkno; (i < breadcnt) && (blk < blkcnt); ++i, ++blk) { + error = meta_bread(vp, blk, iosize, cred, &bp); + if (error) { + printf("hfs_clonesysfile: meta_bread error %d\n", error); + goto out; + } + if (bp->b_bcount != iosize) { + printf("hfs_clonesysfile: b_bcount is only %d\n", bp->b_bcount); + goto out; + } + + bcopy(bp->b_data, offset, iosize); + bp->b_flags |= B_INVAL; + brelse(bp); + bp = NULL; + offset += iosize; + } + + /* + * Write up to a megabyte + */ + offset = bufp; + for (i = 0; (i < breadcnt) && (blkno < blkcnt); ++i, ++blkno) { + bp = getblk(vp, blkstart + blkno, iosize, 0, 0, BLK_META); + if (bp == NULL) { + printf("hfs_clonesysfile: getblk failed on blk %d\n", blkstart + blkno); + error = EIO; + goto out; + } + bcopy(offset, bp->b_data, iosize); + error = bwrite(bp); + bp = NULL; + if (error) + goto out; + offset += iosize; + } + } +out: + if (bp) { + brelse(bp); + } + + kmem_free(kernel_map, (vm_offset_t)bufp, bufsize); + + error = VOP_FSYNC(vp, cred, MNT_WAIT, p); + + return (error); +} +