X-Git-Url: https://git.saurik.com/apple/xnu.git/blobdiff_plain/39236c6e673c41db228275375ab7fdb0f837b292..c18c124eaa464aaaa5549e99e5a70fc9cbb50944:/bsd/hfs/hfs_readwrite.c diff --git a/bsd/hfs/hfs_readwrite.c b/bsd/hfs/hfs_readwrite.c index a3f653fc4..f09bdc7d2 100644 --- a/bsd/hfs/hfs_readwrite.c +++ b/bsd/hfs/hfs_readwrite.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2013 Apple Inc. All rights reserved. + * Copyright (c) 2000-2015 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -54,6 +54,7 @@ #include #include #include +#include #include @@ -125,6 +126,7 @@ hfs_vnop_read(struct vnop_read_args *ap) int retval = 0; int took_truncate_lock = 0; int io_throttle = 0; + int throttled_count = 0; /* Preflight checks */ if (!vnode_isreg(vp)) { @@ -138,8 +140,14 @@ hfs_vnop_read(struct vnop_read_args *ap) return (0); /* Nothing left to do */ if (offset < 0) return (EINVAL); /* cant read from a negative offset */ - - + + if ((ap->a_ioflag & (IO_SKIP_ENCRYPTION|IO_SYSCALL_DISPATCH)) == + (IO_SKIP_ENCRYPTION|IO_SYSCALL_DISPATCH)) { + /* Don't allow unencrypted io request from user space */ + return EPERM; + } + + #if HFS_COMPRESSION if (VNODE_IS_RSRC(vp)) { @@ -204,6 +212,13 @@ read_again: filesize = fp->ff_size; filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize; + + /* + * Check the file size. Note that per POSIX spec, we return 0 at + * file EOF, so attempting a read at an offset that is too big + * should just return 0 on HFS+. Since the return value was initialized + * to 0 above, we just jump to exit. HFS Standard has its own behavior. + */ if (offset > filesize) { if ((hfsmp->hfs_flags & HFS_STANDARD) && (offset > (off_t)MAXHFSFILESIZE)) { @@ -212,14 +227,14 @@ read_again: goto exit; } - KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 12)) | DBG_FUNC_START, + KERNEL_DEBUG(HFSDBG_READ | DBG_FUNC_START, (int)uio_offset(uio), uio_resid(uio), (int)filesize, (int)filebytes, 0); retval = cluster_read(vp, uio, filesize, ap->a_ioflag |io_throttle); cp->c_touch_acctime = TRUE; - KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 12)) | DBG_FUNC_END, + KERNEL_DEBUG(HFSDBG_READ | DBG_FUNC_END, (int)uio_offset(uio), uio_resid(uio), (int)filesize, (int)filebytes, 0); /* @@ -258,10 +273,14 @@ exit: } if (retval == EAGAIN) { throttle_lowpri_io(1); + throttled_count++; retval = 0; goto read_again; } + if (throttled_count) { + throttle_info_reset_window((uthread_t)get_bsdthread_info(current_thread())); + } return (retval); } @@ -294,6 +313,7 @@ hfs_vnop_write(struct vnop_write_args *ap) time_t orig_ctime=VTOC(vp)->c_ctime; int took_truncate_lock = 0; int io_return_on_throttle = 0; + int throttled_count = 0; struct rl_entry *invalid_range; #if HFS_COMPRESSION @@ -327,6 +347,13 @@ hfs_vnop_write(struct vnop_write_args *ap) #endif + if ((ioflag & (IO_SKIP_ENCRYPTION|IO_SYSCALL_DISPATCH)) == + (IO_SKIP_ENCRYPTION|IO_SYSCALL_DISPATCH)) { + /* Don't allow unencrypted io request from user space */ + return EPERM; + } + + resid = uio_resid(uio); offset = uio_offset(uio); @@ -367,7 +394,6 @@ hfs_vnop_write(struct vnop_write_args *ap) } again: - /* Protect against a size change. */ /* * Protect against a size change. * @@ -446,10 +472,6 @@ again: } cnode_locked = 1; - if (S_ISREG(cp->c_attr.ca_mode) || S_ISLNK(cp->c_attr.ca_mode)) { - hfs_incr_gencount (cp); - } - /* * Now that we have the cnode lock, see if there are delayed zero fill ranges * overlapping our write. If so, we need the truncate lock exclusive (see above). @@ -469,7 +491,7 @@ again: goto again; } - KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_START, + KERNEL_DEBUG(HFSDBG_WRITE | DBG_FUNC_START, (int)offset, uio_resid(uio), (int)fp->ff_size, (int)filebytes, 0); @@ -518,7 +540,7 @@ again: if (retval != E_NONE) break; filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize; - KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_NONE, + KERNEL_DEBUG(HFSDBG_WRITE | DBG_FUNC_NONE, (int)offset, uio_resid(uio), (int)fp->ff_size, (int)filebytes, 0); } (void) hfs_update(vp, TRUE); @@ -708,6 +730,7 @@ sizeok: cp->c_touch_chgtime = TRUE; cp->c_touch_modtime = TRUE; + hfs_incr_gencount(cp); } if (filesize > fp->ff_size) { /* @@ -737,13 +760,7 @@ sizeok: fp->ff_bytesread = 0; } } - fp->ff_new_size = 0; /* ff_size now has the correct size */ - - /* If we wrote some bytes, then touch the change and mod times */ - if (resid > uio_resid(uio)) { - cp->c_touch_chgtime = TRUE; - cp->c_touch_modtime = TRUE; - } + fp->ff_new_size = 0; /* ff_size now has the correct size */ } if (partialwrite) { uio_setresid(uio, (uio_resid(uio) + bytesToAdd)); @@ -758,44 +775,43 @@ sizeok: } ioerr_exit: - /* - * If we successfully wrote any data, and we are not the superuser - * we clear the setuid and setgid bits as a precaution against - * tampering. - */ - if (cp->c_mode & (S_ISUID | S_ISGID)) { - cred = vfs_context_ucred(ap->a_context); - if (resid > uio_resid(uio) && cred && suser(cred, NULL)) { - if (!cnode_locked) { - hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS); - cnode_locked = 1; + if (resid > uio_resid(uio)) { + if (!cnode_locked) { + hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS); + cnode_locked = 1; + } + + cp->c_touch_chgtime = TRUE; + cp->c_touch_modtime = TRUE; + hfs_incr_gencount(cp); + + /* + * If we successfully wrote any data, and we are not the superuser + * we clear the setuid and setgid bits as a precaution against + * tampering. + */ + if (cp->c_mode & (S_ISUID | S_ISGID)) { + cred = vfs_context_ucred(ap->a_context); + if (cred && suser(cred, NULL)) { + cp->c_mode &= ~(S_ISUID | S_ISGID); } - cp->c_mode &= ~(S_ISUID | S_ISGID); } } if (retval) { if (ioflag & IO_UNIT) { - if (!cnode_locked) { - hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS); - cnode_locked = 1; - } (void)hfs_truncate(vp, origFileSize, ioflag & IO_SYNC, - 0, 0, ap->a_context); + 0, ap->a_context); uio_setoffset(uio, (uio_offset(uio) - (resid - uio_resid(uio)))); uio_setresid(uio, resid); filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize; } - } else if ((ioflag & IO_SYNC) && (resid > uio_resid(uio))) { - if (!cnode_locked) { - hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS); - cnode_locked = 1; - } + } else if ((ioflag & IO_SYNC) && (resid > uio_resid(uio))) retval = hfs_update(vp, TRUE); - } + /* Updating vcbWrCnt doesn't need to be atomic. */ hfsmp->vcbWrCnt++; - KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_END, + KERNEL_DEBUG(HFSDBG_WRITE | DBG_FUNC_END, (int)uio_offset(uio), uio_resid(uio), (int)fp->ff_size, (int)filebytes, 0); exit: if (cnode_locked) @@ -806,10 +822,14 @@ exit: } if (retval == EAGAIN) { throttle_lowpri_io(1); + throttled_count++; retval = 0; goto again; } + if (throttled_count) { + throttle_info_reset_window((uthread_t)get_bsdthread_info(current_thread())); + } return (retval); } @@ -1031,15 +1051,15 @@ struct cinfo { }; static int -snoop_callback(const struct cat_desc *descp, const struct cat_attr *attrp, void * arg) +snoop_callback(const cnode_t *cp, void *arg) { - struct cinfo *cip = (struct cinfo *)arg; + struct cinfo *cip = arg; - cip->uid = attrp->ca_uid; - cip->gid = attrp->ca_gid; - cip->mode = attrp->ca_mode; - cip->parentcnid = descp->cd_parentcnid; - cip->recflags = attrp->ca_recflags; + cip->uid = cp->c_uid; + cip->gid = cp->c_gid; + cip->mode = cp->c_mode; + cip->parentcnid = cp->c_parentcnid; + cip->recflags = cp->c_attr.ca_recflags; return (0); } @@ -1056,36 +1076,41 @@ do_attr_lookup(struct hfsmount *hfsmp, struct access_cache *cache, cnid_t cnid, /* if this id matches the one the fsctl was called with, skip the lookup */ if (cnid == skip_cp->c_cnid) { - cnattrp->ca_uid = skip_cp->c_uid; - cnattrp->ca_gid = skip_cp->c_gid; - cnattrp->ca_mode = skip_cp->c_mode; - cnattrp->ca_recflags = skip_cp->c_attr.ca_recflags; - keyp->hfsPlus.parentID = skip_cp->c_parentcnid; + cnattrp->ca_uid = skip_cp->c_uid; + cnattrp->ca_gid = skip_cp->c_gid; + cnattrp->ca_mode = skip_cp->c_mode; + cnattrp->ca_recflags = skip_cp->c_attr.ca_recflags; + keyp->hfsPlus.parentID = skip_cp->c_parentcnid; } else { - struct cinfo c_info; - - /* otherwise, check the cnode hash incase the file/dir is incore */ - if (hfs_chash_snoop(hfsmp, cnid, 0, snoop_callback, &c_info) == 0) { - cnattrp->ca_uid = c_info.uid; - cnattrp->ca_gid = c_info.gid; - cnattrp->ca_mode = c_info.mode; - cnattrp->ca_recflags = c_info.recflags; - keyp->hfsPlus.parentID = c_info.parentcnid; - } else { - int lockflags; - - if (throttle_io_will_be_throttled(-1, HFSTOVFS(hfsmp))) - throttle_lowpri_io(1); + struct cinfo c_info; + + /* otherwise, check the cnode hash incase the file/dir is incore */ + error = hfs_chash_snoop(hfsmp, cnid, 0, snoop_callback, &c_info); + + if (error == EACCES) { + // File is deleted + return ENOENT; + } else if (!error) { + cnattrp->ca_uid = c_info.uid; + cnattrp->ca_gid = c_info.gid; + cnattrp->ca_mode = c_info.mode; + cnattrp->ca_recflags = c_info.recflags; + keyp->hfsPlus.parentID = c_info.parentcnid; + } else { + int lockflags; + + if (throttle_io_will_be_throttled(-1, HFSTOVFS(hfsmp))) + throttle_lowpri_io(1); - lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK); + lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK); - /* lookup this cnid in the catalog */ - error = cat_getkeyplusattr(hfsmp, cnid, keyp, cnattrp); + /* lookup this cnid in the catalog */ + error = cat_getkeyplusattr(hfsmp, cnid, keyp, cnattrp); - hfs_systemfile_unlock(hfsmp, lockflags); + hfs_systemfile_unlock(hfsmp, lockflags); - cache->lookups++; - } + cache->lookups++; + } } return (error); @@ -1547,24 +1572,13 @@ do_bulk_access_check(struct hfsmount *hfsmp, struct vnode *vp, /* end "bulk-access" support */ -/* - * Callback for use with freeze ioctl. - */ -static int -hfs_freezewrite_callback(struct vnode *vp, __unused void *cargs) -{ - vnode_waitforwrites(vp, 0, 0, 0, "hfs freeze"); - - return 0; -} - /* * Control filesystem operating characteristics. */ int hfs_vnop_ioctl( struct vnop_ioctl_args /* { vnode_t a_vp; - int a_command; + long a_command; caddr_t a_data; int a_fflag; vfs_context_t a_context; @@ -1654,64 +1668,133 @@ hfs_vnop_ioctl( struct vnop_ioctl_args /* { return (error); } - case HFS_GET_WRITE_GEN_COUNTER: + case HFS_TRANSFER_DOCUMENT_ID: { struct cnode *cp = NULL; int error; - u_int32_t *counter = (u_int32_t *)ap->a_data; + u_int32_t to_fd = *(u_int32_t *)ap->a_data; + struct fileproc *to_fp; + struct vnode *to_vp; + struct cnode *to_cp; cp = VTOC(vp); - if (vnode_isdir (vp)) { - error = EISDIR; - *counter = 0; + if ((error = fp_getfvp(p, to_fd, &to_fp, &to_vp)) != 0) { + //printf("could not get the vnode for fd %d (err %d)\n", to_fd, error); return error; } - - error = hfs_lock (cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT); - if (error == 0) { - struct ubc_info *uip; - int is_mapped = 0; + if ( (error = vnode_getwithref(to_vp)) ) { + file_drop(to_fd); + return error; + } + + if (VTOHFS(to_vp) != hfsmp) { + error = EXDEV; + goto transfer_cleanup; + } + + int need_unlock = 1; + to_cp = VTOC(to_vp); + error = hfs_lockpair(cp, to_cp, HFS_EXCLUSIVE_LOCK); + if (error != 0) { + //printf("could not lock the pair of cnodes (error %d)\n", error); + goto transfer_cleanup; + } - if (UBCINFOEXISTS(vp)) { - uip = vp->v_ubcinfo; - if (uip->ui_flags & UI_ISMAPPED) { - is_mapped = 1; + if (!(cp->c_bsdflags & UF_TRACKED)) { + error = EINVAL; + } else if (to_cp->c_bsdflags & UF_TRACKED) { + // + // if the destination is already tracked, return an error + // as otherwise it's a silent deletion of the target's + // document-id + // + error = EEXIST; + } else if (S_ISDIR(cp->c_attr.ca_mode) || S_ISREG(cp->c_attr.ca_mode) || S_ISLNK(cp->c_attr.ca_mode)) { + // + // we can use the FndrExtendedFileInfo because the doc-id is the first + // thing in both it and the ExtendedDirInfo struct which is fixed in + // format and can not change layout + // + struct FndrExtendedFileInfo *f_extinfo = (struct FndrExtendedFileInfo *)((u_int8_t*)cp->c_finderinfo + 16); + struct FndrExtendedFileInfo *to_extinfo = (struct FndrExtendedFileInfo *)((u_int8_t*)to_cp->c_finderinfo + 16); + + if (f_extinfo->document_id == 0) { + uint32_t new_id; + + hfs_unlockpair(cp, to_cp); // have to unlock to be able to get a new-id + + if ((error = hfs_generate_document_id(hfsmp, &new_id)) == 0) { + // + // re-lock the pair now that we have the document-id + // + hfs_lockpair(cp, to_cp, HFS_EXCLUSIVE_LOCK); + f_extinfo->document_id = new_id; + } else { + goto transfer_cleanup; } } + + to_extinfo->document_id = f_extinfo->document_id; + f_extinfo->document_id = 0; + //printf("TRANSFERRING: doc-id %d from ino %d to ino %d\n", to_extinfo->document_id, cp->c_fileid, to_cp->c_fileid); + // make sure the destination is also UF_TRACKED + to_cp->c_bsdflags |= UF_TRACKED; + cp->c_bsdflags &= ~UF_TRACKED; - if (S_ISREG(cp->c_attr.ca_mode) || S_ISLNK(cp->c_attr.ca_mode)) { - uint32_t gcount = hfs_get_gencount(cp); - // - // Even though we return EBUSY for files that are mmap'ed - // we also want to bump the value so that the write-gen - // counter will always be different once the file is unmapped - // (since the file may be unmapped but the pageouts have not - // yet happened). - // - if (is_mapped) { - hfs_incr_gencount (cp); - gcount = hfs_get_gencount(cp); - } - - *counter = gcount; + // mark the cnodes dirty + cp->c_flag |= C_MODIFIED | C_FORCEUPDATE; + to_cp->c_flag |= C_MODIFIED | C_FORCEUPDATE; - } - else { - /* not a file or dir? silently return */ - *counter = 0; + int lockflags; + if ((error = hfs_start_transaction(hfsmp)) == 0) { + + lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_EXCLUSIVE_LOCK); + + (void) cat_update(hfsmp, &cp->c_desc, &cp->c_attr, NULL, NULL); + (void) cat_update(hfsmp, &to_cp->c_desc, &to_cp->c_attr, NULL, NULL); + + hfs_systemfile_unlock (hfsmp, lockflags); + (void) hfs_end_transaction(hfsmp); } - hfs_unlock (cp); - if (is_mapped) { - error = EBUSY; +#if CONFIG_FSE + add_fsevent(FSE_DOCID_CHANGED, context, + FSE_ARG_DEV, hfsmp->hfs_raw_dev, + FSE_ARG_INO, (ino64_t)cp->c_fileid, // src inode # + FSE_ARG_INO, (ino64_t)to_cp->c_fileid, // dst inode # + FSE_ARG_INT32, to_extinfo->document_id, + FSE_ARG_DONE); + + hfs_unlockpair(cp, to_cp); // unlock this so we can send the fsevents + need_unlock = 0; + + if (need_fsevent(FSE_STAT_CHANGED, vp)) { + add_fsevent(FSE_STAT_CHANGED, context, FSE_ARG_VNODE, vp, FSE_ARG_DONE); + } + if (need_fsevent(FSE_STAT_CHANGED, to_vp)) { + add_fsevent(FSE_STAT_CHANGED, context, FSE_ARG_VNODE, to_vp, FSE_ARG_DONE); } +#else + hfs_unlockpair(cp, to_cp); // unlock this so we can send the fsevents + need_unlock = 0; +#endif + } + + if (need_unlock) { + hfs_unlockpair(cp, to_cp); } + transfer_cleanup: + vnode_put(to_vp); + file_drop(to_fd); + return error; } + + case HFS_PREV_LINK: case HFS_NEXT_LINK: { @@ -1883,20 +1966,11 @@ fail_change_next_allocation: vnode_ref(bsfs_rootvp); vnode_put(bsfs_rootvp); + hfs_lock_mount(hfsmp); hfsmp->hfs_backingfs_rootvp = bsfs_rootvp; - hfsmp->hfs_flags |= HFS_HAS_SPARSE_DEVICE; - /* The free extent cache is managed differently for sparse devices. - * There is a window between which the volume is mounted and the - * device is marked as sparse, so the free extent cache for this - * volume is currently initialized as normal volume (sorted by block - * count). Reset the cache so that it will be rebuilt again - * for sparse device (sorted by start block). - */ - ResetVCBFreeExtCache(hfsmp); - - hfsmp->hfs_sparsebandblks = bsdata->bandsize / HFSTOVCB(hfsmp)->blockSize; - hfsmp->hfs_sparsebandblks *= 4; + hfsmp->hfs_sparsebandblks = bsdata->bandsize / hfsmp->blockSize * 4; + hfs_unlock_mount(hfsmp); /* We check the MNTK_VIRTUALDEV bit instead of marking the dependent process */ @@ -1919,6 +1993,15 @@ fail_change_next_allocation: } } + /* The free extent cache is managed differently for sparse devices. + * There is a window between which the volume is mounted and the + * device is marked as sparse, so the free extent cache for this + * volume is currently initialized as normal volume (sorted by block + * count). Reset the cache so that it will be rebuilt again + * for sparse device (sorted by start block). + */ + ResetVCBFreeExtCache(hfsmp); + (void)vnode_put(di_vp); file_drop(bsdata->backingfd); return (0); @@ -1938,10 +2021,13 @@ fail_change_next_allocation: if ((hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) && hfsmp->hfs_backingfs_rootvp) { + hfs_lock_mount(hfsmp); hfsmp->hfs_flags &= ~HFS_HAS_SPARSE_DEVICE; tmpvp = hfsmp->hfs_backingfs_rootvp; hfsmp->hfs_backingfs_rootvp = NULLVP; hfsmp->hfs_sparsebandblks = 0; + hfs_unlock_mount(hfsmp); + vnode_rele(tmpvp); } return (0); @@ -2007,38 +2093,7 @@ fail_change_next_allocation: !kauth_cred_issuser(cred)) return (EACCES); - lck_rw_lock_exclusive(&hfsmp->hfs_insync); - - // flush things before we get started to try and prevent - // dirty data from being paged out while we're frozen. - // note: can't do this after taking the lock as it will - // deadlock against ourselves. - vnode_iterate(mp, 0, hfs_freezewrite_callback, NULL); - hfs_lock_global (hfsmp, HFS_EXCLUSIVE_LOCK); - - // DO NOT call hfs_journal_flush() because that takes a - // shared lock on the global exclusive lock! - journal_flush(hfsmp->jnl, TRUE); - - // don't need to iterate on all vnodes, we just need to - // wait for writes to the system files and the device vnode - // - // Now that journal flush waits for all metadata blocks to - // be written out, waiting for btree writes is probably no - // longer required. - if (HFSTOVCB(hfsmp)->extentsRefNum) - vnode_waitforwrites(HFSTOVCB(hfsmp)->extentsRefNum, 0, 0, 0, "hfs freeze"); - if (HFSTOVCB(hfsmp)->catalogRefNum) - vnode_waitforwrites(HFSTOVCB(hfsmp)->catalogRefNum, 0, 0, 0, "hfs freeze"); - if (HFSTOVCB(hfsmp)->allocationsRefNum) - vnode_waitforwrites(HFSTOVCB(hfsmp)->allocationsRefNum, 0, 0, 0, "hfs freeze"); - if (hfsmp->hfs_attribute_vp) - vnode_waitforwrites(hfsmp->hfs_attribute_vp, 0, 0, 0, "hfs freeze"); - vnode_waitforwrites(hfsmp->hfs_devvp, 0, 0, 0, "hfs freeze"); - - hfsmp->hfs_freezing_proc = current_proc(); - - return (0); + return hfs_freeze(hfsmp); } case F_THAW_FS: { @@ -2047,20 +2102,7 @@ fail_change_next_allocation: !kauth_cred_issuser(cred)) return (EACCES); - // if we're not the one who froze the fs then we - // can't thaw it. - if (hfsmp->hfs_freezing_proc != current_proc()) { - return EPERM; - } - - // NOTE: if you add code here, also go check the - // code that "thaws" the fs in hfs_vnop_close() - // - hfsmp->hfs_freezing_proc = NULL; - hfs_unlock_global (hfsmp); - lck_rw_unlock_exclusive(&hfsmp->hfs_insync); - - return (0); + return hfs_thaw(hfsmp, current_proc()); } case HFS_BULKACCESS_FSCTL: { @@ -2199,6 +2241,52 @@ fail_change_next_allocation: return error; } + case F_SETIOTYPE: { + int error; + uint32_t iotypeflag = 0; + + struct cnode *cp = NULL; + /* + * lock the cnode, decorate the cnode flag, and bail out. + * VFS should have already authenticated the caller for us. + */ + + if (ap->a_data == NULL) { + return EINVAL; + } + + /* + * Note that even though ap->a_data is of type caddr_t, we + * can only use 32 bits of flag values. + */ + iotypeflag = (uint32_t) ap->a_data; + switch (iotypeflag) { + case F_IOTYPE_ISOCHRONOUS: + break; + default: + return EINVAL; + } + + + if (hfsmp->hfs_flags & HFS_READ_ONLY) { + return EROFS; + } + cp = VTOC(vp); + + error = hfs_lock (cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT); + if (error == 0) { + switch (iotypeflag) { + case F_IOTYPE_ISOCHRONOUS: + cp->c_flag |= C_IO_ISOCHRONOUS; + break; + default: + break; + } + hfs_unlock (cp); + } + return error; + } + case F_MAKECOMPRESSED: { int error = 0; uint32_t gen_counter; @@ -2235,7 +2323,7 @@ fail_change_next_allocation: hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT); return error; } - + /* Are there any other usecounts/FDs? */ if (vnode_isinuse(vp, 1)) { hfs_unlock(cp); @@ -2243,7 +2331,6 @@ fail_change_next_allocation: return EBUSY; } - /* now we have the cnode locked down; Validate arguments */ if (cp->c_attr.ca_flags & (UF_IMMUTABLE | UF_COMPRESSED)) { /* EINVAL if you are trying to manipulate an IMMUTABLE file */ @@ -2259,8 +2346,9 @@ fail_change_next_allocation: */ reset_decmp = 1; cp->c_bsdflags |= UF_COMPRESSED; - - error = hfs_truncate(vp, 0, IO_NDELAY, 0, (HFS_TRUNCATE_SKIPTIMES), ap->a_context); + + error = hfs_truncate(vp, 0, IO_NDELAY, HFS_TRUNCATE_SKIPTIMES, + ap->a_context); } else { error = ESTALE; @@ -2503,7 +2591,7 @@ fail_change_next_allocation: return (EROFS); } printf ("hfs_vnop_ioctl: Marking the boot volume corrupt.\n"); - hfs_mark_volume_inconsistent(hfsmp); + hfs_mark_inconsistent(hfsmp, HFS_FSCK_FORCED); break; case HFS_FSCTL_GET_JOURNAL_INFO: @@ -2549,7 +2637,144 @@ fail_change_next_allocation: printf ("hfs: Disabling metadata zone on %s\n", hfsmp->vcbVN); break; } - + + + case HFS_FSINFO_METADATA_BLOCKS: { + int error; + struct hfsinfo_metadata *hinfo; + + hinfo = (struct hfsinfo_metadata *)ap->a_data; + + /* Get information about number of metadata blocks */ + error = hfs_getinfo_metadata_blocks(hfsmp, hinfo); + if (error) { + return error; + } + + break; + } + + case HFS_GET_FSINFO: { + hfs_fsinfo *fsinfo = (hfs_fsinfo *)ap->a_data; + + /* Only root is allowed to get fsinfo */ + if (!kauth_cred_issuser(kauth_cred_get())) { + return EACCES; + } + + /* + * Make sure that the caller's version number matches with + * the kernel's version number. This will make sure that + * if the structures being read/written into are changed + * by the kernel, the caller will not read incorrect data. + * + * The first three fields --- request_type, version and + * flags are same for all the hfs_fsinfo structures, so + * we can access the version number by assuming any + * structure for now. + */ + if (fsinfo->header.version != HFS_FSINFO_VERSION) { + return ENOTSUP; + } + + /* Make sure that the current file system is not marked inconsistent */ + if (hfsmp->vcbAtrb & kHFSVolumeInconsistentMask) { + return EIO; + } + + return hfs_get_fsinfo(hfsmp, ap->a_data); + } + + case HFS_CS_FREESPACE_TRIM: { + int error = 0; + int lockflags = 0; + + /* Only root allowed */ + if (!kauth_cred_issuser(kauth_cred_get())) { + return EACCES; + } + + /* + * This core functionality is similar to hfs_scan_blocks(). + * The main difference is that hfs_scan_blocks() is called + * as part of mount where we are assured that the journal is + * empty to start with. This fcntl() can be called on a + * mounted volume, therefore it has to flush the content of + * the journal as well as ensure the state of summary table. + * + * This fcntl scans over the entire allocation bitmap, + * creates list of all the free blocks, and issues TRIM + * down to the underlying device. This can take long time + * as it can generate up to 512MB of read I/O. + */ + + if ((hfsmp->hfs_flags & HFS_SUMMARY_TABLE) == 0) { + error = hfs_init_summary(hfsmp); + if (error) { + printf("hfs: fsctl() could not initialize summary table for %s\n", hfsmp->vcbVN); + return error; + } + } + + /* + * The journal maintains list of recently deallocated blocks to + * issue DKIOCUNMAPs when the corresponding journal transaction is + * flushed to the disk. To avoid any race conditions, we only + * want one active trim list and only one thread issuing DKIOCUNMAPs. + * Therefore we make sure that the journal trim list is sync'ed, + * empty, and not modifiable for the duration of our scan. + * + * Take the journal lock before flushing the journal to the disk. + * We will keep on holding the journal lock till we don't get the + * bitmap lock to make sure that no new journal transactions can + * start. This will make sure that the journal trim list is not + * modified after the journal flush and before getting bitmap lock. + * We can release the journal lock after we acquire the bitmap + * lock as it will prevent any further block deallocations. + */ + hfs_journal_lock(hfsmp); + + /* Flush the journal and wait for all I/Os to finish up */ + error = hfs_journal_flush(hfsmp, TRUE); + if (error) { + hfs_journal_unlock(hfsmp); + return error; + } + + /* Take bitmap lock to ensure it is not being modified */ + lockflags = hfs_systemfile_lock(hfsmp, SFL_BITMAP, HFS_EXCLUSIVE_LOCK); + + /* Release the journal lock */ + hfs_journal_unlock(hfsmp); + + /* + * ScanUnmapBlocks reads the bitmap in large block size + * (up to 1MB) unlike the runtime which reads the bitmap + * in the 4K block size. This can cause buf_t collisions + * and potential data corruption. To avoid this, we + * invalidate all the existing buffers associated with + * the bitmap vnode before scanning it. + * + * Note: ScanUnmapBlock() cleans up all the buffers + * after itself, so there won't be any large buffers left + * for us to clean up after it returns. + */ + error = buf_invalidateblks(hfsmp->hfs_allocation_vp, 0, 0, 0); + if (error) { + hfs_systemfile_unlock(hfsmp, lockflags); + return error; + } + + /* Traverse bitmap and issue DKIOCUNMAPs */ + error = ScanUnmapBlocks(hfsmp); + hfs_systemfile_unlock(hfsmp, lockflags); + if (error) { + return error; + } + + break; + } + default: return (ENOTTY); } @@ -2982,13 +3207,19 @@ hfs_vnop_strategy(struct vnop_strategy_args *ap) /* Mark buffer as containing static data if cnode flag set */ if (VTOC(vp)->c_flag & C_SSD_GREEDY_MODE) { - bufattr_markgreedymode((bufattr_t)(&bp->b_attr)); + bufattr_markgreedymode(&bp->b_attr); + } + + /* mark buffer as containing burst mode data if cnode flag set */ + if (VTOC(vp)->c_flag & C_IO_ISOCHRONOUS) { + bufattr_markisochronous(&bp->b_attr); } #if CONFIG_PROTECT cnode_t *cp = NULL; - if ((cp = cp_get_protected_cnode(vp)) != NULL) { + if ((!bufattr_rawencrypted(&bp->b_attr)) && + ((cp = cp_get_protected_cnode(vp)) != NULL)) { /* * We rely upon the truncate lock to protect the * CP cache key from getting tossed prior to our IO finishing here. @@ -3011,8 +3242,31 @@ hfs_vnop_strategy(struct vnop_strategy_args *ap) * with the CP blob being wiped out in the middle of the IO * because there isn't anything to toss; the VM swapfile key stays * in-core as long as the file is open. - * - * NB: + */ + + + /* + * Last chance: If this data protected I/O does not have unwrapped keys + * present, then try to get them. We already know that it should, by this point. + */ + if (cp->c_cpentry->cp_flags & (CP_KEY_FLUSHED | CP_NEEDS_KEYS)) { + int io_op = ( (buf_flags(bp) & B_READ) ? CP_READ_ACCESS : CP_WRITE_ACCESS); + if ((error = cp_handle_vnop(vp, io_op, 0)) != 0) { + /* + * We have to be careful here. By this point in the I/O path, VM or the cluster + * engine has prepared a buf_t with the proper file offsets and all the rest, + * so simply erroring out will result in us leaking this particular buf_t. + * We need to properly decorate the buf_t just as buf_strategy would so as + * to make it appear that the I/O errored out with the particular error code. + */ + buf_seterror (bp, error); + buf_biodone(bp); + return error; + } + } + + /* + *NB: * For filesystem resize, we may not have access to the underlying * file's cache key for whatever reason (device may be locked). However, * we do not need it since we are going to use the temporary HFS-wide resize key @@ -3047,7 +3301,6 @@ do_hfs_truncate(struct vnode *vp, off_t length, int flags, int truncateflags, vf { register struct cnode *cp = VTOC(vp); struct filefork *fp = VTOF(vp); - struct proc *p = vfs_context_proc(context);; kauth_cred_t cred = vfs_context_ucred(context); int retval; off_t bytesToAdd; @@ -3059,12 +3312,12 @@ do_hfs_truncate(struct vnode *vp, off_t length, int flags, int truncateflags, vf int lockflags; int skipupdate = (truncateflags & HFS_TRUNCATE_SKIPUPDATE); int suppress_times = (truncateflags & HFS_TRUNCATE_SKIPTIMES); - + blksize = VTOVCB(vp)->blockSize; fileblocks = fp->ff_blocks; filebytes = (off_t)fileblocks * (off_t)blksize; - KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_START, + KERNEL_DEBUG(HFSDBG_TRUNCATE | DBG_FUNC_START, (int)length, (int)fp->ff_size, (int)filebytes, 0, 0); if (length < 0) @@ -3118,8 +3371,9 @@ do_hfs_truncate(struct vnode *vp, off_t length, int flags, int truncateflags, vf /* All or nothing and don't round up to clumpsize. */ eflags = kEFAllMask | kEFNoClumpMask; - if (cred && suser(cred, NULL) != 0) + if (cred && (suser(cred, NULL) != 0)) { eflags |= kEFReserveMask; /* keep a reserve */ + } /* * Allocate Journal and Quota files in metadata zone. @@ -3141,6 +3395,10 @@ do_hfs_truncate(struct vnode *vp, off_t length, int flags, int truncateflags, vf lockflags |= SFL_EXTENTS; lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK); + /* + * Keep growing the file as long as the current EOF is + * less than the desired value. + */ while ((length > filebytes) && (retval == E_NONE)) { bytesToAdd = length - filebytes; retval = MacToVFSError(ExtendFileC(VTOVCB(vp), @@ -3175,11 +3433,15 @@ do_hfs_truncate(struct vnode *vp, off_t length, int flags, int truncateflags, vf if (retval) goto Err_Exit; - KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_NONE, + KERNEL_DEBUG(HFSDBG_TRUNCATE | DBG_FUNC_NONE, (int)length, (int)fp->ff_size, (int)filebytes, 0, 0); } - if (!(flags & IO_NOZEROFILL)) { + if (ISSET(flags, IO_NOZEROFILL)) { + // An optimisation for the hibernation file + if (vnode_isswap(vp)) + rl_remove_all(&fp->ff_invalidranges); + } else { if (UBCINFOEXISTS(vp) && (vnode_issystem(vp) == 0) && retval == E_NONE) { struct rl_entry *invalid_range; off_t zero_limit; @@ -3234,7 +3496,10 @@ do_hfs_truncate(struct vnode *vp, off_t length, int flags, int truncateflags, vf } else { /* Shorten the size of the file */ - if ((off_t)fp->ff_size > length) { + // An optimisation for the hibernation file + if (ISSET(flags, IO_NOZEROFILL) && vnode_isswap(vp)) { + rl_remove_all(&fp->ff_invalidranges); + } else if ((off_t)fp->ff_size > length) { /* Any space previously marked as invalid is now irrelevant: */ rl_remove(length, fp->ff_size - 1, &fp->ff_invalidranges); } @@ -3268,55 +3533,48 @@ do_hfs_truncate(struct vnode *vp, off_t length, int flags, int truncateflags, vf hfs_unlock_mount (hfsmp); } - /* - * For a TBE process the deallocation of the file blocks is - * delayed until the file is closed. And hfs_close calls - * truncate with the IO_NDELAY flag set. So when IO_NDELAY - * isn't set, we make sure this isn't a TBE process. - */ - if ((flags & IO_NDELAY) || (proc_tbe(p) == 0)) { #if QUOTA - off_t savedbytes = ((off_t)fp->ff_blocks * (off_t)blksize); + off_t savedbytes = ((off_t)fp->ff_blocks * (off_t)blksize); #endif /* QUOTA */ - if (hfs_start_transaction(hfsmp) != 0) { - retval = EINVAL; - goto Err_Exit; - } + if (hfs_start_transaction(hfsmp) != 0) { + retval = EINVAL; + goto Err_Exit; + } - if (fp->ff_unallocblocks == 0) { - /* Protect extents b-tree and allocation bitmap */ - lockflags = SFL_BITMAP; - if (overflow_extents(fp)) - lockflags |= SFL_EXTENTS; - lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK); + if (fp->ff_unallocblocks == 0) { + /* Protect extents b-tree and allocation bitmap */ + lockflags = SFL_BITMAP; + if (overflow_extents(fp)) + lockflags |= SFL_EXTENTS; + lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK); - retval = MacToVFSError(TruncateFileC(VTOVCB(vp), (FCB*)fp, length, 0, - FORK_IS_RSRC (fp), FTOC(fp)->c_fileid, false)); + retval = MacToVFSError(TruncateFileC(VTOVCB(vp), (FCB*)fp, length, 0, + FORK_IS_RSRC (fp), FTOC(fp)->c_fileid, false)); - hfs_systemfile_unlock(hfsmp, lockflags); + hfs_systemfile_unlock(hfsmp, lockflags); + } + if (hfsmp->jnl) { + if (retval == 0) { + fp->ff_size = length; } - if (hfsmp->jnl) { - if (retval == 0) { - fp->ff_size = length; - } - if (skipupdate) { - (void) hfs_minorupdate(vp); - } - else { - (void) hfs_update(vp, TRUE); - (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0); - } + if (skipupdate) { + (void) hfs_minorupdate(vp); } - hfs_end_transaction(hfsmp); + else { + (void) hfs_update(vp, TRUE); + (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0); + } + } + hfs_end_transaction(hfsmp); - filebytes = (off_t)fp->ff_blocks * (off_t)blksize; - if (retval) - goto Err_Exit; + filebytes = (off_t)fp->ff_blocks * (off_t)blksize; + if (retval) + goto Err_Exit; #if QUOTA - /* These are bytesreleased */ - (void) hfs_chkdq(cp, (int64_t)-(savedbytes - filebytes), NOCRED, 0); + /* These are bytesreleased */ + (void) hfs_chkdq(cp, (int64_t)-(savedbytes - filebytes), NOCRED, 0); #endif /* QUOTA */ - } + /* * Only set update flag if the logical length changes & we aren't * suppressing modtime updates. @@ -3352,13 +3610,13 @@ do_hfs_truncate(struct vnode *vp, off_t length, int flags, int truncateflags, vf retval = hfs_update(vp, MNT_WAIT); } if (retval) { - KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_NONE, + KERNEL_DEBUG(HFSDBG_TRUNCATE | DBG_FUNC_NONE, -1, -1, -1, retval, 0); } Err_Exit: - KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_END, + KERNEL_DEBUG(HFSDBG_TRUNCATE | DBG_FUNC_END, (int)length, (int)fp->ff_size, (int)filebytes, retval, 0); return (retval); @@ -3469,14 +3727,16 @@ hfs_release_storage (struct hfsmount *hfsmp, struct filefork *datafork, blksize = hfsmp->blockSize; /* Data Fork */ - if ((datafork != NULL) && (datafork->ff_blocks > 0)) { + if (datafork) { + datafork->ff_size = 0; + fileblocks = datafork->ff_blocks; filebytes = (off_t)fileblocks * (off_t)blksize; /* We killed invalid ranges and loaned blocks before we removed the catalog entry */ while (filebytes > 0) { - if (filebytes > HFS_BIGFILE_SIZE && overflow_extents(datafork)) { + if (filebytes > HFS_BIGFILE_SIZE) { filebytes -= HFS_BIGFILE_SIZE; } else { filebytes = 0; @@ -3499,9 +3759,6 @@ hfs_release_storage (struct hfsmount *hfsmp, struct filefork *datafork, hfs_systemfile_unlock(hfsmp, lockflags); } - if (error == 0) { - datafork->ff_size = filebytes; - } (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0); /* Finish the transaction and start over if necessary */ @@ -3514,14 +3771,16 @@ hfs_release_storage (struct hfsmount *hfsmp, struct filefork *datafork, } /* Resource fork */ - if (error == 0 && (rsrcfork != NULL) && rsrcfork->ff_blocks > 0) { + if (error == 0 && rsrcfork) { + rsrcfork->ff_size = 0; + fileblocks = rsrcfork->ff_blocks; filebytes = (off_t)fileblocks * (off_t)blksize; /* We killed invalid ranges and loaned blocks before we removed the catalog entry */ while (filebytes > 0) { - if (filebytes > HFS_BIGFILE_SIZE && overflow_extents(rsrcfork)) { + if (filebytes > HFS_BIGFILE_SIZE) { filebytes -= HFS_BIGFILE_SIZE; } else { filebytes = 0; @@ -3544,9 +3803,6 @@ hfs_release_storage (struct hfsmount *hfsmp, struct filefork *datafork, hfs_systemfile_unlock(hfsmp, lockflags); } - if (error == 0) { - rsrcfork->ff_size = filebytes; - } (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0); /* Finish the transaction and start over if necessary */ @@ -3561,19 +3817,52 @@ hfs_release_storage (struct hfsmount *hfsmp, struct filefork *datafork, return error; } +errno_t hfs_ubc_setsize(vnode_t vp, off_t len, bool have_cnode_lock) +{ + errno_t error; + + /* + * Call ubc_setsize to give the VM subsystem a chance to do + * whatever it needs to with existing pages before we delete + * blocks. Note that symlinks don't use the UBC so we'll + * get back ENOENT in that case. + */ + if (have_cnode_lock) { + error = ubc_setsize_ex(vp, len, UBC_SETSIZE_NO_FS_REENTRY); + if (error == EAGAIN) { + cnode_t *cp = VTOC(vp); + + if (cp->c_truncatelockowner != current_thread()) { +#if DEVELOPMENT || DEBUG + panic("hfs: hfs_ubc_setsize called without exclusive truncate lock!"); +#else + printf("hfs: hfs_ubc_setsize called without exclusive truncate lock!\n"); +#endif + } + + hfs_unlock(cp); + error = ubc_setsize_ex(vp, len, 0); + hfs_lock_always(cp, HFS_EXCLUSIVE_LOCK); + } + } else + error = ubc_setsize_ex(vp, len, 0); + + return error == ENOENT ? 0 : error; +} /* * Truncate a cnode to at most length size, freeing (or adding) the * disk blocks. */ int -hfs_truncate(struct vnode *vp, off_t length, int flags, int skipsetsize, - int truncateflags, vfs_context_t context) +hfs_truncate(struct vnode *vp, off_t length, int flags, + int truncateflags, vfs_context_t context) { - struct filefork *fp = VTOF(vp); + struct filefork *fp = VTOF(vp); off_t filebytes; u_int32_t fileblocks; - int blksize, error = 0; + int blksize; + errno_t error = 0; struct cnode *cp = VTOC(vp); /* Cannot truncate an HFS directory! */ @@ -3581,7 +3870,7 @@ hfs_truncate(struct vnode *vp, off_t length, int flags, int skipsetsize, return (EISDIR); } /* A swap file cannot change size. */ - if (vnode_isswap(vp) && (length != 0)) { + if (vnode_isswap(vp) && length && !ISSET(flags, IO_NOAUTH)) { return (EPERM); } @@ -3589,24 +3878,17 @@ hfs_truncate(struct vnode *vp, off_t length, int flags, int skipsetsize, fileblocks = fp->ff_blocks; filebytes = (off_t)fileblocks * (off_t)blksize; - // - // Have to do this here so that we don't wind up with - // i/o pending for blocks that are about to be released - // if we truncate the file. - // - // If skipsetsize is set, then the caller is responsible - // for the ubc_setsize. - // - // Even if skipsetsize is set, if the length is zero we - // want to call ubc_setsize() because as of SnowLeopard - // it will no longer cause any page-ins and it will drop - // any dirty pages so that we don't do any i/o that we - // don't have to. This also prevents a race where i/o - // for truncated blocks may overwrite later data if the - // blocks get reallocated to a different file. - // - if (!skipsetsize || length == 0) - ubc_setsize(vp, length); + bool caller_has_cnode_lock = (cp->c_lockowner == current_thread()); + + error = hfs_ubc_setsize(vp, length, caller_has_cnode_lock); + if (error) + return error; + + if (!caller_has_cnode_lock) { + error = hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT); + if (error) + return error; + } // have to loop truncating or growing files that are // really big because otherwise transactions can get @@ -3614,7 +3896,7 @@ hfs_truncate(struct vnode *vp, off_t length, int flags, int skipsetsize, if (length < filebytes) { while (filebytes > length) { - if ((filebytes - length) > HFS_BIGFILE_SIZE && overflow_extents(fp)) { + if ((filebytes - length) > HFS_BIGFILE_SIZE) { filebytes -= HFS_BIGFILE_SIZE; } else { filebytes = length; @@ -3626,7 +3908,7 @@ hfs_truncate(struct vnode *vp, off_t length, int flags, int skipsetsize, } } else if (length > filebytes) { while (filebytes < length) { - if ((length - filebytes) > HFS_BIGFILE_SIZE && overflow_extents(fp)) { + if ((length - filebytes) > HFS_BIGFILE_SIZE) { filebytes += HFS_BIGFILE_SIZE; } else { filebytes = length; @@ -3645,9 +3927,16 @@ hfs_truncate(struct vnode *vp, off_t length, int flags, int skipsetsize, fp->ff_bytesread = 0; } - return (error); -} + if (!caller_has_cnode_lock) + hfs_unlock(cp); + // Make sure UBC's size matches up (in case we didn't completely succeed) + errno_t err2 = hfs_ubc_setsize(vp, fp->ff_size, caller_has_cnode_lock); + if (!error) + error = err2; + + return error; +} /* @@ -3785,13 +4074,13 @@ hfs_vnop_allocate(struct vnop_allocate_args /* { /* Protect extents b-tree and allocation bitmap */ lockflags = SFL_BITMAP; if (overflow_extents(fp)) - lockflags |= SFL_EXTENTS; + lockflags |= SFL_EXTENTS; lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK); if (moreBytesRequested >= HFS_BIGFILE_SIZE) { - bytesRequested = HFS_BIGFILE_SIZE; + bytesRequested = HFS_BIGFILE_SIZE; } else { - bytesRequested = moreBytesRequested; + bytesRequested = moreBytesRequested; } if (extendFlags & kEFContigMask) { @@ -3848,14 +4137,18 @@ hfs_vnop_allocate(struct vnop_allocate_args /* { } else { /* Shorten the size of the file */ - if (fp->ff_size > length) { - /* - * Any buffers that are past the truncation point need to be - * invalidated (to maintain buffer cache consistency). - */ - } + /* + * N.B. At present, this code is never called. If and when we + * do start using it, it looks like there might be slightly + * strange semantics with the file size: it's possible for the + * file size to *increase* e.g. if current file size is 5, + * length is 1024 and filebytes is 4096, the file size will + * end up being 1024 bytes. This isn't necessarily a problem + * but it's not consistent with the code above which doesn't + * change the file size. + */ - retval = hfs_truncate(vp, length, 0, 0, 0, ap->a_context); + retval = hfs_truncate(vp, length, 0, 0, ap->a_context); filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize; /* @@ -3871,9 +4164,7 @@ hfs_vnop_allocate(struct vnop_allocate_args /* { if (fp->ff_size > filebytes) { fp->ff_size = filebytes; - hfs_unlock(cp); - ubc_setsize(vp, fp->ff_size); - hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS); + hfs_ubc_setsize(vp, fp->ff_size, true); } } @@ -3915,8 +4206,10 @@ hfs_vnop_pagein(struct vnop_pagein_args *ap) upl_t upl; upl_page_info_t *pl; off_t f_offset; + off_t page_needed_f_offset; int offset; int isize; + int upl_size; int pg_index; boolean_t truncate_lock_held = FALSE; boolean_t file_converted = FALSE; @@ -3965,6 +4258,8 @@ hfs_vnop_pagein(struct vnop_pagein_args *ap) goto pagein_done; } + page_needed_f_offset = ap->a_f_offset + ap->a_pl_offset; + retry_pagein: /* * take truncate lock (shared/recursive) to guard against @@ -4027,9 +4322,9 @@ retry_pagein: } ubc_upl_range_needed(upl, ap->a_pl_offset / PAGE_SIZE, 1); - isize = ap->a_size; + upl_size = isize = ap->a_size; - /* + /* * Scan from the back to find the last page in the UPL, so that we * aren't looking at a UPL that may have already been freed by the * preceding aborts/completions. @@ -4097,6 +4392,7 @@ retry_pagein: int compressed = hfs_file_is_compressed(VTOC(vp), 1); /* 1 == don't take the cnode lock */ if (compressed) { + if (truncate_lock_held) { /* * can't hold the truncate lock when calling into the decmpfs layer @@ -4136,6 +4432,19 @@ retry_pagein: * indication that the pagein needs to be redriven */ ubc_upl_abort_range(upl, (upl_offset_t) offset, xsize, UPL_ABORT_FREE_ON_EMPTY | UPL_ABORT_RESTART); + } else if (error == ENOSPC) { + + if (upl_size == PAGE_SIZE) + panic("decmpfs_pagein_compressed: couldn't ubc_upl_map a single page\n"); + + ubc_upl_abort_range(upl, (upl_offset_t) offset, isize, UPL_ABORT_FREE_ON_EMPTY); + + ap->a_size = PAGE_SIZE; + ap->a_pl = NULL; + ap->a_pl_offset = 0; + ap->a_f_offset = page_needed_f_offset; + + goto retry_pagein; } goto pagein_next_range; } @@ -4271,10 +4580,6 @@ hfs_vnop_pageout(struct vnop_pageout_args *ap) a_flags = ap->a_flags; a_pl_offset = ap->a_pl_offset; - if (S_ISREG(cp->c_attr.ca_mode) || S_ISLNK(cp->c_attr.ca_mode)) { - hfs_incr_gencount (cp); - } - /* * we can tell if we're getting the new or old behavior from the UPL */ @@ -4503,20 +4808,41 @@ hfs_vnop_pageout(struct vnop_pageout_args *ap) } /* - * If data was written, update the modification time of the file. - * If setuid or setgid bits are set and this process is not the - * superuser then clear the setuid and setgid bits as a precaution - * against tampering. + * If data was written, update the modification time of the file + * but only if it's mapped writable; we will have touched the + * modifcation time for direct writes. */ - if (retval == 0) { - cp->c_touch_modtime = TRUE; - cp->c_touch_chgtime = TRUE; - if ((cp->c_mode & (S_ISUID | S_ISGID)) && - (vfs_context_suser(ap->a_context) != 0)) { - hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS); - cp->c_mode &= ~(S_ISUID | S_ISGID); - hfs_unlock(cp); + if (retval == 0 && (ubc_is_mapped_writable(vp) + || ISSET(cp->c_flag, C_MIGHT_BE_DIRTY_FROM_MAPPING))) { + hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS); + + // Check again with lock + bool mapped_writable = ubc_is_mapped_writable(vp); + if (mapped_writable + || ISSET(cp->c_flag, C_MIGHT_BE_DIRTY_FROM_MAPPING)) { + cp->c_touch_modtime = TRUE; + cp->c_touch_chgtime = TRUE; + + /* + * We only need to increment the generation counter if + * it's currently mapped writable because we incremented + * the counter in hfs_vnop_mnomap. + */ + if (mapped_writable) + hfs_incr_gencount(VTOC(vp)); + + /* + * If setuid or setgid bits are set and this process is + * not the superuser then clear the setuid and setgid bits + * as a precaution against tampering. + */ + if ((cp->c_mode & (S_ISUID | S_ISGID)) && + (vfs_context_suser(ap->a_context) != 0)) { + cp->c_mode &= ~(S_ISUID | S_ISGID); + } } + + hfs_unlock(cp); } pageout_done: @@ -4972,7 +5298,7 @@ hfs_clonefile(struct vnode *vp, int blkstart, int blkcnt, int blksize) ubc_msync(vp, writebase, writebase + offset, NULL, UBC_INVALIDATE | UBC_PUSHDIRTY); } else { /* - * No need to call ubc_sync_range or hfs_invalbuf + * No need to call ubc_msync or hfs_invalbuf * since the file was copied using IO_NOCACHE and * the copy was done starting and ending on a page * boundary in the file.